diff --git a/.backportrc.json b/.backportrc.json index 59843f4d5f134..60f02bb586b22 100644 --- a/.backportrc.json +++ b/.backportrc.json @@ -1,9 +1,9 @@ { "upstream" : "elastic/elasticsearch", - "targetBranchChoices" : [ "main", "8.14", "8.13", "8.12", "8.11", "8.10", "8.9", "8.8", "8.7", "8.6", "8.5", "8.4", "8.3", "8.2", "8.1", "8.0", "7.17", "6.8" ], + "targetBranchChoices" : [ "main", "8.15", "8.14", "8.13", "8.12", "8.11", "8.10", "8.9", "8.8", "8.7", "8.6", "8.5", "8.4", "8.3", "8.2", "8.1", "8.0", "7.17", "6.8" ], "targetPRLabels" : [ "backport" ], "branchLabelMapping" : { - "^v8.15.0$" : "main", + "^v8.15.1$" : "main", "^v(\\d+).(\\d+).\\d+(?:-(?:alpha|beta|rc)\\d+)?$" : "$1.$2" } } \ No newline at end of file diff --git a/.buildkite/pipelines/dra-workflow.yml b/.buildkite/pipelines/dra-workflow.yml index 32a2b7d22134a..bcc6c9c57d756 100644 --- a/.buildkite/pipelines/dra-workflow.yml +++ b/.buildkite/pipelines/dra-workflow.yml @@ -7,7 +7,7 @@ steps: image: family/elasticsearch-ubuntu-2204 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 + diskSizeGb: 350 - wait # The hadoop build depends on the ES artifact # So let's trigger the hadoop build any time we build a new staging artifact diff --git a/.buildkite/pipelines/intake.yml b/.buildkite/pipelines/intake.yml index 4124d4e550d11..0b854304d2b74 100644 --- a/.buildkite/pipelines/intake.yml +++ b/.buildkite/pipelines/intake.yml @@ -62,7 +62,7 @@ steps: timeout_in_minutes: 300 matrix: setup: - BWC_VERSION: ["7.17.23", "8.14.2", "8.15.0"] + BWC_VERSION: ["7.17.24", "8.15.1"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 diff --git a/.buildkite/pipelines/periodic-packaging.yml b/.buildkite/pipelines/periodic-packaging.yml index 4217fc91bf0fd..306ff64375299 100644 --- a/.buildkite/pipelines/periodic-packaging.yml +++ b/.buildkite/pipelines/periodic-packaging.yml @@ -322,8 +322,8 @@ steps: env: BWC_VERSION: 7.16.3 - - label: "{{matrix.image}} / 7.17.23 / packaging-tests-upgrade" - command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v7.17.23 + - label: "{{matrix.image}} / 7.17.24 / packaging-tests-upgrade" + command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v7.17.24 timeout_in_minutes: 300 matrix: setup: @@ -337,7 +337,7 @@ steps: buildDirectory: /dev/shm/bk diskSizeGb: 250 env: - BWC_VERSION: 7.17.23 + BWC_VERSION: 7.17.24 - label: "{{matrix.image}} / 8.0.1 / packaging-tests-upgrade" command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.0.1 @@ -577,8 +577,8 @@ steps: env: BWC_VERSION: 8.13.4 - - label: "{{matrix.image}} / 8.14.2 / packaging-tests-upgrade" - command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.14.2 + - label: "{{matrix.image}} / 8.14.3 / packaging-tests-upgrade" + command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.14.3 timeout_in_minutes: 300 matrix: setup: @@ -592,10 +592,10 @@ steps: buildDirectory: /dev/shm/bk diskSizeGb: 250 env: - BWC_VERSION: 8.14.2 + BWC_VERSION: 8.14.3 - - label: "{{matrix.image}} / 8.15.0 / packaging-tests-upgrade" - command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.15.0 + - label: "{{matrix.image}} / 8.15.1 / packaging-tests-upgrade" + command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.15.1 timeout_in_minutes: 300 matrix: setup: @@ 
-609,7 +609,7 @@ steps: buildDirectory: /dev/shm/bk diskSizeGb: 250 env: - BWC_VERSION: 8.15.0 + BWC_VERSION: 8.15.1 - group: packaging-tests-windows steps: diff --git a/.buildkite/pipelines/periodic.yml b/.buildkite/pipelines/periodic.yml index 06e7ffbc8fb1c..7c97c5c6c6b75 100644 --- a/.buildkite/pipelines/periodic.yml +++ b/.buildkite/pipelines/periodic.yml @@ -342,8 +342,8 @@ steps: - signal_reason: agent_stop limit: 3 - - label: 7.17.23 / bwc - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v7.17.23#bwcTest + - label: 7.17.24 / bwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v7.17.24#bwcTest timeout_in_minutes: 300 agents: provider: gcp @@ -353,7 +353,7 @@ steps: preemptible: true diskSizeGb: 250 env: - BWC_VERSION: 7.17.23 + BWC_VERSION: 7.17.24 retry: automatic: - exit_status: "-1" @@ -642,8 +642,8 @@ steps: - signal_reason: agent_stop limit: 3 - - label: 8.14.2 / bwc - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.14.2#bwcTest + - label: 8.14.3 / bwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.14.3#bwcTest timeout_in_minutes: 300 agents: provider: gcp @@ -653,7 +653,7 @@ steps: preemptible: true diskSizeGb: 250 env: - BWC_VERSION: 8.14.2 + BWC_VERSION: 8.14.3 retry: automatic: - exit_status: "-1" @@ -662,8 +662,8 @@ steps: - signal_reason: agent_stop limit: 3 - - label: 8.15.0 / bwc - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.15.0#bwcTest + - label: 8.15.1 / bwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.15.1#bwcTest timeout_in_minutes: 300 agents: provider: gcp @@ -673,7 +673,7 @@ steps: preemptible: true diskSizeGb: 250 env: - BWC_VERSION: 8.15.0 + BWC_VERSION: 8.15.1 retry: automatic: - exit_status: "-1" @@ -751,7 +751,7 @@ steps: setup: ES_RUNTIME_JAVA: - openjdk17 - BWC_VERSION: ["7.17.23", "8.14.2", "8.15.0"] + BWC_VERSION: ["7.17.24", "8.15.1"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 @@ -801,7 +801,7 @@ steps: - openjdk21 - openjdk22 - openjdk23 - BWC_VERSION: ["7.17.23", "8.14.2", "8.15.0"] + BWC_VERSION: ["7.17.24", "8.15.1"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 diff --git a/.buildkite/pipelines/pull-request/bwc-snapshots-windows.yml b/.buildkite/pipelines/pull-request/bwc-snapshots-windows.yml deleted file mode 100644 index d37bdf380f926..0000000000000 --- a/.buildkite/pipelines/pull-request/bwc-snapshots-windows.yml +++ /dev/null @@ -1,20 +0,0 @@ -config: - allow-labels: test-windows -steps: - - group: bwc-snapshots-windows - steps: - - label: "{{matrix.BWC_VERSION}} / bwc-snapshots-windows" - key: "bwc-snapshots-windows" - command: .\.buildkite\scripts\run-script.ps1 bash .buildkite/scripts/windows-run-gradle.sh - env: - GRADLE_TASK: "v{{matrix.BWC_VERSION}}#bwcTest" - timeout_in_minutes: 300 - matrix: - setup: - BWC_VERSION: $SNAPSHOT_BWC_VERSIONS - agents: - provider: gcp - image: family/elasticsearch-windows-2022 - machineType: custom-32-98304 - diskType: pd-ssd - diskSizeGb: 350 diff --git a/.ci/bwcVersions b/.ci/bwcVersions index bce556e9fc352..e875bc48222d5 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -16,7 +16,7 @@ BWC_VERSION: - "7.14.2" - "7.15.2" - "7.16.3" - - "7.17.23" + - "7.17.24" - "8.0.1" - "8.1.3" - "8.2.3" @@ -31,5 +31,5 @@ BWC_VERSION: - "8.11.4" - "8.12.2" - "8.13.4" - - "8.14.2" - - "8.15.0" + - "8.14.3" + - "8.15.1" diff --git a/.ci/snapshotBwcVersions b/.ci/snapshotBwcVersions index 5fc4b6c072899..f79c2a44b5dc0 100644 --- a/.ci/snapshotBwcVersions +++ b/.ci/snapshotBwcVersions 
@@ -1,4 +1,3 @@ BWC_VERSION: - - "7.17.23" - - "8.14.2" - - "8.15.0" + - "7.17.24" + - "8.15.1" diff --git a/README.asciidoc b/README.asciidoc index dc27735d3c015..c1945e56b025b 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -1,6 +1,6 @@ = Elasticsearch -Elasticsearch is a distributed search and analytics engine optimized for speed and relevance on production-scale workloads. Elasticsearch is the foundation of Elastic's open Stack platform. Search in near real-time over massive datasets, perform vector searches, integrate with generative AI applications, and much more. +Elasticsearch is a distributed search and analytics engine, scalable data store and vector database optimized for speed and relevance on production-scale workloads. Elasticsearch is the foundation of Elastic's open Stack platform. Search in near real-time over massive datasets, perform vector searches, integrate with generative AI applications, and much more. Use cases enabled by Elasticsearch include: @@ -33,76 +33,144 @@ https://www.elastic.co/downloads/elasticsearch[elastic.co/downloads/elasticsearc === Run Elasticsearch locally //// -IMPORTANT: This content is replicated in the Elasticsearch guide. -If you make changes, you must also update setup/set-up-local-dev-deployment.asciidoc. +IMPORTANT: This content is replicated in the Elasticsearch guide. See `run-elasticsearch-locally.asciidoc`. +Both will soon be replaced by a quickstart script. //// -To try out Elasticsearch on your own machine, we recommend using Docker -and running both Elasticsearch and Kibana. -Docker images are available from the https://www.docker.elastic.co[Elastic Docker registry]. +[WARNING] +==== +DO NOT USE THESE INSTRUCTIONS FOR PRODUCTION DEPLOYMENTS. -NOTE: Starting in Elasticsearch 8.0, security is enabled by default. -The first time you start Elasticsearch, TLS encryption is configured automatically, -a password is generated for the `elastic` user, -and a Kibana enrollment token is created so you can connect Kibana to your secured cluster. +This setup is intended for local development and testing only. +==== -For other installation options, see the -https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html[Elasticsearch installation documentation]. +The following commands help you very quickly spin up a single-node Elasticsearch cluster, together with Kibana in Docker. +Use this setup for local development or testing. -**Start Elasticsearch** +==== Prerequisites -. Install and start https://www.docker.com/products/docker-desktop[Docker -Desktop]. Go to **Preferences > Resources > Advanced** and set Memory to at least 4GB. +If you don't have Docker installed, https://www.docker.com/products/docker-desktop[download and install Docker Desktop] for your operating system. -. Start an Elasticsearch container: -+ +==== Set environment variables + +Configure the following environment variables. 
+ +[source,sh] +---- +export ELASTIC_PASSWORD="" # password for "elastic" username +export KIBANA_PASSWORD="" # Used internally by Kibana, must be at least 6 characters long +---- + +==== Create a Docker network + +To run both Elasticsearch and Kibana, you'll need to create a Docker network: + +[source,sh] ---- -docker network create elastic -docker pull docker.elastic.co/elasticsearch/elasticsearch:{version} <1> -docker run --name elasticsearch --net elastic -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -t docker.elastic.co/elasticsearch/elasticsearch:{version} +docker network create elastic-net ---- -<1> Replace {version} with the version of Elasticsearch you want to run. -+ -When you start Elasticsearch for the first time, the generated `elastic` user password and -Kibana enrollment token are output to the terminal. -+ -NOTE: You might need to scroll back a bit in the terminal to view the password -and enrollment token. -. Copy the generated password and enrollment token and save them in a secure -location. These values are shown only when you start Elasticsearch for the first time. -You'll use these to enroll Kibana with your Elasticsearch cluster and log in. +==== Run Elasticsearch + +Start the Elasticsearch container with the following command: -**Start Kibana** +[source,sh] +---- +docker run -p 127.0.0.1:9200:9200 -d --name elasticsearch --network elastic-net \ + -e ELASTIC_PASSWORD=$ELASTIC_PASSWORD \ + -e "discovery.type=single-node" \ + -e "xpack.security.http.ssl.enabled=false" \ + -e "xpack.license.self_generated.type=trial" \ + docker.elastic.co/elasticsearch/elasticsearch:{version} +---- -Kibana enables you to easily send requests to Elasticsearch and analyze, visualize, and manage data interactively. +==== Run Kibana (optional) -. In a new terminal session, start Kibana and connect it to your Elasticsearch container: -+ +To run Kibana, you must first set the `kibana_system` password in the Elasticsearch container. + +[source,sh] ---- -docker pull docker.elastic.co/kibana/kibana:{version} <1> -docker run --name kibana --net elastic -p 5601:5601 docker.elastic.co/kibana/kibana:{version} +# configure the Kibana password in the ES container +curl -u elastic:$ELASTIC_PASSWORD \ + -X POST \ + http://localhost:9200/_security/user/kibana_system/_password \ + -d '{"password":"'"$KIBANA_PASSWORD"'"}' \ + -H 'Content-Type: application/json' ---- -<1> Replace {version} with the version of Kibana you want to run. -+ -When you start Kibana, a unique URL is output to your terminal. +// NOTCONSOLE -. To access Kibana, open the generated URL in your browser. +Start the Kibana container with the following command: - .. Paste the enrollment token that you copied when starting - Elasticsearch and click the button to connect your Kibana instance with Elasticsearch. +[source,sh] +---- +docker run -p 127.0.0.1:5601:5601 -d --name kibana --network elastic-net \ + -e ELASTICSEARCH_URL=http://elasticsearch:9200 \ + -e ELASTICSEARCH_HOSTS=http://elasticsearch:9200 \ + -e ELASTICSEARCH_USERNAME=kibana_system \ + -e ELASTICSEARCH_PASSWORD=$KIBANA_PASSWORD \ + -e "xpack.security.enabled=false" \ + -e "xpack.license.self_generated.type=trial" \ + docker.elastic.co/kibana/kibana:{version} +---- - .. Log in to Kibana as the `elastic` user with the password that was generated - when you started Elasticsearch. +.Trial license +[%collapsible] +==== +The service is started with a trial license. The trial license enables all features of Elasticsearch for a trial period of 30 days. 
After the trial period expires, the license is downgraded to a basic license, which is free forever. If you prefer to skip the trial and use the basic license, set the value of the `xpack.license.self_generated.type` variable to basic instead. For a detailed feature comparison between the different licenses, refer to our https://www.elastic.co/subscriptions[subscriptions page]. +==== -**Send requests to Elasticsearch** +==== Send requests to Elasticsearch You send data and other requests to Elasticsearch through REST APIs. You can interact with Elasticsearch using any client that sends HTTP requests, such as the https://www.elastic.co/guide/en/elasticsearch/client/index.html[Elasticsearch language clients] and https://curl.se[curl]. + +===== Using curl + +Here's an example curl command to create a new Elasticsearch index, using basic auth: + +[source,sh] +---- +curl -u elastic:$ELASTIC_PASSWORD \ + -X PUT \ + http://localhost:9200/my-new-index \ + -H 'Content-Type: application/json' +---- +// NOTCONSOLE + +===== Using a language client + +To connect to your local dev Elasticsearch cluster with a language client, you can use basic authentication with the `elastic` username and the password you set in the environment variable. + +You'll use the following connection details: + +* **Elasticsearch endpoint**: `http://localhost:9200` +* **Username**: `elastic` +* **Password**: `$ELASTIC_PASSWORD` (Value you set in the environment variable) + +For example, to connect with the Python `elasticsearch` client: + +[source,python] +---- +import os +from elasticsearch import Elasticsearch + +username = 'elastic' +password = os.getenv('ELASTIC_PASSWORD') # Value you set in the environment variable + +client = Elasticsearch( + "http://localhost:9200", + basic_auth=(username, password) +) + +print(client.info()) +---- + +===== Using the Dev Tools Console + Kibana's developer console provides an easy way to experiment and test requests. -To access the console, go to **Management > Dev Tools**. +To access the console, open Kibana, then go to **Management** > **Dev Tools**. 
**Add data** diff --git a/build-tools-internal/gradle/wrapper/gradle-wrapper.properties b/build-tools-internal/gradle/wrapper/gradle-wrapper.properties index 515ab9d5f1822..efe2ff3449216 100644 --- a/build-tools-internal/gradle/wrapper/gradle-wrapper.properties +++ b/build-tools-internal/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=f8b4f4772d302c8ff580bc40d0f56e715de69b163546944f787c87abf209c961 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-all.zip +distributionSha256Sum=258e722ec21e955201e31447b0aed14201765a3bfbae296a46cf60b70e66db70 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/BwcSetupExtension.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/BwcSetupExtension.java index 7010ed92d4c57..4112d96c7296b 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/BwcSetupExtension.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/BwcSetupExtension.java @@ -26,7 +26,6 @@ import org.gradle.api.tasks.TaskProvider; import org.gradle.jvm.toolchain.JavaLanguageVersion; import org.gradle.jvm.toolchain.JavaToolchainService; -import org.gradle.jvm.toolchain.JvmVendorSpec; import java.io.File; import java.io.IOException; @@ -161,10 +160,8 @@ private static TaskProvider createRunBwcGradleTask( /** A convenience method for getting java home for a version of java and requiring that version for the given task to execute */ private static Provider getJavaHome(ObjectFactory objectFactory, JavaToolchainService toolChainService, final int version) { Property value = objectFactory.property(JavaLanguageVersion.class).value(JavaLanguageVersion.of(version)); - return toolChainService.launcherFor(javaToolchainSpec -> { - javaToolchainSpec.getLanguageVersion().value(value); - javaToolchainSpec.getVendor().set(JvmVendorSpec.ORACLE); - }).map(launcher -> launcher.getMetadata().getInstallationPath().getAsFile().getAbsolutePath()); + return toolChainService.launcherFor(javaToolchainSpec -> { javaToolchainSpec.getLanguageVersion().value(value); }) + .map(launcher -> launcher.getMetadata().getInstallationPath().getAsFile().getAbsolutePath()); } private static String readFromFile(File file) { diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java index 4f9498c8f33a6..b513fd7b93631 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java @@ -8,7 +8,7 @@ package org.elasticsearch.gradle.internal; -import com.gradle.scan.plugin.BuildScanExtension; +import com.gradle.develocity.agent.gradle.DevelocityConfiguration; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; @@ -64,7 +64,7 @@ public void apply(Project target) { File targetFile = target.file("build/" + buildNumber + ".tar.bz2"); File projectDir = target.getProjectDir(); File gradleWorkersDir = new File(target.getGradle().getGradleUserHomeDir(), 
"workers/"); - BuildScanExtension extension = target.getExtensions().getByType(BuildScanExtension.class); + DevelocityConfiguration extension = target.getExtensions().getByType(DevelocityConfiguration.class); File daemonsLogDir = new File(target.getGradle().getGradleUserHomeDir(), "daemon/" + target.getGradle().getGradleVersion()); getFlowScope().always(BuildFinishedFlowAction.class, spec -> { @@ -125,7 +125,7 @@ interface Parameters extends FlowParameters { ListProperty getFilteredFiles(); @Input - Property getBuildScan(); + Property getBuildScan(); } @@ -198,7 +198,7 @@ public void execute(BuildFinishedFlowAction.Parameters parameters) throws FileNo + System.getenv("BUILDKITE_JOB_ID") + "/artifacts/" + artifactUuid; - parameters.getBuildScan().get().link("Artifact Upload", targetLink); + parameters.getBuildScan().get().getBuildScan().link("Artifact Upload", targetLink); } } catch (Exception e) { System.out.println("Failed to upload buildkite artifact " + e.getMessage()); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/EmbeddedProviderExtension.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/EmbeddedProviderExtension.java index e9e75a711a8ff..03b8f19d10b13 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/EmbeddedProviderExtension.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/EmbeddedProviderExtension.java @@ -43,17 +43,16 @@ void impl(String implName, Project implProject) { }); String manifestTaskName = "generate" + capitalName + "ProviderManifest"; - Provider generatedResourcesDir = project.getLayout().getBuildDirectory().dir("generated-resources"); + Provider generatedResourcesRoot = project.getLayout().getBuildDirectory().dir("generated-resources"); var generateProviderManifest = project.getTasks().register(manifestTaskName, GenerateProviderManifest.class); generateProviderManifest.configure(t -> { - t.getManifestFile().set(generatedResourcesDir.map(d -> d.file("LISTING.TXT"))); + t.getManifestFile().set(generatedResourcesRoot.map(d -> d.dir(manifestTaskName).file("LISTING.TXT"))); t.getProviderImplClasspath().from(implConfig); }); - String implTaskName = "generate" + capitalName + "ProviderImpl"; var generateProviderImpl = project.getTasks().register(implTaskName, Sync.class); generateProviderImpl.configure(t -> { - t.into(generatedResourcesDir); + t.into(generatedResourcesRoot.map(d -> d.dir(implTaskName))); t.into("IMPL-JARS/" + implName, spec -> { spec.from(implConfig); spec.from(generateProviderManifest); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java index 42834928bafed..e61bbefc9a973 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java @@ -51,6 +51,7 @@ import java.util.Arrays; import java.util.List; import java.util.Locale; +import java.util.Optional; import java.util.Random; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -97,24 +98,25 @@ public void apply(Project project) { JavaVersion minimumCompilerVersion = JavaVersion.toVersion(getResourceContents("/minimumCompilerVersion")); JavaVersion minimumRuntimeVersion = 
JavaVersion.toVersion(getResourceContents("/minimumRuntimeVersion")); - File runtimeJavaHome = findRuntimeJavaHome(); - boolean isRuntimeJavaHomeSet = Jvm.current().getJavaHome().equals(runtimeJavaHome) == false; + Optional selectedRuntimeJavaHome = findRuntimeJavaHome(); + File actualRuntimeJavaHome = selectedRuntimeJavaHome.orElse(Jvm.current().getJavaHome()); + boolean isRuntimeJavaHomeSet = selectedRuntimeJavaHome.isPresent(); GitInfo gitInfo = GitInfo.gitInfo(project.getRootDir()); BuildParams.init(params -> { params.reset(); - params.setRuntimeJavaHome(runtimeJavaHome); + params.setRuntimeJavaHome(actualRuntimeJavaHome); params.setJavaToolChainSpec(resolveToolchainSpecFromEnv()); params.setRuntimeJavaVersion( determineJavaVersion( "runtime java.home", - runtimeJavaHome, + actualRuntimeJavaHome, isRuntimeJavaHomeSet ? minimumRuntimeVersion : Jvm.current().getJavaVersion() ) ); params.setIsRuntimeJavaHomeSet(isRuntimeJavaHomeSet); - JvmInstallationMetadata runtimeJdkMetaData = metadataDetector.getMetadata(getJavaInstallation(runtimeJavaHome)); + JvmInstallationMetadata runtimeJdkMetaData = metadataDetector.getMetadata(getJavaInstallation(actualRuntimeJavaHome)); params.setRuntimeJavaDetails(formatJavaVendorDetails(runtimeJdkMetaData)); params.setJavaVersions(getAvailableJavaVersions()); params.setMinimumCompilerVersion(minimumCompilerVersion); @@ -298,19 +300,19 @@ private static void assertMinimumCompilerVersion(JavaVersion minimumCompilerVers } } - private File findRuntimeJavaHome() { + private Optional findRuntimeJavaHome() { String runtimeJavaProperty = System.getProperty("runtime.java"); if (runtimeJavaProperty != null) { - return resolveJavaHomeFromToolChainService(runtimeJavaProperty); + return Optional.of(resolveJavaHomeFromToolChainService(runtimeJavaProperty)); } String env = System.getenv("RUNTIME_JAVA_HOME"); if (env != null) { - return new File(env); + return Optional.of(new File(env)); } // fall back to tool chain if set. env = System.getenv("JAVA_TOOLCHAIN_HOME"); - return env == null ? Jvm.current().getJavaHome() : new File(env); + return env == null ? 
Optional.empty() : Optional.of(new File(env)); } @NotNull @@ -348,7 +350,6 @@ private File resolveJavaHomeFromToolChainService(String version) { Property value = objectFactory.property(JavaLanguageVersion.class).value(JavaLanguageVersion.of(version)); Provider javaLauncherProvider = toolChainService.launcherFor(javaToolchainSpec -> { javaToolchainSpec.getLanguageVersion().value(value); - javaToolchainSpec.getVendor().set(JvmVendorSpec.ORACLE); }); return javaLauncherProvider.get().getMetadata().getInstallationPath().getAsFile(); } diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/ThirdPartyAuditTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/ThirdPartyAuditTask.java index 4263ef2b1f76f..489cff65976b1 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/ThirdPartyAuditTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/ThirdPartyAuditTask.java @@ -193,6 +193,11 @@ public Set getMissingClassExcludes() { @SkipWhenEmpty public abstract ConfigurableFileCollection getJarsToScan(); + @Classpath + public FileCollection getClasspath() { + return classpath; + } + @TaskAction public void runThirdPartyAudit() throws IOException { Set jars = getJarsToScan().getFiles(); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java index 08abb02ea831e..ec79fe20492e1 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java @@ -52,7 +52,7 @@ public void apply(Project project) { project.getTasks().register("extractCurrentVersions", ExtractCurrentVersionsTask.class); project.getTasks().register("tagVersions", TagVersionsTask.class); - project.getTasks().register("setCompatibleVersions", SetCompatibleVersionsTask.class); + project.getTasks().register("setCompatibleVersions", SetCompatibleVersionsTask.class, t -> t.setThisVersion(version)); final FileTree yamlFiles = projectDirectory.dir("docs/changelog") .getAsFileTree() diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java index 15e0a0cc345d5..17761e5183b31 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java @@ -14,6 +14,7 @@ import com.github.javaparser.ast.expr.NameExpr; import com.github.javaparser.printer.lexicalpreservation.LexicalPreservingPrinter; +import org.elasticsearch.gradle.Version; import org.gradle.api.tasks.TaskAction; import org.gradle.api.tasks.options.Option; import org.gradle.initialization.layout.BuildLayout; @@ -28,6 +29,8 @@ public class SetCompatibleVersionsTask extends AbstractVersionsTask { + private Version thisVersion; + private Version releaseVersion; private Map versionIds = Map.of(); @Inject @@ -35,21 +38,35 @@ public SetCompatibleVersionsTask(BuildLayout layout) { super(layout); } + public void setThisVersion(Version version) { + thisVersion = version; + } + @Option(option = "version-id", 
description = "Version id used for the release. Of the form :.") public void versionIds(List version) { this.versionIds = splitVersionIds(version); } + @Option(option = "release", description = "The version being released") + public void releaseVersion(String version) { + releaseVersion = Version.fromString(version); + } + @TaskAction public void executeTask() throws IOException { if (versionIds.isEmpty()) { throw new IllegalArgumentException("No version ids specified"); } + + if (releaseVersion.getMajor() < thisVersion.getMajor()) { + // don't need to update CCS version - this is for a different major + return; + } + Integer transportVersion = versionIds.get(TRANSPORT_VERSION_TYPE); if (transportVersion == null) { throw new IllegalArgumentException("TransportVersion id not specified"); } - Path versionJava = rootDir.resolve(TRANSPORT_VERSIONS_FILE_PATH); CompilationUnit file = LexicalPreservingPrinter.setup(StaticJavaParser.parse(versionJava)); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolver.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolver.java index b8cffae0189ce..913a15517f0af 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolver.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolver.java @@ -23,9 +23,12 @@ import java.util.Map; import java.util.Optional; +/** + * Resolves released Oracle JDKs that are EOL. + */ public abstract class ArchivedOracleJdkToolchainResolver extends AbstractCustomJavaToolchainResolver { - private static final Map ARCHIVED_BASE_VERSIONS = Maps.of(20, "20.0.2", 19, "19.0.2", 18, "18.0.2.1", 17, "17.0.7"); + private static final Map ARCHIVED_BASE_VERSIONS = Maps.of(20, "20.0.2", 19, "19.0.2", 18, "18.0.2.1"); @Override public Optional resolve(JavaToolchainRequest request) { diff --git a/build-tools-internal/src/main/resources/minimumGradleVersion b/build-tools-internal/src/main/resources/minimumGradleVersion index 83ea3179ddacc..f7b1c8ff61774 100644 --- a/build-tools-internal/src/main/resources/minimumGradleVersion +++ b/build-tools-internal/src/main/resources/minimumGradleVersion @@ -1 +1 @@ -8.8 \ No newline at end of file +8.9 \ No newline at end of file diff --git a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolverSpec.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolverSpec.groovy index b7f08b6016679..dd6e7b324e745 100644 --- a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolverSpec.groovy +++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolverSpec.groovy @@ -40,12 +40,6 @@ class ArchivedOracleJdkToolchainResolverSpec extends AbstractToolchainResolverSp [18, ORACLE, LINUX, X86_64, "https://download.oracle.com/java/18/archive/jdk-18.0.2.1_linux-x64_bin.tar.gz"], [18, ORACLE, LINUX, AARCH64, "https://download.oracle.com/java/18/archive/jdk-18.0.2.1_linux-aarch64_bin.tar.gz"], [18, ORACLE, WINDOWS, X86_64, "https://download.oracle.com/java/18/archive/jdk-18.0.2.1_windows-x64_bin.zip"], - - [17, ORACLE, MAC_OS, X86_64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_macos-x64_bin.tar.gz"], - [17, 
ORACLE, MAC_OS, AARCH64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_macos-aarch64_bin.tar.gz"], - [17, ORACLE, LINUX, X86_64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_linux-x64_bin.tar.gz"], - [17, ORACLE, LINUX, AARCH64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_linux-aarch64_bin.tar.gz"], - [17, ORACLE, WINDOWS, X86_64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_windows-x64_bin.zip"] ] } diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 0fa6142789381..0c32da70d3227 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,4 +1,4 @@ -elasticsearch = 8.15.0 +elasticsearch = 8.15.1 lucene = 9.11.1 bundled_jdk_vendor = openjdk diff --git a/build-tools/src/main/java/org/elasticsearch/gradle/DistributionDownloadPlugin.java b/build-tools/src/main/java/org/elasticsearch/gradle/DistributionDownloadPlugin.java index 2bc4aa1a1be36..fe7d303dc522b 100644 --- a/build-tools/src/main/java/org/elasticsearch/gradle/DistributionDownloadPlugin.java +++ b/build-tools/src/main/java/org/elasticsearch/gradle/DistributionDownloadPlugin.java @@ -42,8 +42,10 @@ public class DistributionDownloadPlugin implements Plugin { private static final String FAKE_SNAPSHOT_IVY_GROUP = "elasticsearch-distribution-snapshot"; private static final String DOWNLOAD_REPO_NAME = "elasticsearch-downloads"; private static final String SNAPSHOT_REPO_NAME = "elasticsearch-snapshots"; - public static final String DISTRO_EXTRACTED_CONFIG_PREFIX = "es_distro_extracted_"; - public static final String DISTRO_CONFIG_PREFIX = "es_distro_file_"; + + public static final String ES_DISTRO_CONFIG_PREFIX = "es_distro_"; + public static final String DISTRO_EXTRACTED_CONFIG_PREFIX = ES_DISTRO_CONFIG_PREFIX + "extracted_"; + public static final String DISTRO_CONFIG_PREFIX = ES_DISTRO_CONFIG_PREFIX + "file_"; private final ObjectFactory objectFactory; private NamedDomainObjectContainer distributionsContainer; diff --git a/build-tools/src/main/java/org/elasticsearch/gradle/test/GradleTestPolicySetupPlugin.java b/build-tools/src/main/java/org/elasticsearch/gradle/test/GradleTestPolicySetupPlugin.java index a1da860abe26a..9593a281686e7 100644 --- a/build-tools/src/main/java/org/elasticsearch/gradle/test/GradleTestPolicySetupPlugin.java +++ b/build-tools/src/main/java/org/elasticsearch/gradle/test/GradleTestPolicySetupPlugin.java @@ -22,6 +22,9 @@ public void apply(Project project) { test.systemProperty("tests.gradle", true); test.systemProperty("tests.task", test.getPath()); + // Flag is required for later Java versions since our tests use a custom security manager + test.jvmArgs("-Djava.security.manager=allow"); + SystemPropertyCommandLineArgumentProvider nonInputProperties = new SystemPropertyCommandLineArgumentProvider(); // don't track these as inputs since they contain absolute paths and break cache relocatability nonInputProperties.systemProperty("gradle.dist.lib", gradle.getGradleHomeDir().getAbsolutePath() + "/lib"); diff --git a/build.gradle b/build.gradle index 3869d21b49bfe..01fdace570ce0 100644 --- a/build.gradle +++ b/build.gradle @@ -19,6 +19,7 @@ import org.elasticsearch.gradle.internal.info.BuildParams import org.elasticsearch.gradle.util.GradleUtils import org.gradle.plugins.ide.eclipse.model.AccessRule import org.gradle.plugins.ide.eclipse.model.ProjectDependency +import org.elasticsearch.gradle.DistributionDownloadPlugin import java.nio.file.Files @@ -284,11 +285,16 @@ allprojects { } 
tasks.register('resolveAllDependencies', ResolveAllDependencies) { - configs = project.configurations + def ignoredPrefixes = [DistributionDownloadPlugin.ES_DISTRO_CONFIG_PREFIX, "jdbcDriver"] + configs = project.configurations.matching { config -> ignoredPrefixes.any { config.name.startsWith(it) } == false } resolveJavaToolChain = true if (project.path.contains("fixture")) { dependsOn tasks.withType(ComposePull) } + if (project.path.contains(":distribution:docker")) { + enabled = false + } + } plugins.withId('lifecycle-base') { diff --git a/docs/changelog/101373.yaml b/docs/changelog/101373.yaml deleted file mode 100644 index 53b5680301c79..0000000000000 --- a/docs/changelog/101373.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 101373 -summary: Adding aggregations support for the `_ignored` field -area: Search -type: feature -issues: - - 59946 diff --git a/docs/changelog/103374.yaml b/docs/changelog/103374.yaml deleted file mode 100644 index fcdee9185eb92..0000000000000 --- a/docs/changelog/103374.yaml +++ /dev/null @@ -1,16 +0,0 @@ -pr: 103374 -summary: Cut over stored fields to ZSTD for compression -area: Search -type: enhancement -issues: [] -highlight: - title: Stored fields are now compressed with ZStandard instead of LZ4/DEFLATE - body: |- - Stored fields are now compressed by splitting documents into blocks, which - are then compressed independently with ZStandard. `index.codec: default` - (default) uses blocks of at most 14kB or 128 documents compressed with level - 0, while `index.codec: best_compression` uses blocks of at most 240kB or - 2048 documents compressed at level 3. On most datasets that we tested - against, this yielded storage improvements in the order of 10%, slightly - faster indexing and similar retrieval latencies. - notable: true diff --git a/docs/changelog/105792.yaml b/docs/changelog/105792.yaml deleted file mode 100644 index 2ad5aa970c214..0000000000000 --- a/docs/changelog/105792.yaml +++ /dev/null @@ -1,18 +0,0 @@ -pr: 105792 -summary: "Change `skip_unavailable` remote cluster setting default value to true" -area: Search -type: breaking -issues: [] -breaking: - title: "Change `skip_unavailable` remote cluster setting default value to true" - area: Cluster and node setting - details: The default value of the `skip_unavailable` setting is now set to true. - All existing and future remote clusters that do not define this setting will use the new default. - This setting only affects cross-cluster searches using the _search or _async_search API. - impact: Unavailable remote clusters in a cross-cluster search will no longer cause the search to fail unless - skip_unavailable is configured to be `false` in elasticsearch.yml or via the `_cluster/settings` API. - Unavailable clusters with `skip_unavailable`=`true` (either explicitly or by using the new default) are marked - as SKIPPED in the search response metadata section and do not fail the entire search. If users want to ensure that a - search returns a failure when a particular remote cluster is not available, `skip_unavailable` must be now be - set explicitly. 
- notable: false diff --git a/docs/changelog/105829.yaml b/docs/changelog/105829.yaml deleted file mode 100644 index d9f8439e4b887..0000000000000 --- a/docs/changelog/105829.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 105829 -summary: Log shard movements -area: Allocation -type: enhancement -issues: [] diff --git a/docs/changelog/106252.yaml b/docs/changelog/106252.yaml deleted file mode 100644 index 5e3f084632b9d..0000000000000 --- a/docs/changelog/106252.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 106252 -summary: Add min/max range of the `event.ingested` field to cluster state for searchable - snapshots -area: Search -type: enhancement -issues: [] diff --git a/docs/changelog/106253.yaml b/docs/changelog/106253.yaml deleted file mode 100644 index b80cda37f63c7..0000000000000 --- a/docs/changelog/106253.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 106253 -summary: Fix for from parameter when using `sub_searches` and rank -area: Ranking -type: bug -issues: - - 99011 diff --git a/docs/changelog/106486.yaml b/docs/changelog/106486.yaml deleted file mode 100644 index b33df50780e02..0000000000000 --- a/docs/changelog/106486.yaml +++ /dev/null @@ -1,17 +0,0 @@ -pr: 106486 -summary: Create custom parser for ISO-8601 datetimes -area: Infra/Core -type: enhancement -issues: - - 102063 -highlight: - title: New custom parser for ISO-8601 datetimes - body: |- - This introduces a new custom parser for ISO-8601 datetimes, for the `iso8601`, `strict_date_optional_time`, and - `strict_date_optional_time_nanos` built-in date formats. This provides a performance improvement over the - default Java date-time parsing. Whilst it maintains much of the same behaviour, - the new parser does not accept nonsensical date-time strings that have multiple fractional seconds fields - or multiple timezone specifiers. If the new parser fails to parse a string, it will then use the previous parser - to parse it. If a large proportion of the input data consists of these invalid strings, this may cause - a small performance degradation. If you wish to force the use of the old parsers regardless, - set the JVM property `es.datetime.java_time_parsers=true` on all ES nodes. 
diff --git a/docs/changelog/106553.yaml b/docs/changelog/106553.yaml deleted file mode 100644 index 0ec5b1bb02da8..0000000000000 --- a/docs/changelog/106553.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 106553 -summary: Add support for hiragana_uppercase & katakana_uppercase token filters in kuromoji analysis plugin -area: Search -type: enhancement -issues: [] diff --git a/docs/changelog/106591.yaml b/docs/changelog/106591.yaml deleted file mode 100644 index 6a7814cb9cede..0000000000000 --- a/docs/changelog/106591.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 106591 -summary: Make dense vector field type updatable -area: Search -type: enhancement -issues: [] diff --git a/docs/changelog/106820.yaml b/docs/changelog/106820.yaml deleted file mode 100644 index d854e3984c13d..0000000000000 --- a/docs/changelog/106820.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 106820 -summary: Add a capabilities API to check node and cluster capabilities -area: Infra/REST API -type: feature -issues: [] diff --git a/docs/changelog/107081.yaml b/docs/changelog/107081.yaml deleted file mode 100644 index 2acd2f919b476..0000000000000 --- a/docs/changelog/107081.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107081 -summary: Implement synthetic source support for range fields -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/107088.yaml b/docs/changelog/107088.yaml deleted file mode 100644 index 01a926f185eea..0000000000000 --- a/docs/changelog/107088.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107088 -summary: Introduce role description field -area: Authorization -type: enhancement -issues: [] diff --git a/docs/changelog/107191.yaml b/docs/changelog/107191.yaml deleted file mode 100644 index 5ef6297c0f3f1..0000000000000 --- a/docs/changelog/107191.yaml +++ /dev/null @@ -1,17 +0,0 @@ -pr: 107191 -summary: Stricter failure handling in multi-repo get-snapshots request handling -area: Snapshot/Restore -type: bug -issues: [] -highlight: - title: Stricter failure handling in multi-repo get-snapshots request handling - body: | - If a multi-repo get-snapshots request encounters a failure in one of the - targeted repositories then earlier versions of Elasticsearch would proceed - as if the faulty repository did not exist, except for a per-repository - failure report in a separate section of the response body. This makes it - impossible to paginate the results properly in the presence of failures. In - versions 8.15.0 and later this API's failure handling behaviour has been - made stricter, reporting an overall failure if any targeted repository's - contents cannot be listed. 
- notable: true diff --git a/docs/changelog/107216.yaml b/docs/changelog/107216.yaml deleted file mode 100644 index 7144eedf9bea4..0000000000000 --- a/docs/changelog/107216.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107216 -summary: Add per-field KNN vector format to Index Segments API -area: Search -type: enhancement -issues: [] diff --git a/docs/changelog/107240.yaml b/docs/changelog/107240.yaml deleted file mode 100644 index baf4c222a9a27..0000000000000 --- a/docs/changelog/107240.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107240 -summary: Include doc size info in ingest stats -area: Ingest Node -type: enhancement -issues: - - 106386 diff --git a/docs/changelog/107244.yaml b/docs/changelog/107244.yaml deleted file mode 100644 index f805796674f93..0000000000000 --- a/docs/changelog/107244.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107244 -summary: Support effective watermark thresholds in node stats API -area: Allocation -type: enhancement -issues: [106676] diff --git a/docs/changelog/107279.yaml b/docs/changelog/107279.yaml deleted file mode 100644 index a2940ecc9ba2d..0000000000000 --- a/docs/changelog/107279.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107279 -summary: Introduce _transform/_node_stats API -area: Transform -type: feature -issues: [] diff --git a/docs/changelog/107409.yaml b/docs/changelog/107409.yaml deleted file mode 100644 index 6f2350239772f..0000000000000 --- a/docs/changelog/107409.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107409 -summary: "ESQL: Introduce a casting operator, `::`" -area: ES|QL -type: feature -issues: [] diff --git a/docs/changelog/107410.yaml b/docs/changelog/107410.yaml deleted file mode 100644 index 5026e88cfa762..0000000000000 --- a/docs/changelog/107410.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107410 -summary: Cluster-state based Security role mapper -area: Authorization -type: enhancement -issues: [] diff --git a/docs/changelog/107415.yaml b/docs/changelog/107415.yaml deleted file mode 100644 index 8877d0426c60d..0000000000000 --- a/docs/changelog/107415.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107415 -summary: Fix `DecayFunctions'` `toString` -area: Search -type: bug -issues: - - 100870 diff --git a/docs/changelog/107426.yaml b/docs/changelog/107426.yaml deleted file mode 100644 index 2feed3df56108..0000000000000 --- a/docs/changelog/107426.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107426 -summary: Support wait indefinitely for search tasks to complete on node shutdown -area: Infra/Node Lifecycle -type: bug -issues: [] diff --git a/docs/changelog/107435.yaml b/docs/changelog/107435.yaml deleted file mode 100644 index ae5d2215419c4..0000000000000 --- a/docs/changelog/107435.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107435 -summary: '`NoSuchRemoteClusterException` should not be thrown when a remote is configured' -area: Network -type: bug -issues: - - 107381 diff --git a/docs/changelog/107493.yaml b/docs/changelog/107493.yaml deleted file mode 100644 index dfd45e1493c95..0000000000000 --- a/docs/changelog/107493.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107493 -summary: Remote cluster - API key security model - cluster privileges -area: Security -type: enhancement -issues: [] diff --git a/docs/changelog/107545.yaml b/docs/changelog/107545.yaml deleted file mode 100644 index ad457cc5a533f..0000000000000 --- a/docs/changelog/107545.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107545 -summary: "ESQL: Union Types Support" -area: ES|QL -type: enhancement -issues: - - 100603 diff --git a/docs/changelog/107549.yaml b/docs/changelog/107549.yaml deleted file mode 100644 index 
36250cf65b4d9..0000000000000 --- a/docs/changelog/107549.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107549 -summary: Add synthetic source support for binary fields -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/107567.yaml b/docs/changelog/107567.yaml deleted file mode 100644 index 558b5b570b1fb..0000000000000 --- a/docs/changelog/107567.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107567 -summary: Add ignored field values to synthetic source -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/107579.yaml b/docs/changelog/107579.yaml deleted file mode 100644 index fdee59424b8de..0000000000000 --- a/docs/changelog/107579.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107579 -summary: Adding `hits_time_in_millis` and `misses_time_in_millis` to enrich cache - stats -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/107593.yaml b/docs/changelog/107593.yaml deleted file mode 100644 index 2e3d2cbc80119..0000000000000 --- a/docs/changelog/107593.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107593 -summary: Add auto-sharding APM metrics -area: Infra/Metrics -type: enhancement -issues: [] diff --git a/docs/changelog/107640.yaml b/docs/changelog/107640.yaml deleted file mode 100644 index 9871943481f20..0000000000000 --- a/docs/changelog/107640.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107640 -summary: "Unified Highlighter to support matched_fields " -area: Highlighting -type: enhancement -issues: - - 5172 diff --git a/docs/changelog/107645.yaml b/docs/changelog/107645.yaml deleted file mode 100644 index 93fc0f2a89b3a..0000000000000 --- a/docs/changelog/107645.yaml +++ /dev/null @@ -1,7 +0,0 @@ -pr: 107645 -summary: Add `_name` support for top level `knn` clauses -area: Search -type: enhancement -issues: - - 106254 - - 107448 diff --git a/docs/changelog/107647.yaml b/docs/changelog/107647.yaml deleted file mode 100644 index 97d98a7c91079..0000000000000 --- a/docs/changelog/107647.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107647 -summary: Adding human readable times to geoip stats -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/107663.yaml b/docs/changelog/107663.yaml deleted file mode 100644 index a7c3dc185425a..0000000000000 --- a/docs/changelog/107663.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107663 -summary: Optimize `GeoBounds` and `GeoCentroid` aggregations for single value fields -area: Geo -type: enhancement -issues: [] diff --git a/docs/changelog/107675.yaml b/docs/changelog/107675.yaml deleted file mode 100644 index b1d51cd3f8538..0000000000000 --- a/docs/changelog/107675.yaml +++ /dev/null @@ -1,17 +0,0 @@ -pr: 107675 -summary: Interpret `?timeout=-1` as infinite ack timeout -area: Cluster Coordination -type: breaking -issues: [] -breaking: - title: Interpret `?timeout=-1` as infinite ack timeout - area: REST API - details: | - Today {es} accepts the parameter `?timeout=-1` in many APIs, but interprets - this to mean the same as `?timeout=0`. From 8.15 onwards `?timeout=-1` will - mean to wait indefinitely, aligning the behaviour of this parameter with - other similar parameters such as `?master_timeout`. 
- impact: | - Use `?timeout=0` to force relevant operations to time out immediately - instead of `?timeout=-1` - notable: false diff --git a/docs/changelog/107676.yaml b/docs/changelog/107676.yaml deleted file mode 100644 index b14bc29e66efd..0000000000000 --- a/docs/changelog/107676.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107676 -summary: Add model download progress to the download task status -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/107706.yaml b/docs/changelog/107706.yaml deleted file mode 100644 index 76b7f662bf0e0..0000000000000 --- a/docs/changelog/107706.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107706 -summary: Add rate limiting support for the Inference API -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/107735.yaml b/docs/changelog/107735.yaml deleted file mode 100644 index 372cb59ba8b1f..0000000000000 --- a/docs/changelog/107735.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107735 -summary: Implement synthetic source support for annotated text field -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/107739.yaml b/docs/changelog/107739.yaml deleted file mode 100644 index c55a0e332b4f6..0000000000000 --- a/docs/changelog/107739.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107739 -summary: Binary field enables doc values by default for index mode with synthe… -area: Mapping -type: enhancement -issues: - - 107554 diff --git a/docs/changelog/107764.yaml b/docs/changelog/107764.yaml deleted file mode 100644 index 3f83efc789014..0000000000000 --- a/docs/changelog/107764.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107764 -summary: Increase size of big arrays only when there is an actual value in the aggregators -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/107779.yaml b/docs/changelog/107779.yaml deleted file mode 100644 index a41c19a2329e0..0000000000000 --- a/docs/changelog/107779.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107779 -summary: Allow rescorer with field collapsing -area: Search -type: enhancement -issues: - - 27243 \ No newline at end of file diff --git a/docs/changelog/107792.yaml b/docs/changelog/107792.yaml deleted file mode 100644 index bd9730d49d5d6..0000000000000 --- a/docs/changelog/107792.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107792 -summary: Halt Indexer on Stop/Abort API -area: Transform -type: bug -issues: [] diff --git a/docs/changelog/107813.yaml b/docs/changelog/107813.yaml deleted file mode 100644 index 1cbb518a8be5b..0000000000000 --- a/docs/changelog/107813.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107813 -summary: Increase size of big arrays only when there is an actual value in the aggregators - (Analytics module) -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/107827.yaml b/docs/changelog/107827.yaml deleted file mode 100644 index 7cf217567b745..0000000000000 --- a/docs/changelog/107827.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107827 -summary: Add permission to secure access to certain config files -area: Security -type: bug -issues: [] diff --git a/docs/changelog/107832.yaml b/docs/changelog/107832.yaml deleted file mode 100644 index 491c491736005..0000000000000 --- a/docs/changelog/107832.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107832 -summary: Optimise few metric aggregations for single value fields -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/107862.yaml b/docs/changelog/107862.yaml deleted file mode 100644 index 77f7a8c9fb02a..0000000000000 --- a/docs/changelog/107862.yaml +++ 
/dev/null @@ -1,6 +0,0 @@ -pr: 107862 -summary: Fix serialization of put-shutdown request -area: Infra/Node Lifecycle -type: bug -issues: - - 107857 diff --git a/docs/changelog/107876.yaml b/docs/changelog/107876.yaml deleted file mode 100644 index 21624cacf7e1d..0000000000000 --- a/docs/changelog/107876.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107876 -summary: "ESQL: Add aggregates node level reduction" -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/107877.yaml b/docs/changelog/107877.yaml deleted file mode 100644 index cf458b3aa3a42..0000000000000 --- a/docs/changelog/107877.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107877 -summary: Support metrics counter types in ESQL -area: "ES|QL" -type: enhancement -issues: [] diff --git a/docs/changelog/107886.yaml b/docs/changelog/107886.yaml deleted file mode 100644 index a328bc2a2a208..0000000000000 --- a/docs/changelog/107886.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107886 -summary: Cluster state role mapper file settings service -area: Authorization -type: enhancement -issues: [] diff --git a/docs/changelog/107892.yaml b/docs/changelog/107892.yaml deleted file mode 100644 index 5fd5404c48d02..0000000000000 --- a/docs/changelog/107892.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107892 -summary: Optimise cardinality aggregations for single value fields -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/107893.yaml b/docs/changelog/107893.yaml deleted file mode 100644 index 61f0f4d76e679..0000000000000 --- a/docs/changelog/107893.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107893 -summary: Optimise histogram aggregations for single value fields -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/107897.yaml b/docs/changelog/107897.yaml deleted file mode 100644 index e4a2a5270475d..0000000000000 --- a/docs/changelog/107897.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107897 -summary: Optimise composite aggregations for single value fields -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/107917.yaml b/docs/changelog/107917.yaml deleted file mode 100644 index 18125bf46f4b7..0000000000000 --- a/docs/changelog/107917.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107917 -summary: Exit gracefully when deleted -area: Transform -type: bug -issues: - - 107266 diff --git a/docs/changelog/107922.yaml b/docs/changelog/107922.yaml deleted file mode 100644 index e28d0f6262af4..0000000000000 --- a/docs/changelog/107922.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107922 -summary: Feature/annotated text store defaults -area: Mapping -type: enhancement -issues: - - 107734 diff --git a/docs/changelog/107930.yaml b/docs/changelog/107930.yaml deleted file mode 100644 index 90af5c55b8604..0000000000000 --- a/docs/changelog/107930.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107930 -summary: Optimise terms aggregations for single value fields -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/107937.yaml b/docs/changelog/107937.yaml deleted file mode 100644 index 5938c8e8b6602..0000000000000 --- a/docs/changelog/107937.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107937 -summary: Optimise multiterms aggregation for single value fields -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/107947.yaml b/docs/changelog/107947.yaml deleted file mode 100644 index 637ac3c005779..0000000000000 --- a/docs/changelog/107947.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107947 -summary: "ESQL: Fix equals `hashCode` for functions" -area: ES|QL -type: bug -issues: 
- - 104393 diff --git a/docs/changelog/107967.yaml b/docs/changelog/107967.yaml deleted file mode 100644 index 159370e44f236..0000000000000 --- a/docs/changelog/107967.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107967 -summary: Sort time series indices by time range in `GetDataStreams` API -area: TSDB -type: bug -issues: - - 102088 diff --git a/docs/changelog/107972.yaml b/docs/changelog/107972.yaml deleted file mode 100644 index 3ec83d6a56954..0000000000000 --- a/docs/changelog/107972.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107972 -summary: Require question to be non-null in `QuestionAnsweringConfig` -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/107977.yaml b/docs/changelog/107977.yaml deleted file mode 100644 index fdbbb57d7e48f..0000000000000 --- a/docs/changelog/107977.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107977 -summary: Fix off by one error when handling null values in range fields -area: Mapping -type: bug -issues: - - 107282 diff --git a/docs/changelog/107978.yaml b/docs/changelog/107978.yaml deleted file mode 100644 index 50115df9ee092..0000000000000 --- a/docs/changelog/107978.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107978 -summary: Drop shards close timeout when stopping node. -area: Engine -type: enhancement -issues: - - 107938 diff --git a/docs/changelog/107987.yaml b/docs/changelog/107987.yaml deleted file mode 100644 index e8afebde0b190..0000000000000 --- a/docs/changelog/107987.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 107987 -summary: "ESQL: Implement LOOKUP, an \"inline\" enrich" -area: ES|QL -type: enhancement -issues: - - 107306 diff --git a/docs/changelog/107990.yaml b/docs/changelog/107990.yaml deleted file mode 100644 index 80cb96aca4426..0000000000000 --- a/docs/changelog/107990.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 107990 -summary: Optimise `time_series` aggregation for single value fields -area: TSDB -type: enhancement -issues: [] diff --git a/docs/changelog/108016.yaml b/docs/changelog/108016.yaml deleted file mode 100644 index 0aa3f86a6f859..0000000000000 --- a/docs/changelog/108016.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108016 -summary: Optimise `BinaryRangeAggregator` for single value fields -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/108019.yaml b/docs/changelog/108019.yaml deleted file mode 100644 index 69e8e9fd371f8..0000000000000 --- a/docs/changelog/108019.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108019 -summary: Ignore additional cpu.stat fields -area: Infra/Core -type: bug -issues: - - 107983 diff --git a/docs/changelog/108051.yaml b/docs/changelog/108051.yaml deleted file mode 100644 index a47e1192c6090..0000000000000 --- a/docs/changelog/108051.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108051 -summary: Track synthetic source for disabled objects -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/108065.yaml b/docs/changelog/108065.yaml deleted file mode 100644 index 2ec93bf6e6295..0000000000000 --- a/docs/changelog/108065.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108065 -summary: '`DenseVectorFieldMapper` fixed typo' -area: Mapping -type: bug -issues: [] diff --git a/docs/changelog/108070.yaml b/docs/changelog/108070.yaml deleted file mode 100644 index cde191aa50804..0000000000000 --- a/docs/changelog/108070.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108070 -summary: Redirect `VersionConflict` to reset code -area: Transform -type: bug -issues: [] diff --git a/docs/changelog/108088.yaml b/docs/changelog/108088.yaml deleted file mode 100644 index 95c58f6dc19f1..0000000000000 
--- a/docs/changelog/108088.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108088 -summary: Add a SIMD (AVX2) optimised vector distance function for int7 on x64 -area: "Search" -type: enhancement -issues: [] diff --git a/docs/changelog/108089.yaml b/docs/changelog/108089.yaml deleted file mode 100644 index 02fb6349185a6..0000000000000 --- a/docs/changelog/108089.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108089 -summary: "ES|QL: limit query depth to 500 levels" -area: ES|QL -type: bug -issues: - - 107752 diff --git a/docs/changelog/108106.yaml b/docs/changelog/108106.yaml deleted file mode 100644 index e9dd438e620c4..0000000000000 --- a/docs/changelog/108106.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108106 -summary: Simulate should succeed if `ignore_missing_pipeline` -area: Ingest Node -type: bug -issues: - - 107314 diff --git a/docs/changelog/108118.yaml b/docs/changelog/108118.yaml deleted file mode 100644 index b9b0f1c1406e0..0000000000000 --- a/docs/changelog/108118.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108118 -summary: Optimize for single value in ordinals grouping -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/108122.yaml b/docs/changelog/108122.yaml deleted file mode 100644 index 981ab39b9dad8..0000000000000 --- a/docs/changelog/108122.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108122 -summary: Correct query profiling for conjunctions -area: Search -type: bug -issues: - - 108116 diff --git a/docs/changelog/108130.yaml b/docs/changelog/108130.yaml deleted file mode 100644 index 5b431bdb0cc1b..0000000000000 --- a/docs/changelog/108130.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108130 -summary: Optimise frequent item sets aggregation for single value fields -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/108131.yaml b/docs/changelog/108131.yaml deleted file mode 100644 index 7a4286c1e44a0..0000000000000 --- a/docs/changelog/108131.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108131 -summary: "Inference Processor: skip inference when all fields are missing" -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/108144.yaml b/docs/changelog/108144.yaml deleted file mode 100644 index 6ff5b1d600d0e..0000000000000 --- a/docs/changelog/108144.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108144 -summary: Bump Tika dependency to 2.9.2 -area: Ingest Node -type: upgrade -issues: [] diff --git a/docs/changelog/108145.yaml b/docs/changelog/108145.yaml deleted file mode 100644 index b8c9428c1e3a8..0000000000000 --- a/docs/changelog/108145.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108145 -summary: Async close of `IndexShard` -area: Engine -type: bug -issues: [] diff --git a/docs/changelog/108146.yaml b/docs/changelog/108146.yaml deleted file mode 100644 index 2a4f917134090..0000000000000 --- a/docs/changelog/108146.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108146 -summary: Allow deletion of the ELSER inference service when reference in ingest -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/108155.yaml b/docs/changelog/108155.yaml deleted file mode 100644 index 57db86b4005b9..0000000000000 --- a/docs/changelog/108155.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108155 -summary: Upgrade to Netty 4.1.109 -area: Network -type: upgrade -issues: [] diff --git a/docs/changelog/108161.yaml b/docs/changelog/108161.yaml deleted file mode 100644 index 73fa41e2089d3..0000000000000 --- a/docs/changelog/108161.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108161 -summary: Refactor TextEmbeddingResults to use primitives rather than objects 
-area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/108165.yaml b/docs/changelog/108165.yaml deleted file mode 100644 index b88b0f5e217dd..0000000000000 --- a/docs/changelog/108165.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108165 -summary: Add `BlockHash` for 3 `BytesRefs` -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/108171.yaml b/docs/changelog/108171.yaml deleted file mode 100644 index 1ec17bb3e411d..0000000000000 --- a/docs/changelog/108171.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108171 -summary: "add Elastic-internal stable bridge api for use by Logstash" -area: Infra/Core -type: enhancement -issues: [] diff --git a/docs/changelog/108222.yaml b/docs/changelog/108222.yaml deleted file mode 100644 index 701b853441e32..0000000000000 --- a/docs/changelog/108222.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108222 -summary: Add generic fallback implementation for synthetic source -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/108223.yaml b/docs/changelog/108223.yaml deleted file mode 100644 index ba8756a8f9c68..0000000000000 --- a/docs/changelog/108223.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108223 -summary: Upgrade bouncy castle (non-fips) to 1.78.1 -area: Security -type: upgrade -issues: [] diff --git a/docs/changelog/108227.yaml b/docs/changelog/108227.yaml deleted file mode 100644 index 79f69bc4aaff6..0000000000000 --- a/docs/changelog/108227.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108227 -summary: "Apm-data: improve indexing resilience" -area: Data streams -type: enhancement -issues: [] diff --git a/docs/changelog/108254.yaml b/docs/changelog/108254.yaml deleted file mode 100644 index 3bf08e8b8f5fc..0000000000000 --- a/docs/changelog/108254.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108254 -summary: Add `sparse_vector` query -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/108266.yaml b/docs/changelog/108266.yaml deleted file mode 100644 index 5a189cfcdc258..0000000000000 --- a/docs/changelog/108266.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108266 -summary: Log details of non-green indicators in `HealthPeriodicLogger` -area: Health -type: enhancement -issues: [] diff --git a/docs/changelog/108300.yaml b/docs/changelog/108300.yaml deleted file mode 100644 index c4d6e468113a4..0000000000000 --- a/docs/changelog/108300.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108300 -summary: "ESQL: Add more time span units" -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/108306.yaml b/docs/changelog/108306.yaml deleted file mode 100644 index 7a104ce880f43..0000000000000 --- a/docs/changelog/108306.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108306 -summary: Enable inter-segment concurrency for low cardinality numeric terms aggs -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/108333.yaml b/docs/changelog/108333.yaml deleted file mode 100644 index c3152500ce1b2..0000000000000 --- a/docs/changelog/108333.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108333 -summary: Allow `read_slm` to call GET /_slm/status -area: ILM+SLM -type: bug -issues: [] diff --git a/docs/changelog/108340.yaml b/docs/changelog/108340.yaml deleted file mode 100644 index fb2ea72c0a0f5..0000000000000 --- a/docs/changelog/108340.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108340 -summary: "Apm-data: increase version for templates" -area: Data streams -type: enhancement -issues: [] diff --git a/docs/changelog/108349.yaml b/docs/changelog/108349.yaml deleted file mode 100644 index 
6d9ea3d658dca..0000000000000 --- a/docs/changelog/108349.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108349 -summary: "Ecs@mappings: reduce scope for `ecs_geo_point`" -area: Data streams -type: bug -issues: - - 108338 diff --git a/docs/changelog/108379.yaml b/docs/changelog/108379.yaml deleted file mode 100644 index 312856a5db33d..0000000000000 --- a/docs/changelog/108379.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108379 -summary: Create a new `NodeRequest` for every `NodesDataTiersUsageTransport` use -area: Indices APIs -type: bug -issues: [] diff --git a/docs/changelog/108394.yaml b/docs/changelog/108394.yaml deleted file mode 100644 index 58f48fa548c6e..0000000000000 --- a/docs/changelog/108394.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108394 -summary: Handle `IndexNotFoundException` -area: Transform -type: bug -issues: - - 107263 diff --git a/docs/changelog/108395.yaml b/docs/changelog/108395.yaml deleted file mode 100644 index c33cf169a99fa..0000000000000 --- a/docs/changelog/108395.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108395 -summary: "ESQL: change from quoting from backtick to quote" -area: ES|QL -type: bug -issues: [] diff --git a/docs/changelog/108396.yaml b/docs/changelog/108396.yaml deleted file mode 100644 index 63937646b755c..0000000000000 --- a/docs/changelog/108396.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108396 -summary: "Apm-data: improve default pipeline performance" -area: Data streams -type: enhancement -issues: - - 108290 diff --git a/docs/changelog/108409.yaml b/docs/changelog/108409.yaml deleted file mode 100644 index 6cff86cf93930..0000000000000 --- a/docs/changelog/108409.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108409 -summary: Support multiple associated groups for TopN -area: Application -type: enhancement -issues: - - 108018 diff --git a/docs/changelog/108410.yaml b/docs/changelog/108410.yaml deleted file mode 100644 index 5fd831231a3be..0000000000000 --- a/docs/changelog/108410.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108410 -summary: GeoIP tasks should wait longer for master -area: Ingest Node -type: bug -issues: [] diff --git a/docs/changelog/108417.yaml b/docs/changelog/108417.yaml deleted file mode 100644 index bb650922f1be5..0000000000000 --- a/docs/changelog/108417.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108417 -summary: Track source for arrays of objects -area: Mapping -type: enhancement -issues: - - 90708 diff --git a/docs/changelog/108421.yaml b/docs/changelog/108421.yaml deleted file mode 100644 index 1f077a4a2cb7c..0000000000000 --- a/docs/changelog/108421.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108421 -summary: "[ES|QL] Support Named and Positional Parameters in `EsqlQueryRequest`" -area: ES|QL -type: enhancement -issues: - - 107029 diff --git a/docs/changelog/108429.yaml b/docs/changelog/108429.yaml deleted file mode 100644 index 562454a0de256..0000000000000 --- a/docs/changelog/108429.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108429 -summary: Fix `ClassCastException` in Significant Terms -area: Aggregations -type: bug -issues: - - 108427 diff --git a/docs/changelog/108444.yaml b/docs/changelog/108444.yaml deleted file mode 100644 index c946ab24f939a..0000000000000 --- a/docs/changelog/108444.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108444 -summary: "Apm-data: ignore malformed fields, and too many dynamic fields" -area: Data streams -type: enhancement -issues: [] diff --git a/docs/changelog/108452.yaml b/docs/changelog/108452.yaml deleted file mode 100644 index fdf531602c806..0000000000000 --- a/docs/changelog/108452.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 
108452 -summary: Add the rerank task to the Elasticsearch internal inference service -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/108455.yaml b/docs/changelog/108455.yaml deleted file mode 100644 index 8397af7b07cf1..0000000000000 --- a/docs/changelog/108455.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108455 -summary: "[ES|QL] Convert string to datetime when the other side of an arithmetic\ - \ operator is `date_period` or `time_duration`" -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/108459.yaml b/docs/changelog/108459.yaml deleted file mode 100644 index 5e05797f284be..0000000000000 --- a/docs/changelog/108459.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108459 -summary: Do not use global ordinals strategy if the leaf reader context cannot be - obtained -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/108472.yaml b/docs/changelog/108472.yaml deleted file mode 100644 index 82481e4edec3a..0000000000000 --- a/docs/changelog/108472.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108472 -summary: Add support for Azure AI Studio embeddings and completions to the inference service. -area: Machine Learning -type: feature -issues: [] diff --git a/docs/changelog/108517.yaml b/docs/changelog/108517.yaml deleted file mode 100644 index 359c8302fdf6c..0000000000000 --- a/docs/changelog/108517.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108517 -summary: Forward `indexServiceSafe` exception to listener -area: Transform -type: bug -issues: - - 108418 diff --git a/docs/changelog/108521.yaml b/docs/changelog/108521.yaml deleted file mode 100644 index adc7c11a4decd..0000000000000 --- a/docs/changelog/108521.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108521 -summary: Adding override for lintian false positive on `libvec.so` -area: "Packaging" -type: bug -issues: - - 108514 diff --git a/docs/changelog/108522.yaml b/docs/changelog/108522.yaml deleted file mode 100644 index 5bc064d7995e9..0000000000000 --- a/docs/changelog/108522.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108522 -summary: Ensure we return non-negative scores when scoring scalar dot-products -area: Vector Search -type: bug -issues: [] diff --git a/docs/changelog/108537.yaml b/docs/changelog/108537.yaml deleted file mode 100644 index 1c0228a71d449..0000000000000 --- a/docs/changelog/108537.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108537 -summary: Limit the value in prefix query -area: Search -type: enhancement -issues: - - 108486 diff --git a/docs/changelog/108538.yaml b/docs/changelog/108538.yaml deleted file mode 100644 index 10ae49f0c1670..0000000000000 --- a/docs/changelog/108538.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108538 -summary: Adding RankFeature search phase implementation -area: Search -type: feature -issues: [] diff --git a/docs/changelog/108574.yaml b/docs/changelog/108574.yaml deleted file mode 100644 index b3c957721e01e..0000000000000 --- a/docs/changelog/108574.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108574 -summary: "[ESQL] CBRT function" -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/108602.yaml b/docs/changelog/108602.yaml deleted file mode 100644 index d544c89980123..0000000000000 --- a/docs/changelog/108602.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108602 -summary: "[Inference API] Extract optional long instead of integer in `RateLimitSettings#of`" -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/108606.yaml b/docs/changelog/108606.yaml deleted file mode 100644 index 04780bff58800..0000000000000 --- 
a/docs/changelog/108606.yaml +++ /dev/null @@ -1,14 +0,0 @@ -pr: 108606 -summary: "Extend ISO8601 datetime parser to specify forbidden fields, allowing it to be used\ - \ on more formats" -area: Infra/Core -type: enhancement -issues: [] -highlight: - title: New custom parser for more ISO-8601 date formats - body: |- - Following on from #106486, this extends the custom ISO-8601 datetime parser to cover the `strict_year`, - `strict_year_month`, `strict_date_time`, `strict_date_time_no_millis`, `strict_date_hour_minute_second`, - `strict_date_hour_minute_second_millis`, and `strict_date_hour_minute_second_fraction` date formats. - As before, the parser will use the existing java.time parser if there are parsing issues, and the - `es.datetime.java_time_parsers=true` JVM property will force the use of the old parsers regardless. diff --git a/docs/changelog/108607.yaml b/docs/changelog/108607.yaml deleted file mode 100644 index 9ad4cf91e67b9..0000000000000 --- a/docs/changelog/108607.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108607 -summary: Specify parse index when error occurs on multiple datetime parses -area: Infra/Core -type: bug -issues: [] diff --git a/docs/changelog/108612.yaml b/docs/changelog/108612.yaml deleted file mode 100644 index 7a3dfa2b7ba44..0000000000000 --- a/docs/changelog/108612.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108612 -summary: "[Connector API] Change `UpdateConnectorFiltering` API to have better defaults" -area: Application -type: enhancement -issues: [] diff --git a/docs/changelog/108624.yaml b/docs/changelog/108624.yaml deleted file mode 100644 index 0da1fd2902c03..0000000000000 --- a/docs/changelog/108624.yaml +++ /dev/null @@ -1,12 +0,0 @@ -pr: 108624 -summary: Disallow new rollup jobs in clusters with no rollup usage -area: Rollup -type: breaking -issues: - - 108381 -breaking: - title: Disallow new rollup jobs in clusters with no rollup usage - area: Rollup - details: The put rollup API will fail with an error when a rollup job is created in a cluster with no rollup usage - impact: Clusters with no rollup usage (either no rollup job or index) can not create new rollup jobs - notable: true diff --git a/docs/changelog/108639.yaml b/docs/changelog/108639.yaml deleted file mode 100644 index e4964cbeb0285..0000000000000 --- a/docs/changelog/108639.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108639 -summary: Add support for the 'Domain' database to the geoip processor -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/108643.yaml b/docs/changelog/108643.yaml deleted file mode 100644 index f71a943673326..0000000000000 --- a/docs/changelog/108643.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108643 -summary: Use `scheduleUnlessShuttingDown` in `LeaderChecker` -area: Cluster Coordination -type: bug -issues: - - 108642 diff --git a/docs/changelog/108651.yaml b/docs/changelog/108651.yaml deleted file mode 100644 index 227c464909d50..0000000000000 --- a/docs/changelog/108651.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108651 -summary: Add support for the 'ISP' database to the geoip processor -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/108672.yaml b/docs/changelog/108672.yaml deleted file mode 100644 index e1261fcf6f232..0000000000000 --- a/docs/changelog/108672.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108672 -summary: Add bounds checking to parsing ISO8601 timezone offset values -area: Infra/Core -type: bug -issues: [] diff --git a/docs/changelog/108679.yaml b/docs/changelog/108679.yaml deleted file mode 100644 index 
62cd82a52c5bb..0000000000000 --- a/docs/changelog/108679.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108679 -summary: Suppress deprecation warnings from ingest pipelines when deleting trained model -area: Machine Learning -type: bug -issues: - - 105004 diff --git a/docs/changelog/108682.yaml b/docs/changelog/108682.yaml deleted file mode 100644 index bd566acab8306..0000000000000 --- a/docs/changelog/108682.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108682 -summary: Adding support for explain in rrf -area: Search -type: enhancement -issues: [] diff --git a/docs/changelog/108683.yaml b/docs/changelog/108683.yaml deleted file mode 100644 index b9e7df5fefc18..0000000000000 --- a/docs/changelog/108683.yaml +++ /dev/null @@ -1,14 +0,0 @@ -pr: 108683 -summary: Add support for the 'Connection Type' database to the geoip processor -area: Ingest Node -type: enhancement -issues: [] -highlight: - title: "Preview: Support for the 'Connection Type, 'Domain', and 'ISP' databases in the geoip processor" - body: |- - As a Technical Preview, the {ref}/geoip-processor.html[`geoip`] processor can now use the commercial - https://dev.maxmind.com/geoip/docs/databases/connection-type[GeoIP2 'Connection Type'], - https://dev.maxmind.com/geoip/docs/databases/domain[GeoIP2 'Domain'], - and - https://dev.maxmind.com/geoip/docs/databases/isp[GeoIP2 'ISP'] - databases from MaxMind. diff --git a/docs/changelog/108684.yaml b/docs/changelog/108684.yaml deleted file mode 100644 index 91684d2998be6..0000000000000 --- a/docs/changelog/108684.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108684 -summary: Check if `CsvTests` required capabilities exist -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/108687.yaml b/docs/changelog/108687.yaml deleted file mode 100644 index 771516d551567..0000000000000 --- a/docs/changelog/108687.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108687 -summary: Adding `user_type` support for the enterprise database for the geoip processor -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/108693.yaml b/docs/changelog/108693.yaml deleted file mode 100644 index ee701e0f57736..0000000000000 --- a/docs/changelog/108693.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108693 -summary: Test pipeline run after reroute -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/108705.yaml b/docs/changelog/108705.yaml deleted file mode 100644 index fd08734831018..0000000000000 --- a/docs/changelog/108705.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108705 -summary: Associate restore snapshot task to parent mount task -area: Distributed -type: bug -issues: - - 105830 diff --git a/docs/changelog/108713.yaml b/docs/changelog/108713.yaml deleted file mode 100644 index d6b1ddabd6c1e..0000000000000 --- a/docs/changelog/108713.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108713 -summary: Rewrite away type converting functions that do not convert types -area: ES|QL -type: enhancement -issues: - - 107716 diff --git a/docs/changelog/108726.yaml b/docs/changelog/108726.yaml deleted file mode 100644 index 2e800a45e6975..0000000000000 --- a/docs/changelog/108726.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108726 -summary: Allow RA metrics to be reported upon parsing completed or accumulated -area: Infra/Metrics -type: enhancement -issues: [] diff --git a/docs/changelog/108733.yaml b/docs/changelog/108733.yaml deleted file mode 100644 index 76a969219ea4c..0000000000000 --- a/docs/changelog/108733.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108733 -summary: Query Roles API -area: Security -type: 
feature -issues: [] diff --git a/docs/changelog/108746.yaml b/docs/changelog/108746.yaml deleted file mode 100644 index 93ed917f3b56e..0000000000000 --- a/docs/changelog/108746.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108746 -summary: Support synthetic source for `aggregate_metric_double` when ignore_malf… -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/108759.yaml b/docs/changelog/108759.yaml deleted file mode 100644 index dfc2b30fe6c57..0000000000000 --- a/docs/changelog/108759.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108759 -summary: Expose `?master_timeout` in autoscaling APIs -area: Autoscaling -type: bug -issues: [] diff --git a/docs/changelog/108761.yaml b/docs/changelog/108761.yaml deleted file mode 100644 index 92aa67ebe0bfe..0000000000000 --- a/docs/changelog/108761.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108761 -summary: Add some missing timeout params to REST API specs -area: Infra/REST API -type: bug -issues: [] diff --git a/docs/changelog/108764.yaml b/docs/changelog/108764.yaml deleted file mode 100644 index 94de27eb52c9b..0000000000000 --- a/docs/changelog/108764.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108764 -summary: ST_DISTANCE Function -area: ES|QL -type: enhancement -issues: - - 108212 diff --git a/docs/changelog/108780.yaml b/docs/changelog/108780.yaml deleted file mode 100644 index 40e66326e6b9b..0000000000000 --- a/docs/changelog/108780.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108780 -summary: Add `continent_code` support to the geoip processor -area: Ingest Node -type: enhancement -issues: - - 85820 diff --git a/docs/changelog/108786.yaml b/docs/changelog/108786.yaml deleted file mode 100644 index 1c07a3ceac900..0000000000000 --- a/docs/changelog/108786.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108786 -summary: Make ingest byte stat names more descriptive -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/108793.yaml b/docs/changelog/108793.yaml deleted file mode 100644 index 87668c8ee009b..0000000000000 --- a/docs/changelog/108793.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108793 -summary: Add `SparseVectorStats` -area: Search -type: enhancement -issues: [] diff --git a/docs/changelog/108796.yaml b/docs/changelog/108796.yaml deleted file mode 100644 index 808247cf347d9..0000000000000 --- a/docs/changelog/108796.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108796 -summary: Return ingest byte stats even when 0-valued -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/108814.yaml b/docs/changelog/108814.yaml deleted file mode 100644 index 94298838c372e..0000000000000 --- a/docs/changelog/108814.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108814 -summary: Deserialize publish requests on generic thread-pool -area: Cluster Coordination -type: bug -issues: - - 106352 diff --git a/docs/changelog/108818.yaml b/docs/changelog/108818.yaml deleted file mode 100644 index ed60fb5f64abd..0000000000000 --- a/docs/changelog/108818.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108818 -summary: Store source for nested objects -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/108820.yaml b/docs/changelog/108820.yaml deleted file mode 100644 index 55045ffce3dfa..0000000000000 --- a/docs/changelog/108820.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108820 -summary: Allow `LuceneSourceOperator` to early terminate -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/108822.yaml b/docs/changelog/108822.yaml deleted file mode 100644 index 8cec4da5dbc7f..0000000000000 --- a/docs/changelog/108822.yaml 
+++ /dev/null @@ -1,6 +0,0 @@ -pr: 108822 -summary: Update ASM to 9.7 for plugin scanner -area: Infra/Plugins -type: upgrade -issues: - - 108776 diff --git a/docs/changelog/108831.yaml b/docs/changelog/108831.yaml deleted file mode 100644 index 496bc0108f9d2..0000000000000 --- a/docs/changelog/108831.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108831 -summary: Rename rule query and add support for multiple rulesets -area: Application -type: enhancement -issues: [ ] diff --git a/docs/changelog/108849.yaml b/docs/changelog/108849.yaml deleted file mode 100644 index 7c503efe9187b..0000000000000 --- a/docs/changelog/108849.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108849 -summary: "[Osquery] Extend `kibana_system` role with an access to new `osquery_manager`\ - \ index" -area: Authorization -type: enhancement -issues: [] diff --git a/docs/changelog/108856.yaml b/docs/changelog/108856.yaml deleted file mode 100644 index 9b8f42248a442..0000000000000 --- a/docs/changelog/108856.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108856 -summary: Return noop instance `DocSizeObserver` for updates with scripts -area: Infra/Metrics -type: enhancement -issues: [] diff --git a/docs/changelog/108860.yaml b/docs/changelog/108860.yaml deleted file mode 100644 index 93aa8ce7c08ff..0000000000000 --- a/docs/changelog/108860.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108860 -summary: "Apm-data: enable plugin by default" -area: Data streams -type: enhancement -issues: [] diff --git a/docs/changelog/108862.yaml b/docs/changelog/108862.yaml deleted file mode 100644 index ddba15f11e8f5..0000000000000 --- a/docs/changelog/108862.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108862 -summary: "Apm-data: set codec: best_compression for logs-apm.* data streams" -area: Data streams -type: enhancement -issues: [] diff --git a/docs/changelog/108868.yaml b/docs/changelog/108868.yaml deleted file mode 100644 index d0643f056cce8..0000000000000 --- a/docs/changelog/108868.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108868 -summary: GA the update trained model action -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/108870.yaml b/docs/changelog/108870.yaml deleted file mode 100644 index 435eea9845f16..0000000000000 --- a/docs/changelog/108870.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108870 -summary: Adding score from `RankDoc` to `SearchHit` -area: Search -type: bug -issues: [] diff --git a/docs/changelog/108871.yaml b/docs/changelog/108871.yaml deleted file mode 100644 index 46bf8ca9d8404..0000000000000 --- a/docs/changelog/108871.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108871 -summary: "Reapply \"ESQL: Expose \"_ignored\" metadata field\"" -area: ES|QL -type: feature -issues: [] diff --git a/docs/changelog/108878.yaml b/docs/changelog/108878.yaml deleted file mode 100644 index 1a8127869a647..0000000000000 --- a/docs/changelog/108878.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108878 -summary: Support arrays in fallback synthetic source implementation -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/108881.yaml b/docs/changelog/108881.yaml deleted file mode 100644 index b6de1129cfa03..0000000000000 --- a/docs/changelog/108881.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108881 -summary: Add synthetic source support for `geo_shape` via fallback implementation -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/108885.yaml b/docs/changelog/108885.yaml deleted file mode 100644 index c66843e082e29..0000000000000 --- a/docs/changelog/108885.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108885 -summary: 
"Apm-data: increase priority above Fleet templates" -area: Data streams -type: enhancement -issues: [] diff --git a/docs/changelog/108886.yaml b/docs/changelog/108886.yaml deleted file mode 100644 index 18df59e577713..0000000000000 --- a/docs/changelog/108886.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108886 -summary: Expose `?master_timeout` on get-shutdown API -area: Infra/Node Lifecycle -type: bug -issues: [] diff --git a/docs/changelog/108891.yaml b/docs/changelog/108891.yaml deleted file mode 100644 index 8282b616b34a9..0000000000000 --- a/docs/changelog/108891.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108891 -summary: Fix NPE during destination index creation -area: Transform -type: bug -issues: - - 108890 diff --git a/docs/changelog/108895.yaml b/docs/changelog/108895.yaml deleted file mode 100644 index 15293896b20c5..0000000000000 --- a/docs/changelog/108895.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108895 -summary: Add permission to secure access to certain config files specified by settings -area: "Security" -type: bug -issues: [] diff --git a/docs/changelog/108896.yaml b/docs/changelog/108896.yaml deleted file mode 100644 index c52f074b65605..0000000000000 --- a/docs/changelog/108896.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 108896 -summary: Introduce `logs` index mode as Tech Preview -area: Logs -type: feature -issues: - - 108896 diff --git a/docs/changelog/108911.yaml b/docs/changelog/108911.yaml deleted file mode 100644 index 8832e01f7426e..0000000000000 --- a/docs/changelog/108911.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108911 -summary: Store source for fields in objects with `dynamic` override -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/108942.yaml b/docs/changelog/108942.yaml deleted file mode 100644 index c58b06a92cee8..0000000000000 --- a/docs/changelog/108942.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108942 -summary: Fix NPE in trained model assignment updater -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/108947.yaml b/docs/changelog/108947.yaml deleted file mode 100644 index 8aa4293242985..0000000000000 --- a/docs/changelog/108947.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108947 -summary: Provide the `DocumentSizeReporter` with index mode -area: Infra/Metrics -type: enhancement -issues: [] diff --git a/docs/changelog/108999.yaml b/docs/changelog/108999.yaml deleted file mode 100644 index 089d765b4e2d0..0000000000000 --- a/docs/changelog/108999.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 108999 -summary: Use default translog durability on AD results index -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/109007.yaml b/docs/changelog/109007.yaml deleted file mode 100644 index c828db64220fb..0000000000000 --- a/docs/changelog/109007.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109007 -summary: Multivalue Sparse Vector Support -area: Search -type: enhancement -issues: [] diff --git a/docs/changelog/109025.yaml b/docs/changelog/109025.yaml deleted file mode 100644 index 38d19cab13d30..0000000000000 --- a/docs/changelog/109025.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109025 -summary: Introduce a setting controlling the activation of the `logs` index mode in logs@settings -area: Logs -type: feature -issues: - - 108762 diff --git a/docs/changelog/109042.yaml b/docs/changelog/109042.yaml deleted file mode 100644 index 5aa80db991c0d..0000000000000 --- a/docs/changelog/109042.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109042 -summary: Add Create or update query rule API call -area: Application -type: enhancement -issues: [ 
] diff --git a/docs/changelog/109043.yaml b/docs/changelog/109043.yaml deleted file mode 100644 index bdfe3addea8e9..0000000000000 --- a/docs/changelog/109043.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109043 -summary: "Apm-data: set concrete values for `metricset.interval`" -area: Data streams -type: bug -issues: [] diff --git a/docs/changelog/109044.yaml b/docs/changelog/109044.yaml deleted file mode 100644 index 9e50c377606a0..0000000000000 --- a/docs/changelog/109044.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109044 -summary: Enable fallback synthetic source for `token_count` -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/109047.yaml b/docs/changelog/109047.yaml deleted file mode 100644 index 85a8808353a08..0000000000000 --- a/docs/changelog/109047.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109047 -summary: Prevent concurrent jobs during cleanup -area: Transform -type: bug -issues: [] diff --git a/docs/changelog/109070.yaml b/docs/changelog/109070.yaml deleted file mode 100644 index 8dbc0ec1c6cf2..0000000000000 --- a/docs/changelog/109070.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109070 -summary: "ESQL: Add `ip_prefix` function" -area: ES|QL -type: feature -issues: - - 99064 diff --git a/docs/changelog/109071.yaml b/docs/changelog/109071.yaml deleted file mode 100644 index 275a5433cc1d8..0000000000000 --- a/docs/changelog/109071.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109071 -summary: Better handling of multiple rescorers clauses with LTR -area: "Search" -type: bug -issues: [] diff --git a/docs/changelog/109078.yaml b/docs/changelog/109078.yaml deleted file mode 100644 index f602ee9b131bc..0000000000000 --- a/docs/changelog/109078.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109078 -summary: Expose API Key cache metrics -area: Authentication -type: enhancement -issues: [] diff --git a/docs/changelog/109084.yaml b/docs/changelog/109084.yaml deleted file mode 100644 index 67ff5610c5a66..0000000000000 --- a/docs/changelog/109084.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109084 -summary: Add AVX-512 optimised vector distance functions for int7 on x64 -area: Search -type: enhancement -issues: [] diff --git a/docs/changelog/109104.yaml b/docs/changelog/109104.yaml deleted file mode 100644 index 985cf14bc5952..0000000000000 --- a/docs/changelog/109104.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109104 -summary: Offload request to generic threadpool -area: Machine Learning -type: bug -issues: - - 109100 diff --git a/docs/changelog/109123.yaml b/docs/changelog/109123.yaml deleted file mode 100644 index dfd7e52b33e7f..0000000000000 --- a/docs/changelog/109123.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109123 -summary: "[Inference API] Check for related pipelines on delete inference endpoint" -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/109126.yaml b/docs/changelog/109126.yaml deleted file mode 100644 index 248eacc76b65c..0000000000000 --- a/docs/changelog/109126.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109126 -summary: Correctly handle duplicate model ids for the `_cat` trained models api and usage statistics -area: Machine Learning -type: bug -issues: [ ] diff --git a/docs/changelog/109167.yaml b/docs/changelog/109167.yaml deleted file mode 100644 index e366b2302263c..0000000000000 --- a/docs/changelog/109167.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109167 -summary: Fixes cluster state-based role mappings not recovered from disk -area: Authorization -type: bug -issues: [] diff --git a/docs/changelog/109174.yaml b/docs/changelog/109174.yaml deleted file mode 
100644 index 5cd57ebd34ac6..0000000000000 --- a/docs/changelog/109174.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109174 -summary: "ESQL: Change \"substring\" function to not return null on empty string" -area: ES|QL -type: bug -issues: [] diff --git a/docs/changelog/109185.yaml b/docs/changelog/109185.yaml deleted file mode 100644 index 4da72c4b20ffb..0000000000000 --- a/docs/changelog/109185.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109185 -summary: Handle unmatching remote cluster wildcards properly for `IndicesRequest.SingleIndexNoWildcards` - requests -area: Authorization -type: bug -issues: [] diff --git a/docs/changelog/109194.yaml b/docs/changelog/109194.yaml deleted file mode 100644 index bf50139547f62..0000000000000 --- a/docs/changelog/109194.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109194 -summary: "[Inference API] Add Mistral Embeddings Support to Inference API" -area: Machine Learning -type: enhancement -issues: [ ] diff --git a/docs/changelog/109196.yaml b/docs/changelog/109196.yaml deleted file mode 100644 index 7f5ca3efbc8d4..0000000000000 --- a/docs/changelog/109196.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109196 -summary: Handle nullable `DocsStats` and `StoresStats` -area: Distributed -type: bug -issues: [] diff --git a/docs/changelog/109204.yaml b/docs/changelog/109204.yaml deleted file mode 100644 index b5b22ef1a06f9..0000000000000 --- a/docs/changelog/109204.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109204 -summary: Detect long-running tasks on network threads -area: Network -type: enhancement -issues: [] diff --git a/docs/changelog/109205.yaml b/docs/changelog/109205.yaml deleted file mode 100644 index 10f13a6549fbc..0000000000000 --- a/docs/changelog/109205.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109205 -summary: "ESQL: Fix `IpPrefix` function not handling correctly `ByteRefs`" -area: ES|QL -type: bug -issues: - - 109198 diff --git a/docs/changelog/109219.yaml b/docs/changelog/109219.yaml deleted file mode 100644 index abf4f49235166..0000000000000 --- a/docs/changelog/109219.yaml +++ /dev/null @@ -1,15 +0,0 @@ -pr: 109219 -summary: Update Lucene version to 9.11 -area: Search -type: feature -issues: [] -highlight: - title: "Update Elasticsearch to Lucene 9.11" - body: |- - Elasticsearch is now updated using the latest Lucene version 9.11. 
- Here are the full release notes: - But, here are some particular highlights: - - Usage of MADVISE for better memory management: https://github.com/apache/lucene/pull/13196 - - Use RWLock to access LRUQueryCache to reduce contention: https://github.com/apache/lucene/pull/13306 - - Speedup multi-segment HNSW graph search for nested kNN queries: https://github.com/apache/lucene/pull/13121 - - Add a MemorySegment Vector scorer - for scoring without copying on-heap vectors: https://github.com/apache/lucene/pull/13339 diff --git a/docs/changelog/109220.yaml b/docs/changelog/109220.yaml deleted file mode 100644 index b8efa8f784d7a..0000000000000 --- a/docs/changelog/109220.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109220 -summary: "ESQL: add REPEAT string function" -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/109233.yaml b/docs/changelog/109233.yaml deleted file mode 100644 index 36010273c80db..0000000000000 --- a/docs/changelog/109233.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109233 -summary: Fix trappy timeouts in security settings APIs -area: Security -type: bug -issues: [] diff --git a/docs/changelog/109236.yaml b/docs/changelog/109236.yaml deleted file mode 100644 index e2eb917ea0343..0000000000000 --- a/docs/changelog/109236.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109236 -summary: Use proper executor for failing requests when connection closes -area: Network -type: bug -issues: - - 109225 diff --git a/docs/changelog/109240.yaml b/docs/changelog/109240.yaml deleted file mode 100644 index a9fad3abdc47f..0000000000000 --- a/docs/changelog/109240.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109240 -summary: Fix trappy timeout in allocation explain API -area: Allocation -type: bug -issues: [] diff --git a/docs/changelog/109241.yaml b/docs/changelog/109241.yaml deleted file mode 100644 index b7343b9df1841..0000000000000 --- a/docs/changelog/109241.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109241 -summary: Fix misc trappy allocation API timeouts -area: Allocation -type: bug -issues: [] diff --git a/docs/changelog/109256.yaml b/docs/changelog/109256.yaml deleted file mode 100644 index 30c15ed77f9b9..0000000000000 --- a/docs/changelog/109256.yaml +++ /dev/null @@ -1,7 +0,0 @@ -pr: 109256 -summary: "[ESQL] Migrate `SimplifyComparisonArithmetics` optimization" -area: ES|QL -type: bug -issues: - - 108388 - - 108743 diff --git a/docs/changelog/109312.yaml b/docs/changelog/109312.yaml deleted file mode 100644 index 594d3f90e8fd1..0000000000000 --- a/docs/changelog/109312.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109312 -summary: Enable fallback synthetic source for `point` and `shape` -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/109317.yaml b/docs/changelog/109317.yaml deleted file mode 100644 index 1d8595d99c2a6..0000000000000 --- a/docs/changelog/109317.yaml +++ /dev/null @@ -1,13 +0,0 @@ -pr: 109317 -summary: Add new int4 quantization to dense_vector -area: Search -type: feature -issues: [] -highlight: - title: Add new int4 quantization to dense_vector - body: |- - New int4 (half-byte) scalar quantization support via two new index types: `int4_hnsw` and `int4_flat`. - This gives an 8x reduction from `float32` with some accuracy loss. In addition to less memory required, this - improves query and merge speed significantly when compared to raw vectors. 
- notable: true - diff --git a/docs/changelog/109332.yaml b/docs/changelog/109332.yaml deleted file mode 100644 index 3d03523fd518b..0000000000000 --- a/docs/changelog/109332.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109332 -summary: "ES|QL: vectorize eval" -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/109341.yaml b/docs/changelog/109341.yaml deleted file mode 100644 index 0c1eaa98a8aa2..0000000000000 --- a/docs/changelog/109341.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109341 -summary: Re-define `index.mapper.dynamic` setting in 8.x for a better 7.x to 8.x upgrade if this setting is used. -area: Mapping -type: bug -issues: [] diff --git a/docs/changelog/109358.yaml b/docs/changelog/109358.yaml deleted file mode 100644 index af47b4129d874..0000000000000 --- a/docs/changelog/109358.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109358 -summary: Use the multi node routing action for internal inference services -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/109359.yaml b/docs/changelog/109359.yaml deleted file mode 100644 index 37202eb5a28ec..0000000000000 --- a/docs/changelog/109359.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109359 -summary: Adding hamming distance function to painless for `dense_vector` fields -area: Vector Search -type: enhancement -issues: [] diff --git a/docs/changelog/109370.yaml b/docs/changelog/109370.yaml deleted file mode 100644 index 32b190d1a1c94..0000000000000 --- a/docs/changelog/109370.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109370 -summary: Enable fallback synthetic source by default -area: Mapping -type: feature -issues: - - 106460 diff --git a/docs/changelog/109384.yaml b/docs/changelog/109384.yaml deleted file mode 100644 index 303da23d57d8e..0000000000000 --- a/docs/changelog/109384.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109384 -summary: Fix serialising inference delete response -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/109386.yaml b/docs/changelog/109386.yaml deleted file mode 100644 index 984ee96dde063..0000000000000 --- a/docs/changelog/109386.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109386 -summary: "ESQL: `top_list` aggregation" -area: ES|QL -type: feature -issues: - - 109213 diff --git a/docs/changelog/109395.yaml b/docs/changelog/109395.yaml deleted file mode 100644 index e5336695afa48..0000000000000 --- a/docs/changelog/109395.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109395 -summary: Correct positioning for unique token filter -area: Analysis -type: bug -issues: [] diff --git a/docs/changelog/109410.yaml b/docs/changelog/109410.yaml deleted file mode 100644 index e8c4dcdab42c6..0000000000000 --- a/docs/changelog/109410.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109410 -summary: Support synthetic source for date fields when `ignore_malformed` is used -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/109444.yaml b/docs/changelog/109444.yaml deleted file mode 100644 index 8c56fe2dd9f02..0000000000000 --- a/docs/changelog/109444.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109444 -summary: "Aggs: Scripted metric allow list" -area: Aggregations -type: enhancement -issues: [] diff --git a/docs/changelog/109449.yaml b/docs/changelog/109449.yaml deleted file mode 100644 index 90cb908227f1b..0000000000000 --- a/docs/changelog/109449.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109449 -summary: Reset max page size to settings value -area: Transform -type: bug -issues: - - 109308 diff --git a/docs/changelog/109462.yaml b/docs/changelog/109462.yaml deleted file mode 100644 index 
a05f4a04e80ae..0000000000000 --- a/docs/changelog/109462.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109462 -summary: Add `wait_for_completion` parameter to delete snapshot request -area: Distributed -type: enhancement -issues: - - 101300 diff --git a/docs/changelog/109470.yaml b/docs/changelog/109470.yaml deleted file mode 100644 index 837c1664b775a..0000000000000 --- a/docs/changelog/109470.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109470 -summary: Enabling profiling for `RankBuilders` and adding tests for RRF -area: Ranking -type: enhancement -issues: [] diff --git a/docs/changelog/109480.yaml b/docs/changelog/109480.yaml deleted file mode 100644 index 3a6f48e9bd840..0000000000000 --- a/docs/changelog/109480.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109480 -summary: "[Connector API] Add claim sync job endpoint" -area: Application -type: feature -issues: [] diff --git a/docs/changelog/109481.yaml b/docs/changelog/109481.yaml deleted file mode 100644 index e8251788a90bd..0000000000000 --- a/docs/changelog/109481.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109481 -summary: Fork freeing search/scroll contexts to GENERIC pool -area: Search -type: bug -issues: [] diff --git a/docs/changelog/109487.yaml b/docs/changelog/109487.yaml deleted file mode 100644 index c69c77203f12d..0000000000000 --- a/docs/changelog/109487.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109487 -summary: Start Trained Model Deployment API request query params now override body params -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/109492.yaml b/docs/changelog/109492.yaml deleted file mode 100644 index d4d1e83eb7786..0000000000000 --- a/docs/changelog/109492.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109492 -summary: Add hexstring support byte painless scorers -area: Search -type: bug -issues: [] diff --git a/docs/changelog/109500.yaml b/docs/changelog/109500.yaml deleted file mode 100644 index cfd6bc770d5d6..0000000000000 --- a/docs/changelog/109500.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109500 -summary: Guard file settings readiness on file settings support -area: Infra/Settings -type: bug -issues: [] diff --git a/docs/changelog/109506.yaml b/docs/changelog/109506.yaml deleted file mode 100644 index 3a7570ed0b93a..0000000000000 --- a/docs/changelog/109506.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109506 -summary: Support synthetic source for `scaled_float` and `unsigned_long` when `ignore_malformed` - is used -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/109533.yaml b/docs/changelog/109533.yaml deleted file mode 100644 index 5720410e5f370..0000000000000 --- a/docs/changelog/109533.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109533 -summary: Fix IndexOutOfBoundsException during inference -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/109534.yaml b/docs/changelog/109534.yaml deleted file mode 100644 index c6eb520bb70a8..0000000000000 --- a/docs/changelog/109534.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109534 -summary: Propagate accurate deployment timeout -area: Machine Learning -type: bug -issues: - - 109407 diff --git a/docs/changelog/109540.yaml b/docs/changelog/109540.yaml deleted file mode 100644 index 722c60a30fb97..0000000000000 --- a/docs/changelog/109540.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109540 -summary: Add metrics@custom component template to metrics-*-* index template -area: Data streams -type: enhancement -issues: - - 109475 diff --git a/docs/changelog/109551.yaml b/docs/changelog/109551.yaml deleted file mode 100644 index 
f4949669091d9..0000000000000 --- a/docs/changelog/109551.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109551 -summary: Avoid `InferenceRunner` deadlock -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/109554.yaml b/docs/changelog/109554.yaml deleted file mode 100644 index 4e78a8f3044c7..0000000000000 --- a/docs/changelog/109554.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109554 -summary: "[Query Rules] Add API calls to get or delete individual query rules within\ - \ a ruleset" -area: Relevance -type: enhancement -issues: [] diff --git a/docs/changelog/109563.yaml b/docs/changelog/109563.yaml deleted file mode 100644 index 9099064b6b040..0000000000000 --- a/docs/changelog/109563.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109563 -summary: Add allocation explain output for THROTTLING shards -area: Infra/Core -type: enhancement -issues: [] diff --git a/docs/changelog/109597.yaml b/docs/changelog/109597.yaml deleted file mode 100644 index 9b99df85da6a3..0000000000000 --- a/docs/changelog/109597.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109597 -summary: Opt `scripted_metric` out of parallelization -area: Aggregations -type: feature -issues: [] diff --git a/docs/changelog/109603.yaml b/docs/changelog/109603.yaml deleted file mode 100644 index 2d6e8b94aa8d0..0000000000000 --- a/docs/changelog/109603.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109603 -summary: Update translog `writeLocation` for `flushListener` after commit -area: Engine -type: enhancement -issues: [] diff --git a/docs/changelog/109606.yaml b/docs/changelog/109606.yaml deleted file mode 100644 index 6c9089c4c4fde..0000000000000 --- a/docs/changelog/109606.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109606 -summary: Avoid NPE if `users_roles` file does not exist -area: Authentication -type: bug -issues: [] diff --git a/docs/changelog/109613.yaml b/docs/changelog/109613.yaml deleted file mode 100644 index 21d152ac1d6de..0000000000000 --- a/docs/changelog/109613.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109613 -summary: Consider `error_trace` supported by all endpoints -area: Infra/REST API -type: bug -issues: - - 109612 diff --git a/docs/changelog/109618.yaml b/docs/changelog/109618.yaml deleted file mode 100644 index f28bb15a53d96..0000000000000 --- a/docs/changelog/109618.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109618 -summary: Fail cluster state API if blocked -area: Cluster Coordination -type: bug -issues: - - 107503 diff --git a/docs/changelog/109629.yaml b/docs/changelog/109629.yaml deleted file mode 100644 index c468388117b72..0000000000000 --- a/docs/changelog/109629.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109629 -summary: "[Data streams] Fix the description of the lazy rollover task" -area: Data streams -type: bug -issues: [] diff --git a/docs/changelog/109632.yaml b/docs/changelog/109632.yaml deleted file mode 100644 index 6b04160bbdbec..0000000000000 --- a/docs/changelog/109632.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109632 -summary: Force execute inactive sink reaper -area: ES|QL -type: bug -issues: [] diff --git a/docs/changelog/109634.yaml b/docs/changelog/109634.yaml deleted file mode 100644 index 4c6358578b6de..0000000000000 --- a/docs/changelog/109634.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109634 -summary: "[Query Rules] Require Enterprise License for Query Rules" -area: Relevance -type: enhancement -issues: [] diff --git a/docs/changelog/109636.yaml b/docs/changelog/109636.yaml deleted file mode 100644 index f8f73a75dfd3d..0000000000000 --- a/docs/changelog/109636.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109636 
-summary: "Ensure a lazy rollover request will rollover the target data stream once." -area: Data streams -type: bug -issues: [] diff --git a/docs/changelog/109651.yaml b/docs/changelog/109651.yaml deleted file mode 100644 index 982e6a5b536cc..0000000000000 --- a/docs/changelog/109651.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109651 -summary: Support synthetic source for `geo_point` when `ignore_malformed` is used -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/109653.yaml b/docs/changelog/109653.yaml deleted file mode 100644 index 665163ec2a91b..0000000000000 --- a/docs/changelog/109653.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109653 -summary: Handle the "JSON memory allocator bytes" field -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/109657.yaml b/docs/changelog/109657.yaml deleted file mode 100644 index 35b315b7568c9..0000000000000 --- a/docs/changelog/109657.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109657 -summary: Track `RequestedRangeNotSatisfiedException` separately in S3 Metrics -area: Snapshot/Restore -type: enhancement -issues: [] diff --git a/docs/changelog/109672.yaml b/docs/changelog/109672.yaml deleted file mode 100644 index bb6532ab7accf..0000000000000 --- a/docs/changelog/109672.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109672 -summary: Log repo UUID at generation/registration time -area: Snapshot/Restore -type: enhancement -issues: [] diff --git a/docs/changelog/109695.yaml b/docs/changelog/109695.yaml deleted file mode 100644 index f922b76412676..0000000000000 --- a/docs/changelog/109695.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109695 -summary: Fix ESQL cancellation for exchange requests -area: ES|QL -type: bug -issues: [] diff --git a/docs/changelog/109717.yaml b/docs/changelog/109717.yaml deleted file mode 100644 index 326657ea4ce21..0000000000000 --- a/docs/changelog/109717.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109717 -summary: Bump jackson version in modules:repository-azure -area: Snapshot/Restore -type: upgrade -issues: [] diff --git a/docs/changelog/109720.yaml b/docs/changelog/109720.yaml deleted file mode 100644 index b029726c84427..0000000000000 --- a/docs/changelog/109720.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109720 -summary: "DocsStats: Add human readable bytesize" -area: Stats -type: enhancement -issues: [] diff --git a/docs/changelog/109746.yaml b/docs/changelog/109746.yaml deleted file mode 100644 index 5360f545333ac..0000000000000 --- a/docs/changelog/109746.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109746 -summary: ES|QL Add primitive float support to the Compute Engine -area: ES|QL -type: enhancement -issues: - - 109178 diff --git a/docs/changelog/109779.yaml b/docs/changelog/109779.yaml deleted file mode 100644 index 4ccd8d475ec8d..0000000000000 --- a/docs/changelog/109779.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109779 -summary: Include component templates in retention validaiton -area: Data streams -type: bug -issues: [] diff --git a/docs/changelog/109781.yaml b/docs/changelog/109781.yaml deleted file mode 100644 index df74645b53d84..0000000000000 --- a/docs/changelog/109781.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109781 -summary: ES|QL Add primitive float variants of all aggregators to the compute engine -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/109794.yaml b/docs/changelog/109794.yaml deleted file mode 100644 index d244c69a903ba..0000000000000 --- a/docs/changelog/109794.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109794 -summary: Provide document size reporter with 
`MapperService` -area: Infra/Metrics -type: bug -issues: [] diff --git a/docs/changelog/109807.yaml b/docs/changelog/109807.yaml deleted file mode 100644 index 5cf8a2c896c4e..0000000000000 --- a/docs/changelog/109807.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109807 -summary: "ESQL: Fix LOOKUP attribute shadowing" -area: ES|QL -type: bug -issues: - - 109392 diff --git a/docs/changelog/109813.yaml b/docs/changelog/109813.yaml deleted file mode 100644 index edcef17e87606..0000000000000 --- a/docs/changelog/109813.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109813 -summary: Add text similarity reranker retriever -area: Ranking -type: feature -issues: [] diff --git a/docs/changelog/109824.yaml b/docs/changelog/109824.yaml deleted file mode 100644 index 987e8c0a8b1a2..0000000000000 --- a/docs/changelog/109824.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109824 -summary: Check array size before returning array item in script doc values -area: Infra/Scripting -type: bug -issues: - - 104998 diff --git a/docs/changelog/109848.yaml b/docs/changelog/109848.yaml deleted file mode 100644 index 858bbe84ef3a4..0000000000000 --- a/docs/changelog/109848.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109848 -summary: Denser in-memory representation of `ShardBlobsToDelete` -area: Snapshot/Restore -type: enhancement -issues: [] diff --git a/docs/changelog/109850.yaml b/docs/changelog/109850.yaml deleted file mode 100644 index 0f11318765aea..0000000000000 --- a/docs/changelog/109850.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109850 -summary: Ensure tasks preserve versions in `MasterService` -area: Cluster Coordination -type: bug -issues: [] diff --git a/docs/changelog/109873.yaml b/docs/changelog/109873.yaml deleted file mode 100644 index c77197cc22d0a..0000000000000 --- a/docs/changelog/109873.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109873 -summary: "ESQL: add Arrow dataframes output format" -area: ES|QL -type: feature -issues: [] diff --git a/docs/changelog/109876.yaml b/docs/changelog/109876.yaml deleted file mode 100644 index 4a65b4e17c4a3..0000000000000 --- a/docs/changelog/109876.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109876 -summary: Always pick the user `maxPageSize` value -area: Transform -type: bug -issues: - - 109844 diff --git a/docs/changelog/109880.yaml b/docs/changelog/109880.yaml deleted file mode 100644 index 71c7209824a8a..0000000000000 --- a/docs/changelog/109880.yaml +++ /dev/null @@ -1,10 +0,0 @@ -pr: 109880 -summary: Deprecate `text_expansion` and `weighted_tokens` queries -area: Machine Learning -type: deprecation -issues: [ ] -deprecation: - title: Deprecate `text_expansion` and `weighted_tokens` queries - area: REST API - details: The `text_expansion` and `weighted_tokens` queries have been replaced by `sparse_vector`. 
- impact: Please update your existing `text_expansion` and `weighted_tokens` queries to use `sparse_vector.` diff --git a/docs/changelog/109882.yaml b/docs/changelog/109882.yaml deleted file mode 100644 index 0f0fed01c5a7a..0000000000000 --- a/docs/changelog/109882.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109882 -summary: Support synthetic source together with `ignore_malformed` in histogram fields -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/109893.yaml b/docs/changelog/109893.yaml deleted file mode 100644 index df6d6e51236c8..0000000000000 --- a/docs/changelog/109893.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109893 -summary: Add Anthropic messages integration to Inference API -area: Machine Learning -type: enhancement -issues: [ ] diff --git a/docs/changelog/109908.yaml b/docs/changelog/109908.yaml deleted file mode 100644 index cdf2acf17096c..0000000000000 --- a/docs/changelog/109908.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109908 -summary: "Update checkpoints after post-replication actions, even on failure" -area: CRUD -type: bug -issues: [] diff --git a/docs/changelog/109931.yaml b/docs/changelog/109931.yaml deleted file mode 100644 index 3575cfd49176f..0000000000000 --- a/docs/changelog/109931.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109931 -summary: Apply FLS to the contents of `IgnoredSourceFieldMapper` -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/109948.yaml b/docs/changelog/109948.yaml deleted file mode 100644 index 3f5a281781bcf..0000000000000 --- a/docs/changelog/109948.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109948 -summary: Automatically adjust `ignore_malformed` only for the @timestamp -area: Mapping -type: bug -issues: [] diff --git a/docs/changelog/109957.yaml b/docs/changelog/109957.yaml deleted file mode 100644 index 6bbcd8175501c..0000000000000 --- a/docs/changelog/109957.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109957 -summary: Add request metric to `RestController` to track success/failure (by status - code) -area: Infra/Metrics -type: enhancement -issues: [] diff --git a/docs/changelog/109963.yaml b/docs/changelog/109963.yaml deleted file mode 100644 index 1745d549582d4..0000000000000 --- a/docs/changelog/109963.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 109963 -summary: Propagate mapper builder context flags across nested mapper builder context - creation -area: Mapping -type: bug -issues: [] diff --git a/docs/changelog/109967.yaml b/docs/changelog/109967.yaml deleted file mode 100644 index cfc6b6462954b..0000000000000 --- a/docs/changelog/109967.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109967 -summary: Default the HF service to cosine similarity -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/109981.yaml b/docs/changelog/109981.yaml deleted file mode 100644 index cf9388f79e29c..0000000000000 --- a/docs/changelog/109981.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109981 -summary: Limit number of synonym rules that can be created -area: Mapping -type: bug -issues: [108785] diff --git a/docs/changelog/109989.yaml b/docs/changelog/109989.yaml deleted file mode 100644 index f1f5972b60eb3..0000000000000 --- a/docs/changelog/109989.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109989 -summary: "ESQL: Fix Join references" -area: ES|QL -type: bug -issues: [] diff --git a/docs/changelog/109993.yaml b/docs/changelog/109993.yaml deleted file mode 100644 index 40d161b6b5c24..0000000000000 --- a/docs/changelog/109993.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 109993 -summary: "[ES|QL] `weighted_avg`" -area: 
ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/110004.yaml b/docs/changelog/110004.yaml deleted file mode 100644 index f680016527a9c..0000000000000 --- a/docs/changelog/110004.yaml +++ /dev/null @@ -1,11 +0,0 @@ -pr: 110004 -summary: Mark Query Rules as GA -area: Relevance -type: feature -issues: [] -highlight: - title: Mark Query Rules as GA - body: |- - This PR marks query rules as Generally Available. All APIs are no longer - in tech preview. - notable: true diff --git a/docs/changelog/110016.yaml b/docs/changelog/110016.yaml deleted file mode 100644 index 28ad55aa796c8..0000000000000 --- a/docs/changelog/110016.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110016 -summary: Opt in keyword field into fallback synthetic source if needed -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/110019.yaml b/docs/changelog/110019.yaml deleted file mode 100644 index 632e79008d351..0000000000000 --- a/docs/changelog/110019.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110019 -summary: Improve mechanism for extracting the result of a `PlainActionFuture` -area: Distributed -type: enhancement -issues: - - 108125 diff --git a/docs/changelog/110035.yaml b/docs/changelog/110035.yaml deleted file mode 100644 index 670c58240d835..0000000000000 --- a/docs/changelog/110035.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110035 -summary: Fix equals and hashcode for `SingleValueQuery.LuceneQuery` -area: ES|QL -type: bug -issues: [] diff --git a/docs/changelog/110046.yaml b/docs/changelog/110046.yaml deleted file mode 100644 index 6ebe440e7aced..0000000000000 --- a/docs/changelog/110046.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110046 -summary: "ESQL: make named params objects truly per request" -area: ES|QL -type: bug -issues: - - 110028 diff --git a/docs/changelog/110059.yaml b/docs/changelog/110059.yaml deleted file mode 100644 index ba160c091cdc2..0000000000000 --- a/docs/changelog/110059.yaml +++ /dev/null @@ -1,32 +0,0 @@ -pr: 110059 -summary: Adds new `bit` `element_type` for `dense_vectors` -area: Vector Search -type: feature -issues: [] -highlight: - title: Adds new `bit` `element_type` for `dense_vectors` - body: |- - This adds `bit` vector support by adding `element_type: bit` for - vectors. This new element type works for indexed and non-indexed - vectors. Additionally, it works with `hnsw` and `flat` index types. No - quantization based codec works with this element type, this is - consistent with `byte` vectors. - - `bit` vectors accept up to `32768` dimensions in size and expect vectors - that are being indexed to be encoded either as a hexidecimal string or a - `byte[]` array where each element of the `byte` array represents `8` - bits of the vector. - - `bit` vectors support script usage and regular query usage. When - indexed, all comparisons done are `xor` and `popcount` summations (aka, - hamming distance), and the scores are transformed and normalized given - the vector dimensions. - - For scripts, `l1norm` is the same as `hamming` distance and `l2norm` is - `sqrt(l1norm)`. `dotProduct` and `cosineSimilarity` are not supported. - - Note, the dimensions expected by this element_type are always to be - divisible by `8`, and the `byte[]` vectors provided for index must be - have size `dim/8` size, where each byte element represents `8` bits of - the vectors. 
- notable: true diff --git a/docs/changelog/110066.yaml b/docs/changelog/110066.yaml deleted file mode 100644 index 920c6304b63ae..0000000000000 --- a/docs/changelog/110066.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110066 -summary: Support flattened fields and multi-fields as dimensions in downsampling -area: Downsampling -type: bug -issues: - - 99297 diff --git a/docs/changelog/110096.yaml b/docs/changelog/110096.yaml deleted file mode 100644 index 3d6616c289266..0000000000000 --- a/docs/changelog/110096.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110096 -summary: Fix `ClassCastException` with MV_EXPAND on missing field -area: ES|QL -type: bug -issues: - - 109974 diff --git a/docs/changelog/110102.yaml b/docs/changelog/110102.yaml deleted file mode 100644 index d1b9b53e2dfc5..0000000000000 --- a/docs/changelog/110102.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110102 -summary: Optimize ST_DISTANCE filtering with Lucene circle intersection query -area: ES|QL -type: enhancement -issues: - - 109972 diff --git a/docs/changelog/110103.yaml b/docs/changelog/110103.yaml deleted file mode 100644 index 9f613ec2b446e..0000000000000 --- a/docs/changelog/110103.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110103 -summary: Fix automatic tracking of collapse with `docvalue_fields` -area: Search -type: bug -issues: [] diff --git a/docs/changelog/110112.yaml b/docs/changelog/110112.yaml deleted file mode 100644 index eca5fd9af15ce..0000000000000 --- a/docs/changelog/110112.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110112 -summary: Increase response size limit for batched requests -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/110146.yaml b/docs/changelog/110146.yaml deleted file mode 100644 index 61ba35cec319b..0000000000000 --- a/docs/changelog/110146.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110146 -summary: Fix trailing slash in `ml.get_categories` specification -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/110160.yaml b/docs/changelog/110160.yaml deleted file mode 100644 index 0c38c23c69067..0000000000000 --- a/docs/changelog/110160.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110160 -summary: Opt in number fields into fallback synthetic source when doc values a… -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/110176.yaml b/docs/changelog/110176.yaml deleted file mode 100644 index ae1d7d10d6dc4..0000000000000 --- a/docs/changelog/110176.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110176 -summary: Fix trailing slash in two rollup specifications -area: Rollup -type: bug -issues: [] diff --git a/docs/changelog/110177.yaml b/docs/changelog/110177.yaml deleted file mode 100644 index 0ac5328d88df4..0000000000000 --- a/docs/changelog/110177.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110177 -summary: Fix trailing slash in `security.put_privileges` specification -area: Authorization -type: bug -issues: [] diff --git a/docs/changelog/110179.yaml b/docs/changelog/110179.yaml deleted file mode 100644 index b99a390c8586f..0000000000000 --- a/docs/changelog/110179.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110179 -summary: Make repository analysis API available to non-operators -area: Snapshot/Restore -type: enhancement -issues: - - 100318 diff --git a/docs/changelog/110186.yaml b/docs/changelog/110186.yaml deleted file mode 100644 index 23eaab118e2ab..0000000000000 --- a/docs/changelog/110186.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110186 -summary: Don't sample calls to `ReduceContext#consumeBucketsAndMaybeBreak` ins `InternalDateHistogram` - and 
`InternalHistogram` during reduction -area: Aggregations -type: bug -issues: [] diff --git a/docs/changelog/110201.yaml b/docs/changelog/110201.yaml deleted file mode 100644 index a880638881948..0000000000000 --- a/docs/changelog/110201.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110201 -summary: "ES|QL: Fix DISSECT that overwrites input" -area: ES|QL -type: bug -issues: - - 110184 diff --git a/docs/changelog/110214.yaml b/docs/changelog/110214.yaml deleted file mode 100644 index 20f61cac64454..0000000000000 --- a/docs/changelog/110214.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110214 -summary: Handle `ignore_above` in synthetic source for flattened fields -area: Mapping -type: enhancement -issues: [] diff --git a/docs/changelog/110233.yaml b/docs/changelog/110233.yaml deleted file mode 100644 index d9ce4057090a4..0000000000000 --- a/docs/changelog/110233.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110233 -summary: Support k parameter for knn query -area: Vector Search -type: enhancement -issues: - - 108473 diff --git a/docs/changelog/110234.yaml b/docs/changelog/110234.yaml deleted file mode 100644 index 0656ba5fb6636..0000000000000 --- a/docs/changelog/110234.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110234 -summary: Upgrade to Lucene-9.11.1 -area: Search -type: upgrade -issues: [] diff --git a/docs/changelog/110236.yaml b/docs/changelog/110236.yaml deleted file mode 100644 index e2dbff7fbf768..0000000000000 --- a/docs/changelog/110236.yaml +++ /dev/null @@ -1,21 +0,0 @@ -pr: 110236 -summary: '`ParseHeapRatioOrDeprecatedByteSizeValue` for `indices.breaker.total.limit`' -area: Infra/Settings -type: deprecation -issues: [] -deprecation: - title: 'Deprecate absolute size values for `indices.breaker.total.limit` setting' - area: Cluster and node setting - details: Previously, the value of `indices.breaker.total.limit` could be specified as - an absolute size in bytes. This setting controls the overal amount of - memory the server is allowed to use before taking remedial actions. Setting - this to a specific number of bytes led to strange behaviour when the node - maximum heap size changed because the circut breaker limit would remain - unchanged. This would either leave the value too low, causing part of the - heap to remain unused; or it would leave the value too high, causing the - circuit breaker to be ineffective at preventing OOM errors. The only - reasonable behaviour for this setting is that it scales with the size of - the heap, and so absolute byte limits are now deprecated. - impact: Users must change their configuration to specify a percentage instead of - an absolute number of bytes for `indices.breaker.total.limit`, or else - accept the default, which is already specified as a percentage. diff --git a/docs/changelog/110251.yaml b/docs/changelog/110251.yaml deleted file mode 100644 index a3b0c3128be35..0000000000000 --- a/docs/changelog/110251.yaml +++ /dev/null @@ -1,13 +0,0 @@ -pr: 110251 -summary: Support index sorting with nested fields -area: Logs -type: enhancement -issues: - - 107349 -highlight: - title: Index sorting on indexes with nested fields - body: |- - Index sorting is now supported for indexes with mappings containing nested objects. - The index sort spec (as specified by `index.sort.field`) can't contain any nested - fields, still. 
- notable: false diff --git a/docs/changelog/110268.yaml b/docs/changelog/110268.yaml deleted file mode 100644 index adfb467f92e8b..0000000000000 --- a/docs/changelog/110268.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110268 -summary: Disallow index.time_series.end_time setting from being set or updated in normal indices -area: TSDB -type: bug -issues: - - 110265 diff --git a/docs/changelog/110334.yaml b/docs/changelog/110334.yaml deleted file mode 100644 index f83ac04ded773..0000000000000 --- a/docs/changelog/110334.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110334 -summary: Sentence Chunker -area: Machine Learning -type: enhancement -issues: [] diff --git a/docs/changelog/110337.yaml b/docs/changelog/110337.yaml deleted file mode 100644 index bf21a95c9157f..0000000000000 --- a/docs/changelog/110337.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110337 -summary: Support `ignore_above` on keyword dimensions -area: TSDB -type: enhancement -issues: [] diff --git a/docs/changelog/110338.yaml b/docs/changelog/110338.yaml deleted file mode 100644 index 2334a1cbc9283..0000000000000 --- a/docs/changelog/110338.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110338 -summary: Add `semantic_text` field type and `semantic` query -area: Mapping -type: feature -issues: [] diff --git a/docs/changelog/110347.yaml b/docs/changelog/110347.yaml deleted file mode 100644 index 8727128230935..0000000000000 --- a/docs/changelog/110347.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110347 -summary: "ESQL: Renamed `TopList` to Top" -area: ES|QL -type: enhancement -issues: [] diff --git a/docs/changelog/110361.yaml b/docs/changelog/110361.yaml deleted file mode 100644 index 8558c88e06049..0000000000000 --- a/docs/changelog/110361.yaml +++ /dev/null @@ -1,7 +0,0 @@ -pr: 110361 -summary: Don't detect `PlainActionFuture` deadlock on concurrent complete -area: Distributed -type: bug -issues: - - 110181 - - 110360 diff --git a/docs/changelog/110369.yaml b/docs/changelog/110369.yaml deleted file mode 100644 index 770294605b444..0000000000000 --- a/docs/changelog/110369.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110369 -summary: Run terms concurrently when cardinality is only lower than shard size -area: Aggregations -type: bug -issues: - - 105505 diff --git a/docs/changelog/110383.yaml b/docs/changelog/110383.yaml deleted file mode 100644 index 5e9bddd4bfcd2..0000000000000 --- a/docs/changelog/110383.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110383 -summary: Add bulk delete roles API -area: Security -type: enhancement -issues: [] diff --git a/docs/changelog/110391.yaml b/docs/changelog/110391.yaml deleted file mode 100644 index 1e00eda970398..0000000000000 --- a/docs/changelog/110391.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 110391 -summary: Fix ST_DISTANCE Lucene push-down for complex predicates -area: ES|QL -type: bug -issues: - - 110349 diff --git a/docs/changelog/110395.yaml b/docs/changelog/110395.yaml deleted file mode 100644 index 690be55abb5b2..0000000000000 --- a/docs/changelog/110395.yaml +++ /dev/null @@ -1,9 +0,0 @@ -pr: 110395 -summary: Mark the Redact processor as Generally Available -area: Ingest Node -type: feature -issues: [] -highlight: - title: The Redact processor is Generally Available - body: The Redact processor uses the Grok rules engine to obscure text in the input document matching the given Grok patterns. The Redact processor was initially released as Technical Preview in `8.7.0`, and is now released as Generally Available. 
- notable: true diff --git a/docs/changelog/110431.yaml b/docs/changelog/110431.yaml deleted file mode 100644 index 0dd93ef718ef9..0000000000000 --- a/docs/changelog/110431.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 110431 -summary: "[Inference API] Fix serialization for inference delete endpoint response" -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/111475.yaml b/docs/changelog/111475.yaml new file mode 100644 index 0000000000000..264c975444868 --- /dev/null +++ b/docs/changelog/111475.yaml @@ -0,0 +1,6 @@ +pr: 111475 +summary: "ESQL: Fix for overzealous validation in case of invalid mapped fields" +area: ES|QL +type: bug +issues: + - 111452 diff --git a/docs/changelog/111646.yaml b/docs/changelog/111646.yaml new file mode 100644 index 0000000000000..4973dfa0d2cec --- /dev/null +++ b/docs/changelog/111646.yaml @@ -0,0 +1,5 @@ +pr: 111646 +summary: "[Inference API] Move Delete inference checks to threadpool worker" +area: Machine Learning +type: bug +issues: [] diff --git a/docs/changelog/111673.yaml b/docs/changelog/111673.yaml new file mode 100644 index 0000000000000..ebc211633fcab --- /dev/null +++ b/docs/changelog/111673.yaml @@ -0,0 +1,5 @@ +pr: 111673 +summary: Properly handle filters on `TextSimilarityRank` retriever +area: Ranking +type: bug +issues: [] diff --git a/docs/changelog/111729.yaml b/docs/changelog/111729.yaml new file mode 100644 index 0000000000000..c75c14a997da9 --- /dev/null +++ b/docs/changelog/111729.yaml @@ -0,0 +1,6 @@ +pr: 111729 +summary: Speed up dense/sparse vector stats +area: Vector Search +type: bug +issues: + - 111715 diff --git a/docs/changelog/111756.yaml b/docs/changelog/111756.yaml new file mode 100644 index 0000000000000..e58345dbe696a --- /dev/null +++ b/docs/changelog/111756.yaml @@ -0,0 +1,6 @@ +pr: 111756 +summary: Fix `NullPointerException` when doing knn search on empty index without dims +area: Vector Search +type: bug +issues: + - 111733 diff --git a/docs/changelog/111758.yaml b/docs/changelog/111758.yaml new file mode 100644 index 0000000000000..c95cdf48bc8a7 --- /dev/null +++ b/docs/changelog/111758.yaml @@ -0,0 +1,6 @@ +pr: 111758 +summary: Revert "Avoid bucket copies in Aggs" +area: Aggregations +type: bug +issues: + - 111679 diff --git a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc index 3efb8f6de9b3e..e37118019a55c 100644 --- a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc @@ -85,45 +85,45 @@ Additional settings are: <> search analyzers to pick up changes to synonym files. Only to be used for search analyzers. * `expand` (defaults to `true`). -* `lenient` (defaults to `false`). If `true` ignores exceptions while parsing the synonym configuration. It is important -to note that only those synonym rules which cannot get parsed are ignored. 
For instance consider the following request: - -[source,console] --------------------------------------------------- -PUT /test_index -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "synonym": { - "tokenizer": "standard", - "filter": [ "my_stop", "synonym_graph" ] - } - }, - "filter": { - "my_stop": { - "type": "stop", - "stopwords": [ "bar" ] - }, - "synonym_graph": { - "type": "synonym_graph", - "lenient": true, - "synonyms": [ "foo, bar => baz" ] - } - } - } - } - } -} --------------------------------------------------- +Expands definitions for equivalent synonym rules. +See <>. +* `lenient` (defaults to `false`). +If `true` ignores errors while parsing the synonym configuration. +It is important to note that only those synonym rules which cannot get parsed are ignored. +See <> for an example of `lenient` behaviour for invalid synonym rules. + +[discrete] +[[synonym-graph-tokenizer-expand-equivalent-synonyms]] +===== `expand` equivalent synonym rules + +The `expand` parameter controls whether to expand equivalent synonym rules. +Consider a synonym defined like: + +`foo, bar, baz` + +Using `expand: true`, the synonym rule would be expanded into: -With the above request the word `bar` gets skipped but a mapping `foo => baz` is still added. However, if the mapping -being added was `foo, baz => bar` nothing would get added to the synonym list. This is because the target word for the -mapping is itself eliminated because it was a stop word. Similarly, if the mapping was "bar, foo, baz" and `expand` was -set to `false` no mapping would get added as when `expand=false` the target mapping is the first word. However, if -`expand=true` then the mappings added would be equivalent to `foo, baz => foo, baz` i.e, all mappings other than the -stop word. +``` +foo => foo +foo => bar +foo => baz +bar => foo +bar => bar +bar => baz +baz => foo +baz => bar +baz => baz +``` + +When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to: + +``` +foo => foo +bar => foo +baz => foo +``` + +The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`. [discrete] [[synonym-graph-tokenizer-ignore_case-deprecated]] @@ -160,12 +160,65 @@ Text will be processed first through filters preceding the synonym filter before {es} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. In the above example, the synonyms graph token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. -The synonym rules should not contain words that are removed by a filter that appears later in the chain (like a `stop` filter). -Removing a term from a synonym rule means there will be no matching for it at query time. - Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. For example, `asciifolding` will only produce the folded version of the token. Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. If you need to build analyzers that include both multi-token filters and synonym filters, consider using the <> filter, with the multi-token filters in one branch and the synonym filter in the other. 
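As a hedged, minimal sketch of the `expand` setting described earlier on this page (the index, analyzer, and filter names below are hypothetical and not part of this changeset), an equivalent synonym rule with `expand: false` could be declared like this:

[source,console]
----
PUT /my-synonym-test-index
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "my_synonym_analyzer": {
            "tokenizer": "standard",
            "filter": [ "my_synonym_graph" ]
          }
        },
        "filter": {
          "my_synonym_graph": {
            "type": "synonym_graph",
            "expand": false,
            "synonyms": [ "foo, bar, baz" ]
          }
        }
      }
    }
  }
}
----

With this configuration, `bar` and `baz` would be rewritten to the first listed synonym, `foo`, at analysis time, matching the contracted rules shown in the `expand` section above.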
+ +[discrete] +[[synonym-graph-tokenizer-stop-token-filter]] +===== Synonyms and `stop` token filters + +Synonyms and <> interact with each other in the following ways: + +[discrete] +====== Stop token filter *before* synonym token filter + +Stop words will be removed from the synonym rule definition. +This can cause errors in the synonym rule. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the shards assigned to the node) +==== + +For *explicit synonym rules* like `foo, bar => baz` with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. +- If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. + +If the stop filter removed `baz` instead: + +- If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. +- If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. + +For *equivalent synonym rules* like `foo, bar, baz` and `expand: true`, with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. +- If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: + +``` +foo => foo +foo => baz +baz => foo +baz => baz +``` + +[discrete] +====== Stop token filter *after* synonym token filter + +The stop filter will remove the terms from the resulting synonym expansion. + +For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz`, which is removed by the stop filter. + +If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter, thus potentially providing matches for `baz`. diff --git a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc index 046cd297b5092..1658f016db60b 100644 --- a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc @@ -73,47 +73,45 @@ Additional settings are: <> search analyzers to pick up changes to synonym files. Only to be used for search analyzers. * `expand` (defaults to `true`). -* `lenient` (defaults to `false`). If `true` ignores exceptions while parsing the synonym configuration. It is important -to note that only those synonym rules which cannot get parsed are ignored.
For instance consider the following request: - - -[source,console] --------------------------------------------------- -PUT /test_index -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "synonym": { - "tokenizer": "standard", - "filter": [ "my_stop", "synonym" ] - } - }, - "filter": { - "my_stop": { - "type": "stop", - "stopwords": [ "bar" ] - }, - "synonym": { - "type": "synonym", - "lenient": true, - "synonyms": [ "foo, bar => baz" ] - } - } - } - } - } -} --------------------------------------------------- +Expands definitions for equivalent synonym rules. +See <>. +* `lenient` (defaults to `false`). +If `true` ignores errors while parsing the synonym configuration. +It is important to note that only those synonym rules which cannot get parsed are ignored. +See <> for an example of `lenient` behaviour for invalid synonym rules. + +[discrete] +[[synonym-tokenizer-expand-equivalent-synonyms]] +===== `expand` equivalent synonym rules + +The `expand` parameter controls whether to expand equivalent synonym rules. +Consider a synonym defined like: + +`foo, bar, baz` + +Using `expand: true`, the synonym rule would be expanded into: -With the above request the word `bar` gets skipped but a mapping `foo => baz` is still added. However, if the mapping -being added was `foo, baz => bar` nothing would get added to the synonym list. This is because the target word for the -mapping is itself eliminated because it was a stop word. Similarly, if the mapping was "bar, foo, baz" and `expand` was -set to `false` no mapping would get added as when `expand=false` the target mapping is the first word. However, if -`expand=true` then the mappings added would be equivalent to `foo, baz => foo, baz` i.e, all mappings other than the -stop word. +``` +foo => foo +foo => bar +foo => baz +bar => foo +bar => bar +bar => baz +baz => foo +baz => bar +baz => baz +``` +When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to: + +``` +foo => foo +bar => foo +baz => foo +``` + +The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`. [discrete] [[synonym-tokenizer-ignore_case-deprecated]] @@ -135,7 +133,7 @@ To apply synonyms, you will need to include a synonym token filters into an anal "my_analyzer": { "type": "custom", "tokenizer": "standard", - "filter": ["stemmer", "synonym_graph"] + "filter": ["stemmer", "synonym"] } } ---- @@ -148,10 +146,7 @@ Order is important for your token filters. Text will be processed first through filters preceding the synonym filter before being processed by the synonym filter. {es} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. -In the above example, the synonyms graph token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. - -The synonym rules should not contain words that are removed by a filter that appears later in the chain (like a `stop` filter). -Removing a term from a synonym rule means there will be no matching for it at query time. +In the above example, the synonyms token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. 
@@ -159,3 +154,59 @@ For example, `asciifolding` will only produce the folded version of the token. Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. If you need to build analyzers that include both multi-token filters and synonym filters, consider using the <> filter, with the multi-token filters in one branch and the synonym filter in the other. + +[discrete] +[[synonym-tokenizer-stop-token-filter]] +===== Synonyms and `stop` token filters + +Synonyms and <> interact with each other in the following ways: + +[discrete] +====== Stop token filter *before* synonym token filter + +Stop words will be removed from the synonym rule definition. +This can cause errors in the synonym rule. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the shards assigned to the node) +==== + +For *explicit synonym rules* like `foo, bar => baz` with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. +- If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. + +If the stop filter removed `baz` instead: + +- If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. +- If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. + +For *equivalent synonym rules* like `foo, bar, baz` and `expand: true`, with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. +- If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: + +``` +foo => foo +foo => baz +baz => foo +baz => baz +``` + +[discrete] +====== Stop token filter *after* synonym token filter + +The stop filter will remove the terms from the resulting synonym expansion. + +For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz`, which is removed by the stop filter. + +If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter, thus potentially providing matches for `baz`. diff --git a/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc b/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc index 63dd72dade8d0..e780c24963312 100644 --- a/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc @@ -15,7 +15,7 @@ This format uses two different definitions: ipod, i-pod, i pod computer, pc, laptop ---- -* Explicit mappings: Matches a group of words to other words. Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: +* Explicit synonyms: Matches a group of words to other words.
Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: + [source,synonyms] ---- diff --git a/docs/reference/data-streams/data-streams.asciidoc b/docs/reference/data-streams/data-streams.asciidoc index 9c7137563caef..1484e21febdb3 100644 --- a/docs/reference/data-streams/data-streams.asciidoc +++ b/docs/reference/data-streams/data-streams.asciidoc @@ -157,4 +157,5 @@ include::set-up-a-data-stream.asciidoc[] include::use-a-data-stream.asciidoc[] include::change-mappings-and-settings.asciidoc[] include::tsds.asciidoc[] +include::logs.asciidoc[] include::lifecycle/index.asciidoc[] diff --git a/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc index 6bd157071f54e..7d33a5b5f880c 100644 --- a/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc +++ b/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc @@ -54,7 +54,7 @@ duration the document could be deleted. When empty, every document in this data `enabled`:: (Optional, boolean) -If defined, it turns data streqm lifecycle on/off (`true`/`false`) for this data stream. +If defined, it turns data stream lifecycle on/off (`true`/`false`) for this data stream. A data stream lifecycle that's disabled (`enabled: false`) will have no effect on the data stream. Defaults to `true`. diff --git a/docs/reference/data-streams/logs.asciidoc b/docs/reference/data-streams/logs.asciidoc new file mode 100644 index 0000000000000..e870289bcf7be --- /dev/null +++ b/docs/reference/data-streams/logs.asciidoc @@ -0,0 +1,52 @@ +[[logs-data-stream]] +== Logs data stream + +preview::[Logs data streams and the logsdb index mode are in tech preview and may be changed or removed in the future. Don't use logs data streams or logsdb index mode in production.] + +A logs data stream is a data stream type that stores log data more efficiently. + +In benchmarks, log data stored in a logs data stream used ~2.5 times less disk space than a regular data +stream. The exact impact will vary depending on your data set. + +The following features are enabled in a logs data stream: + +* <>, which omits storing the `_source` field. When the document source is requested, it is synthesized from document fields upon retrieval. + +* Index sorting. This yields a lower storage footprint. By default indices are sorted by `host.name` and `@timestamp` fields at index time. + +* More space efficient compression for fields with <> enabled. + +[discrete] +[[how-to-use-logsds]] +=== Create a logs data stream + +To create a logs data stream, set your index template `index.mode` to `logsdb`: + +[source,console] +---- +PUT _index_template/my-index-template +{ + "index_patterns": ["logs-*"], + "data_stream": { }, + "template": { + "settings": { + "index.mode": "logsdb" <1> + } + }, + "priority": 101 <2> +} +---- +// TEST + +<1> The index mode setting. +<2> The index template priority. By default, Elasticsearch ships with an index template with a `logs-*-*` pattern with a priority of 100. You need to define a priority higher than 100 to ensure that this index template gets selected over the default index template for the `logs-*-*` pattern. See the <> for more information. + +After the index template is created, new indices that use the template will be configured as a logs data stream. You can start indexing data and <>. 
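As a hedged sketch of that first indexing call (the data stream name and field values below are hypothetical, chosen only to match the `logs-*` pattern in the template above):

[source,console]
----
POST logs-myapp-default/_doc
{
  "@timestamp": "2099-05-06T16:21:15.000Z",
  "host.name": "my-host-01",
  "message": "Connection established"
}
----

Because the template declares `data_stream` and the `logsdb` index mode, the first document indexed against a matching name creates the backing data stream; as with any data stream write, the document must include an `@timestamp` field.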
+ +//// +[source,console] +---- +DELETE _index_template/my-index-template +---- +// TEST[continued] +//// diff --git a/docs/reference/data-streams/tsds.asciidoc b/docs/reference/data-streams/tsds.asciidoc index 460048d8ccbc9..de89fa1ca3f31 100644 --- a/docs/reference/data-streams/tsds.asciidoc +++ b/docs/reference/data-streams/tsds.asciidoc @@ -53,8 +53,9 @@ shard segments by `_tsid` and `@timestamp`. documents, the document `_id` is a hash of the document's dimensions and `@timestamp`. A TSDS doesn't support custom document `_id` values. + * A TSDS uses <>, and as a result is -subject to a number of <>. +subject to some <> and <> applied to the `_source` field. NOTE: A time series index can contain fields other than dimensions or metrics. diff --git a/docs/reference/esql/esql-across-clusters.asciidoc b/docs/reference/esql/esql-across-clusters.asciidoc index 6231b4f4f0a69..d4e36cd41e046 100644 --- a/docs/reference/esql/esql-across-clusters.asciidoc +++ b/docs/reference/esql/esql-across-clusters.asciidoc @@ -49,11 +49,6 @@ Refer to <> for prerequisi [[esql-ccs-security-model-api-key]] ===== API key authentication -[NOTE] -==== -`ENRICH` is *not supported* in this version when using {esql} with the API key based security model. -==== - The following information pertains to using {esql} across clusters with the <>. You'll need to follow the steps on that page for the *full setup instructions*. This page only contains additional information specific to {esql}. API key based cross-cluster search (CCS) enables more granular control over allowed actions between clusters. @@ -66,6 +61,7 @@ You will need to: Using {esql} with the API key based security model requires some additional permissions that may not be needed when using the traditional query DSL based search. The following example API call creates a role that can query remote indices using {esql} when using the API key based security model. +The final privilege, `remote_cluster`, is required to allow remote enrich operations. [source,console] ---- POST /_security/role/remote1 @@ -84,7 +80,17 @@ "privileges": [ "read","read_cross_cluster" ], <4> "clusters" : ["my_remote_cluster"] <5> } - ] + ], + "remote_cluster": [ <6> + { + "privileges": [ + "monitor_enrich" + ], + "clusters": [ + "my_remote_cluster" + ] + } + ] } ---- @@ -95,6 +101,7 @@ POST /_security/role/remote1 <5> The remote clusters to which these privileges apply. This remote cluster must be configured with a <> and connected to the remote cluster before the remote index can be queried. Verify connection using the <> API. +<6> Required to allow remote enrichment. Without this, the user cannot read from the `.enrich` indices on the remote cluster. The `remote_cluster` security privilege was introduced in version *8.15.0*. You will then need a user or API key with the permissions you created above. The following example API call creates a user with the `remote1` role. @@ -109,6 +116,11 @@ POST /_security/user/remote_user Remember that all cross-cluster requests from the local cluster are bound by the cross cluster API key’s privileges, which are controlled by the remote cluster's administrator. +[TIP] +==== +Cross cluster API keys created in versions prior to 8.15.0 will need to be replaced or updated to add the new permissions required for {esql} with ENRICH. +==== + [discrete] [[ccq-remote-cluster-setup]] ==== Remote cluster setup @@ -169,9 +181,11 @@ clusters, aiming to minimize computation or inter-cluster data transfer.
Ensuring the policy exists with consistent data on both the local cluster and the remote clusters is critical for ES|QL to produce a consistent query result. -[NOTE] +[TIP] ==== -Enrich across clusters is *not supported* in this version when using {esql} with the <>. +Enrich in {esql} across clusters using the API key based security model was introduced in version *8.15.0*. +Cross cluster API keys created in versions prior to 8.15.0 will need to be replaced or updated to use the new required permissions. +Refer to the example in the <> section. ==== In the following example, the enrich with `hosts` policy can be executed on diff --git a/docs/reference/esql/esql-limitations.asciidoc b/docs/reference/esql/esql-limitations.asciidoc index 11e3fd7ae9883..8accc7550edbb 100644 --- a/docs/reference/esql/esql-limitations.asciidoc +++ b/docs/reference/esql/esql-limitations.asciidoc @@ -85,6 +85,11 @@ Some <> are not supported in all contexts: ** `cartesian_point` ** `cartesian_shape` +In addition, when <>, +it's possible for the same field to be mapped to multiple types. +These fields cannot be directly used in queries or returned in results, +unless they're <>. + [discrete] [[esql-_source-availability]] === _source availability diff --git a/docs/reference/esql/esql-multi-index.asciidoc b/docs/reference/esql/esql-multi-index.asciidoc new file mode 100644 index 0000000000000..25874a132d93d --- /dev/null +++ b/docs/reference/esql/esql-multi-index.asciidoc @@ -0,0 +1,175 @@ +[[esql-multi-index]] +=== Using {esql} to query multiple indices +++++ +Using {esql} to query multiple indices +++++ + +With {esql}, you can execute a single query across multiple indices, data streams, or aliases. +To do so, use wildcards and date arithmetic. The following example uses a comma-separated list and a wildcard: + +[source,esql] +---- +FROM employees-00001,other-employees-* +---- + +Use the format `:` to <>: + +[source,esql] +---- +FROM cluster_one:employees-00001,cluster_two:other-employees-* +---- + +[discrete] +[[esql-multi-index-invalid-mapping]] +=== Field type mismatches + +When querying multiple indices, data streams, or aliases, you might find that the same field is mapped to multiple different types. +For example, consider the two indices with the following field mappings: + +*index: events_ip* +``` +{ + "mappings": { + "properties": { + "@timestamp": { "type": "date" }, + "client_ip": { "type": "ip" }, + "event_duration": { "type": "long" }, + "message": { "type": "keyword" } + } + } +} +``` + +*index: events_keyword* +``` +{ + "mappings": { + "properties": { + "@timestamp": { "type": "date" }, + "client_ip": { "type": "keyword" }, + "event_duration": { "type": "long" }, + "message": { "type": "keyword" } + } + } +} +``` + +When you query each of these individually with a simple query like `FROM events_ip`, the results are provided with type-specific columns: + +[source.merge.styled,esql] +---- +FROM events_ip +| SORT @timestamp DESC +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +@timestamp:date | client_ip:ip | event_duration:long | message:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +|=== + +Note how the `client_ip` column is correctly identified as type `ip`, and all values are displayed.
+However, if instead the query sources two conflicting indices with `FROM events_*`, the type of the `client_ip` column cannot be determined +and is reported as `unsupported` with all values returned as `null`. + +[[query-unsupported]] +[source.merge.styled,esql] +---- +FROM events_* +| SORT @timestamp DESC +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +@timestamp:date | client_ip:unsupported | event_duration:long | message:keyword +2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +2023-10-23T13:51:54.732Z | null | 725448 | Connection error +2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +|=== + +In addition, if the query refers to this unsupported field directly, the query fails: + +[source.merge.styled,esql] +---- +FROM events_* +| SORT client_ip DESC +---- + +[source,bash] +---- +Cannot use field [client_ip] due to ambiguities being mapped as +[2] incompatible types: + [ip] in [events_ip], + [keyword] in [events_keyword] +---- + +[discrete] +[[esql-multi-index-union-types]] +=== Union types + +experimental::[] + +{esql} has a way to handle <>. When the same field is mapped to multiple types in multiple indices, +the type of the field is understood to be a _union_ of the various types in the index mappings. +As seen in the preceding examples, this _union type_ cannot be used in the results, +and cannot be referred to by the query -- except in `KEEP`, `DROP` or when it's passed to a type conversion function that accepts all the types in +the _union_ and converts the field to a single type. {esql} offers a suite of <> to achieve this. + +In the above examples, the query can use a command like `EVAL client_ip = TO_IP(client_ip)` to resolve +the union of `ip` and `keyword` to just `ip`. +You can also use the type-conversion syntax `EVAL client_ip = client_ip::IP`. +Alternatively, the query could use <> to convert all supported types into `KEYWORD`. + +For example, the <> that returned `client_ip:unsupported` with `null` values can be improved using the `TO_IP` function or the equivalent `field::ip` syntax. +These changes also resolve the error message. +As long as the only reference to the original field is to pass it to a conversion function that resolves the type ambiguity, no error results. + +[source.merge.styled,esql] +---- +FROM events_* +| EVAL client_ip = TO_IP(client_ip) +| KEEP @timestamp, client_ip, event_duration, message +| SORT @timestamp DESC +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +@timestamp:date | client_ip:ip | event_duration:long | message:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +|=== + +[discrete] +[[esql-multi-index-index-metadata]] +=== Index metadata + +It can be helpful to know the particular index from which each row is sourced. +To get this information, use the <> option on the <> command. 
+ +[source.merge.styled,esql] +---- +FROM events_* METADATA _index +| EVAL client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT @timestamp DESC +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +events_ip | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +events_ip | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +events_ip | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +events_keyword | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +events_keyword | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +events_keyword | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +events_keyword | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +|=== diff --git a/docs/reference/esql/esql-rest.asciidoc b/docs/reference/esql/esql-rest.asciidoc index 5b90e96d7a734..2c8c5e81e273d 100644 --- a/docs/reference/esql/esql-rest.asciidoc +++ b/docs/reference/esql/esql-rest.asciidoc @@ -278,6 +278,47 @@ POST /_query ---- // TEST[setup:library] +The parameters can be named parameters or positional parameters. + +Named parameters use question mark placeholders (`?`) followed by a string. + +[source,console] +---- +POST /_query +{ + "query": """ + FROM library + | EVAL year = DATE_EXTRACT("year", release_date) + | WHERE page_count > ?page_count AND author == ?author + | STATS count = COUNT(*) by year + | WHERE count > ?count + | LIMIT 5 + """, + "params": [{"page_count" : 300}, {"author" : "Frank Herbert"}, {"count" : 0}] +} +---- +// TEST[setup:library] + +Positional parameters use question mark placeholders (`?`) followed by an +integer. + +[source,console] +---- +POST /_query +{ + "query": """ + FROM library + | EVAL year = DATE_EXTRACT("year", release_date) + | WHERE page_count > ?1 AND author == ?2 + | STATS count = COUNT(*) by year + | WHERE count > ?3 + | LIMIT 5 + """, + "params": [300, "Frank Herbert", 0] +} +---- +// TEST[setup:library] + [discrete] [[esql-rest-async-query]] ==== Running an async {esql} query diff --git a/docs/reference/esql/esql-using.asciidoc b/docs/reference/esql/esql-using.asciidoc index 3e045163069ec..d2e18bf1b91a3 100644 --- a/docs/reference/esql/esql-using.asciidoc +++ b/docs/reference/esql/esql-using.asciidoc @@ -12,6 +12,9 @@ and set up alerts. Using {esql} in {elastic-sec} to investigate events in Timeline, create detection rules, and build {esql} queries using Elastic AI Assistant. +<>:: +Using {esql} to query multiple indexes and resolve field type mismatches. + <>:: Using {esql} to query across multiple clusters. @@ -21,5 +24,6 @@ Using the <> to list and cancel {esql} queries. include::esql-rest.asciidoc[] include::esql-kibana.asciidoc[] include::esql-security-solution.asciidoc[] +include::esql-multi-index.asciidoc[] include::esql-across-clusters.asciidoc[] include::task-management.asciidoc[] diff --git a/docs/reference/esql/processing-commands/dissect.asciidoc b/docs/reference/esql/processing-commands/dissect.asciidoc index c48b72af0de7e..3dca50c8aee5e 100644 --- a/docs/reference/esql/processing-commands/dissect.asciidoc +++ b/docs/reference/esql/processing-commands/dissect.asciidoc @@ -17,6 +17,8 @@ multiple values, `DISSECT` will process each value. `pattern`:: A <>. +If a field name conflicts with an existing column, the existing column is dropped. 
+If a field name is used more than once, only the rightmost duplicate creates a column. ``:: A string used as the separator between appended values, when using the <>. diff --git a/docs/reference/esql/processing-commands/enrich.asciidoc b/docs/reference/esql/processing-commands/enrich.asciidoc index 5470d81b2f40b..844cc2c62d1ed 100644 --- a/docs/reference/esql/processing-commands/enrich.asciidoc +++ b/docs/reference/esql/processing-commands/enrich.asciidoc @@ -28,11 +28,16 @@ name as the `match_field` defined in the <>. The enrich fields from the enrich index that are added to the result as new columns. If a column with the same name as the enrich field already exists, the existing column will be replaced by the new column. If not specified, each of -the enrich fields defined in the policy is added +the enrich fields defined in the policy is added. +A column with the same name as the enrich field will be dropped unless the +enrich field is renamed. `new_nameX`:: Enables you to change the name of the column that's added for each of the enrich fields. Defaults to the enrich field name. +If a column has the same name as the new name, it will be discarded. +If a name (new or original) occurs more than once, only the rightmost duplicate +creates a new column. *Description* diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc index 9b34fca7ceeff..be69d775b2755 100644 --- a/docs/reference/esql/processing-commands/eval.asciidoc +++ b/docs/reference/esql/processing-commands/eval.asciidoc @@ -13,10 +13,12 @@ EVAL [column1 =] value1[, ..., [columnN =] valueN] `columnX`:: The column name. +If a column with the same name already exists, the existing column is dropped. +If a column name is used more than once, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a -<>. +<>. Can use columns defined left of this one. *Description* diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc index d5d58a9eaee12..58493a13359d2 100644 --- a/docs/reference/esql/processing-commands/grok.asciidoc +++ b/docs/reference/esql/processing-commands/grok.asciidoc @@ -17,6 +17,9 @@ multiple values, `GROK` will process each value. `pattern`:: A grok pattern. +If a field name conflicts with an existing column, the existing column is discarded. +If a field name is used more than once, a multi-valued column will be created with one value +per each occurrence of the field name. *Description* @@ -64,4 +67,16 @@ include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime] |=== include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime-result] |=== + +If a field name is used more than once, `GROK` creates a multi-valued +column: + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames-result] +|=== // end::examples[] diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 57f32a68aec4c..a07afa64a756c 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -13,6 +13,8 @@ KEEP columns `columns`:: A comma-separated list of columns to keep. Supports wildcards. 
+See below for the behavior in case an existing column matches multiple +given wildcards or column names. *Description* @@ -26,7 +28,7 @@ Fields are added in the order they appear. If one field matches multiple express 2. Partial wildcard expressions (for example: `fieldNam*`) 3. Wildcard only (`*`) -If a field matches two expressions with the same precedence, the right-most expression wins. +If a field matches two expressions with the same precedence, the rightmost expression wins. Refer to the examples for illustrations of these precedence rules. diff --git a/docs/reference/esql/processing-commands/lookup.asciidoc b/docs/reference/esql/processing-commands/lookup.asciidoc index 1944d243968a8..142bcb93dc445 100644 --- a/docs/reference/esql/processing-commands/lookup.asciidoc +++ b/docs/reference/esql/processing-commands/lookup.asciidoc @@ -15,6 +15,7 @@ LOOKUP table ON match_field1[, match_field2, ...] `table`:: The name of the `table` provided in the request to match. +If the table's column names conflict with existing columns, the existing columns will be dropped. `match_field`:: The fields in the input to match against the table. diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc index 773fe8b640f75..0f338ed6e15e8 100644 --- a/docs/reference/esql/processing-commands/rename.asciidoc +++ b/docs/reference/esql/processing-commands/rename.asciidoc @@ -15,7 +15,9 @@ RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN] The name of a column you want to rename. `new_nameX`:: -The new name of the column. +The new name of the column. If it conflicts with an existing column name, +the existing column is dropped. If multiple columns are renamed to the same +name, all but the rightmost column with the same new name are dropped. *Description* diff --git a/docs/reference/esql/processing-commands/stats.asciidoc b/docs/reference/esql/processing-commands/stats.asciidoc index fe84c56bbfc19..db533866a0b1b 100644 --- a/docs/reference/esql/processing-commands/stats.asciidoc +++ b/docs/reference/esql/processing-commands/stats.asciidoc @@ -15,12 +15,15 @@ STATS [column1 =] expression1[, ..., [columnN =] expressionN] `columnX`:: The name by which the aggregated value is returned. If omitted, the name is equal to the corresponding expression (`expressionX`). +If multiple columns have the same name, all but the rightmost column with this +name will be ignored. `expressionX`:: An expression that computes an aggregated value. `grouping_expressionX`:: An expression that outputs the values to group by. +If its name coincides with one of the computed columns, that column will be ignored. NOTE: Individual `null` values are skipped when computing aggregations. diff --git a/docs/reference/esql/source-commands/from.asciidoc b/docs/reference/esql/source-commands/from.asciidoc index 9ab21e8996aa0..1abe7dcb2fa9b 100644 --- a/docs/reference/esql/source-commands/from.asciidoc +++ b/docs/reference/esql/source-commands/from.asciidoc @@ -58,24 +58,22 @@ today's index: FROM ---- -Use comma-separated lists or wildcards to query multiple data streams, indices, -or aliases: +Use comma-separated lists or wildcards to <>: [source,esql] ---- FROM employees-00001,other-employees-* ---- -Use the format `:` to query data streams and indices -on remote clusters: +Use the format `:` to <>: [source,esql] ---- FROM cluster_one:employees-00001,cluster_two:other-employees-* ---- -See <>. 
- Use the optional `METADATA` directive to enable <>: [source,esql] diff --git a/docs/reference/esql/source-commands/row.asciidoc b/docs/reference/esql/source-commands/row.asciidoc index adce844f365b8..d127080415f37 100644 --- a/docs/reference/esql/source-commands/row.asciidoc +++ b/docs/reference/esql/source-commands/row.asciidoc @@ -13,6 +13,7 @@ ROW column1 = value1[, ..., columnN = valueN] `columnX`:: The column name. +In case of duplicate column names, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a diff --git a/docs/reference/features/apis/features-apis.asciidoc b/docs/reference/features/apis/features-apis.asciidoc index fe06471cff0df..83414b69fabf3 100644 --- a/docs/reference/features/apis/features-apis.asciidoc +++ b/docs/reference/features/apis/features-apis.asciidoc @@ -7,7 +7,7 @@ by Elasticsearch and Elasticsearch plugins. [discrete] === Features APIs * <> -* <> +* <> include::get-features-api.asciidoc[] include::reset-features-api.asciidoc[] diff --git a/docs/reference/geospatial-analysis.asciidoc b/docs/reference/geospatial-analysis.asciidoc index 7577bb222127f..6760040e14bc7 100644 --- a/docs/reference/geospatial-analysis.asciidoc +++ b/docs/reference/geospatial-analysis.asciidoc @@ -2,7 +2,7 @@ [[geospatial-analysis]] = Geospatial analysis -Did you know that {es} has geospatial capabilities? https://www.elastic.co/blog/geo-location-and-search[{es} and geo] go way back, to 2010. A lot has happened since then and today {es} provides robust geospatial capabilities with speed, all with a stack that scales automatically. +Did you know that {es} has geospatial capabilities? https://www.elastic.co/blog/geo-location-and-search[{es} and geo] go way back, to 2010. A lot has happened since then and today {es} provides robust geospatial capabilities with speed, all with a stack that scales automatically. Not sure where to get started with {es} and geo? Then, you have come to the right place. @@ -18,8 +18,10 @@ Have an index with lat/lon pairs but no geo_point mapping? Use <> lets you clean, transform, and augment your data before indexing. +Data is often messy and incomplete. <> lets you clean, transform, and augment your data before indexing. +* Use <> together with <> to index CSV files with geo data. + Kibana's {kibana-ref}/import-geospatial-data.html[Import CSV] feature can help with this. * Use <> to add geographical location of an IPv4 or IPv6 address. * Use <> to convert grid tiles or hexagonal cell ids to bounding boxes or polygons which describe their shape. * Use <> for reverse geocoding. For example, use {kibana-ref}/reverse-geocoding-tutorial.html[reverse geocoding] to visualize metropolitan areas by web traffic. @@ -30,6 +32,18 @@ Data is often messy and incomplete. <> lets you clean, <> answer location-driven questions. Find documents that intersect with, are within, are contained by, or do not intersect your query geometry. Combine geospatial queries with full text search queries for unparalleled searching experience. For example, "Show me all subscribers that live within 5 miles of our new gym location, that joined in the last year and have running mentioned in their profile". +[discrete] +[[esql-query]] +=== ES|QL + +<> has support for <> functions, enabling efficient index searching for documents that intersect with, are within, are contained by, or are disjoint from a query geometry. In addition, the `ST_DISTANCE` function calculates the distance between two points. 
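+
+For example, a query sketch along the following lines (the `airports` index and its `location` and `name` fields are hypothetical, not part of any shipped dataset) uses `ST_DISTANCE` to find documents within roughly 10 km of a point:
+
+[source,esql]
+----
+FROM airports
+| WHERE ST_DISTANCE(location, TO_GEOPOINT("POINT(12.49 41.89)")) < 10000
+| KEEP name, location
+----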
+ +* experimental:[] <> +* experimental:[] <> +* experimental:[] <> +* experimental:[] <> +* experimental:[] <> + [discrete] [[geospatial-aggregate]] === Aggregate @@ -42,12 +56,12 @@ Geospatial bucket aggregations: * <> groups geo_point and geo_shape values into buckets that represent a grid. * <> groups geo_point and geo_shape values into buckets that represent an H3 hexagonal cell. * <> groups geo_point and geo_shape values into buckets that represent a grid. Each cell corresponds to a {wikipedia}/Tiled_web_map[map tile] as used by many online map sites. - + Geospatial metric aggregations: * <> computes the geographic bounding box containing all values for a Geopoint or Geoshape field. * <> computes the weighted centroid from all coordinate values for geo fields. -* <> aggregates all geo_point values within a bucket into a LineString ordered by the chosen sort field. Use geo_line aggregation to create {kibana-ref}/asset-tracking-tutorial.html[vehicle tracks]. +* <> aggregates all geo_point values within a bucket into a LineString ordered by the chosen sort field. Use geo_line aggregation to create {kibana-ref}/asset-tracking-tutorial.html[vehicle tracks]. Combine aggregations to perform complex geospatial analysis. For example, to calculate the most recent GPS tracks per flight, use a <> to group documents into buckets per aircraft. Then use geo-line aggregation to compute a track for each aircraft. In another example, use geotile grid aggregation to group documents into a grid. Then use geo-centroid aggregation to find the weighted centroid of each grid cell. @@ -79,4 +93,4 @@ Put machine learning to work for you and find the data that should stand out wit Let your location data drive insights and action with {kibana-ref}/geo-alerting.html[geographic alerts]. Commonly referred to as geo-fencing, track moving objects as they enter or exit a boundary to receive notifications through common business systems (email, Slack, Teams, PagerDuty, and more). -Interested in learning more? Follow {kibana-ref}/asset-tracking-tutorial.html[step-by-step instructions] for setting up tracking containment alerts to monitor moving vehicles. \ No newline at end of file +Interested in learning more? Follow {kibana-ref}/asset-tracking-tutorial.html[step-by-step instructions] for setting up tracking containment alerts to monitor moving vehicles. diff --git a/docs/reference/how-to/size-your-shards.asciidoc b/docs/reference/how-to/size-your-shards.asciidoc index 56e5fbbf15c77..6baac25aa0532 100644 --- a/docs/reference/how-to/size-your-shards.asciidoc +++ b/docs/reference/how-to/size-your-shards.asciidoc @@ -152,9 +152,10 @@ same data. However, very large shards can also cause slower searches and will take longer to recover after a failure. There is no hard limit on the physical size of a shard, and each shard can in -theory contain up to just over two billion documents. However, experience shows -that shards between 10GB and 50GB typically work well for many use cases, as -long as the per-shard document count is kept below 200 million. +theory contain up to <>. However, experience shows that shards between 10GB and 50GB +typically work well for many use cases, as long as the per-shard document count +is kept below 200 million. 
You may be able to use larger shards depending on your network and use case, and smaller shards may be appropriate for @@ -184,6 +185,29 @@ index prirep shard store // TESTRESPONSE[s/\.ds-my-data-stream-2099\.05\.06-000001/my-index-000001/] // TESTRESPONSE[s/50gb/.*/] +If an index's shard is experiencing degraded performance from surpassing the +recommended 50GB size, you may consider fixing the index's shards' sizing. +Shards are immutable and therefore their size is fixed in place, +so indices must be copied with corrected settings. This requires first ensuring +sufficient disk to copy the data. Afterwards, you can copy the index's data +with corrected settings via one of the following options: + +* running <> to increase number of primary +shards + +* creating a destination index with corrected settings and then running +<> + +Kindly note performing a <> and/or +<> would be insufficient to resolve shards' +sizing. + +Once a source index's data is copied into its destination index, the source +index can be <>. You may then consider setting +<> against the destination index for the source +index's name to point to it for continuity. + + [discrete] [[shard-count-recommendation]] ==== Master-eligible nodes should have at least 1GB of heap per 3000 indices @@ -544,3 +568,42 @@ PUT _cluster/settings } } ---- + +[discrete] +[[troubleshooting-max-docs-limit]] +==== Number of documents in the shard cannot exceed [2147483519] + +Each {es} shard is a separate Lucene index, so it shares Lucene's +https://github.com/apache/lucene/issues/5176[`MAX_DOC` limit] of having at most +2,147,483,519 (`(2^31)-129`) documents. This per-shard limit applies to the sum +of `docs.count` plus `docs.deleted` as reported by the <>. Exceeding this limit will result in errors like the following: + +[source,txt] +---- +Elasticsearch exception [type=illegal_argument_exception, reason=Number of documents in the shard cannot exceed [2147483519]] +---- + +TIP: This calculation may differ from the <> +calculation, because the Count API does not include nested documents and does +not count deleted documents. + +This limit is much higher than the <> of approximately 200M documents per shard. + +If you encounter this problem, try to mitigate it by using the +<> to merge away some deleted docs. For +example: + +[source,console] +---- +POST my-index-000001/_forcemerge?only_expunge_deletes=true +---- +// TEST[setup:my_index] + +This will launch an asynchronous task which can be monitored via the +<>. + +It may also be helpful to <>, +or to <> or <> the index into +one with a larger number of shards. diff --git a/docs/reference/ilm/actions/ilm-delete.asciidoc b/docs/reference/ilm/actions/ilm-delete.asciidoc index eac3b9804709a..beed60105ed96 100644 --- a/docs/reference/ilm/actions/ilm-delete.asciidoc +++ b/docs/reference/ilm/actions/ilm-delete.asciidoc @@ -15,6 +15,18 @@ Deletes the searchable snapshot created in a previous phase. Defaults to `true`. This option is applicable when the <> action is used in any previous phase. ++ +If you set this option to `false`, use the <> to remove {search-snaps} from your snapshot repository when +they are no longer needed. ++ +If you manually delete an index before the {ilm-cap} delete phase runs, then +{ilm-init} will not delete the underlying {search-snap}. Use the +<> to remove the {search-snap} from +your snapshot repository when it is no longer needed. ++ +See <> for +further information about deleting {search-snaps}. 
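++
+For example, a minimal policy sketch (the policy name is illustrative) that deletes the index during the delete phase but keeps its {search-snap} for manual cleanup later:
++
+[source,console]
+----
+PUT _ilm/policy/my_policy
+{
+  "policy": {
+    "phases": {
+      "delete": {
+        "actions": {
+          "delete": {
+            "delete_searchable_snapshot": false
+          }
+        }
+      }
+    }
+  }
+}
+----
+// TEST[skip:illustrative sketch only]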
WARNING: If a policy with a searchable snapshot action is applied on an existing searchable snapshot index, the snapshot backing this index will NOT be deleted because it was not created by this policy. If you want diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc index 04bebfae2763b..24149afe802a2 100644 --- a/docs/reference/index-modules.asciidoc +++ b/docs/reference/index-modules.asciidoc @@ -81,8 +81,9 @@ breaking change]. If you are updating the compression type, the new one will be applied after segments are merged. Segment merging can be forced using <>. Experiments with indexing log datasets - have shown that `best_compression` gives up to ~18% lower storage usage - compared to `default` while only minimally affecting indexing throughput (~2%). + have shown that `best_compression` gives up to ~18% lower storage usage in + the most ideal scenario compared to `default` while only minimally affecting + indexing throughput (~2%). [[index-mode-setting]] `index.mode`:: + diff --git a/docs/reference/indices/forcemerge.asciidoc b/docs/reference/indices/forcemerge.asciidoc index 1d473acbd5d48..6eacaac5e7b2a 100644 --- a/docs/reference/indices/forcemerge.asciidoc +++ b/docs/reference/indices/forcemerge.asciidoc @@ -89,8 +89,9 @@ one at a time. If you expand the `force_merge` threadpool on a node then it will force merge its shards in parallel. Force merge makes the storage for the shard being merged temporarily -increase, up to double its size in case `max_num_segments` parameter is set to -`1`, as all segments need to be rewritten into a new one. +increase, as it may require free space up to triple its size in case +`max_num_segments` parameter is set to `1`, to rewrite all segments into a new +one. [[forcemerge-api-path-params]] ==== {api-path-parms-title} diff --git a/docs/reference/inference/delete-inference.asciidoc b/docs/reference/inference/delete-inference.asciidoc index 2f9d9511e6326..4df72ba672092 100644 --- a/docs/reference/inference/delete-inference.asciidoc +++ b/docs/reference/inference/delete-inference.asciidoc @@ -8,7 +8,7 @@ Deletes an {infer} endpoint. IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or -Hugging Face. For built-in models and models uploaded though Eland, the {infer} +Hugging Face. For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. diff --git a/docs/reference/inference/get-inference.asciidoc b/docs/reference/inference/get-inference.asciidoc index 7f4dc1c496837..c3fe841603bcc 100644 --- a/docs/reference/inference/get-inference.asciidoc +++ b/docs/reference/inference/get-inference.asciidoc @@ -8,7 +8,7 @@ Retrieves {infer} endpoint information. IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or -Hugging Face. For built-in models and models uploaded though Eland, the {infer} +Hugging Face. For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc index 896cb02a9e699..0d2b61ef067f9 100644 --- a/docs/reference/inference/inference-apis.asciidoc +++ b/docs/reference/inference/inference-apis.asciidoc @@ -6,7 +6,7 @@ experimental[] IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio or -Hugging Face. For built-in models and models uploaded though Eland, the {infer} +Hugging Face. For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. @@ -25,6 +25,8 @@ include::delete-inference.asciidoc[] include::get-inference.asciidoc[] include::post-inference.asciidoc[] include::put-inference.asciidoc[] +include::service-amazon-bedrock.asciidoc[] +include::service-anthropic.asciidoc[] include::service-azure-ai-studio.asciidoc[] include::service-azure-openai.asciidoc[] include::service-cohere.asciidoc[] diff --git a/docs/reference/inference/post-inference.asciidoc b/docs/reference/inference/post-inference.asciidoc index 3ad23ac3300cc..52131c0b10776 100644 --- a/docs/reference/inference/post-inference.asciidoc +++ b/docs/reference/inference/post-inference.asciidoc @@ -8,7 +8,7 @@ Performs an inference task on an input text by using an {infer} endpoint. IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or -Hugging Face. For built-in models and models uploaded though Eland, the {infer} +Hugging Face. For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc index 101c0a24b66b7..57485e0720cca 100644 --- a/docs/reference/inference/put-inference.asciidoc +++ b/docs/reference/inference/put-inference.asciidoc @@ -6,10 +6,16 @@ experimental[] Creates an {infer} endpoint to perform an {infer} task. -IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in -{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI or Hugging Face. -For built-in models and models uploaded though Eland, the {infer} APIs offer an alternative way to use and manage trained models. -However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. +[IMPORTANT] +==== +* The {infer} APIs enable you to use certain services, such as built-in +{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, +Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic or Hugging Face. +* For built-in models and models uploaded through Eland, the {infer} APIs offer an +alternative way to use and manage trained models. However, if you do not plan to +use the {infer} APIs to use these models or if you want to use non-NLP models, +use the <>. 
+==== [discrete] @@ -25,7 +31,6 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo * Requires the `manage_inference` <> (the built-in `inference_admin` role grants this privilege) - [discrete] [[put-inference-api-desc]] ==== {api-description-title} @@ -34,6 +39,8 @@ The create {infer} API enables you to create an {infer} endpoint and configure a The following services are available through the {infer} API, click the links to review the configuration details of the services: +* <> +* <> * <> * <> * <> @@ -44,3 +51,6 @@ The following services are available through the {infer} API, click the links to * <> * <> * <> + +The {es} and ELSER services run on a {ml} node in your {es} cluster. The rest of +the services connect to external providers. \ No newline at end of file diff --git a/docs/reference/inference/service-amazon-bedrock.asciidoc b/docs/reference/inference/service-amazon-bedrock.asciidoc new file mode 100644 index 0000000000000..4ffa368613a0e --- /dev/null +++ b/docs/reference/inference/service-amazon-bedrock.asciidoc @@ -0,0 +1,175 @@ +[[infer-service-amazon-bedrock]] +=== Amazon Bedrock {infer} service + +Creates an {infer} endpoint to perform an {infer} task with the `amazonbedrock` service. + +[discrete] +[[infer-service-amazon-bedrock-api-request]] +==== {api-request-title} + +`PUT /_inference//` + +[discrete] +[[infer-service-amazon-bedrock-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=inference-id] + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=task-type] ++ +-- +Available task types: + +* `completion`, +* `text_embedding`. +-- + +[discrete] +[[infer-service-amazon-bedrock-api-request-body]] +==== {api-request-body-title} + +`service`:: +(Required, string) The type of service supported for the specified task type. +In this case, +`amazonbedrock`. + +`service_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=service-settings] ++ +-- +These settings are specific to the `amazonbedrock` service. +-- + +`access_key`::: +(Required, string) +A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests. + +`secret_key`::: +(Required, string) +A valid AWS secret key that is paired with the `access_key`. +To create or manage access and secret keys, see https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html[Managing access keys for IAM users] in the AWS documentation. + +IMPORTANT: You need to provide the access and secret keys only once, during the {infer} model creation. +The <> does not retrieve your access or secret keys. +After creating the {infer} model, you cannot change the associated key pairs. +If you want to use a different access and secret key pair, delete the {infer} model and recreate it with the same name and the updated keys. + +`provider`::: +(Required, string) +The model provider for your deployment. +Note that some providers may support only certain task types. 
+Supported providers include: + +* `amazontitan` - available for `text_embedding` and `completion` task types +* `anthropic` - available for `completion` task type only +* `ai21labs` - available for `completion` task type only +* `cohere` - available for `text_embedding` and `completion` task types +* `meta` - available for `completion` task type only +* `mistral` - available for `completion` task type only + +`model`::: +(Required, string) +The base model ID or an ARN to a custom model based on a foundational model. +The base model IDs can be found in the https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[Amazon Bedrock model IDs] documentation. +Note that the model ID must be available for the provider chosen, and your IAM user must have access to the model. + +`region`::: +(Required, string) +The region that your model or ARN is deployed in. +The list of available regions per model can be found in the https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html[Model support by AWS region] documentation. + +`rate_limit`::: +(Optional, object) +By default, the `amazonbedrock` service sets the number of requests allowed per minute to `240`. +This helps to minimize the number of rate limit errors returned from Amazon Bedrock. +To modify this, set the `requests_per_minute` setting of this object in your service settings: ++ +-- +include::inference-shared.asciidoc[tag=request-per-minute-example] +-- + +`task_settings`:: +(Optional, object) +include::inference-shared.asciidoc[tag=task-settings] ++ +.`task_settings` for the `completion` task type +[%collapsible%closed] +===== + +`max_new_tokens`::: +(Optional, integer) +Sets the maximum number for the output tokens to be generated. +Defaults to 64. + +`temperature`::: +(Optional, float) +A number between 0.0 and 1.0 that controls the apparent creativity of the results. At temperature 0.0 the model is most deterministic, at temperature 1.0 most random. +Should not be used if `top_p` or `top_k` is specified. + +`top_p`::: +(Optional, float) +Alternative to `temperature`. A number in the range of 0.0 to 1.0, to eliminate low-probability tokens. Top-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence. +Should not be used if `temperature` is specified. + +`top_k`::: +(Optional, float) +Only available for `anthropic`, `cohere`, and `mistral` providers. +Alternative to `temperature`. Limits samples to the top-K most likely words, balancing coherence and variability. +Should not be used if `temperature` is specified. + +===== ++ +.`task_settings` for the `text_embedding` task type +[%collapsible%closed] +===== + +There are no `task_settings` available for the `text_embedding` task type. + +===== + +[discrete] +[[inference-example-amazonbedrock]] +==== Amazon Bedrock service example + +The following example shows how to create an {infer} endpoint called `amazon_bedrock_embeddings` to perform a `text_embedding` task type. + +Choose chat completion and embeddings models that you have access to from the https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[Amazon Bedrock base models]. 
+ +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/amazon_bedrock_embeddings +{ + "service": "amazonbedrock", + "service_settings": { + "access_key": "", + "secret_key": "", + "region": "us-east-1", + "provider": "amazontitan", + "model": "amazon.titan-embed-text-v2:0" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The next example shows how to create an {infer} endpoint called `amazon_bedrock_completion` to perform a `completion` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/completion/amazon_bedrock_completion +{ + "service": "amazonbedrock", + "service_settings": { + "access_key": "", + "secret_key": "", + "region": "us-east-1", + "provider": "amazontitan", + "model": "amazon.titan-text-premier-v1:0" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] diff --git a/docs/reference/inference/service-anthropic.asciidoc b/docs/reference/inference/service-anthropic.asciidoc new file mode 100644 index 0000000000000..41419db7a6069 --- /dev/null +++ b/docs/reference/inference/service-anthropic.asciidoc @@ -0,0 +1,124 @@ +[[infer-service-anthropic]] +=== Anthropic {infer} service + +Creates an {infer} endpoint to perform an {infer} task with the `anthropic` service. + + +[discrete] +[[infer-service-anthropic-api-request]] +==== {api-request-title} + +`PUT /_inference//` + +[discrete] +[[infer-service-anthropic-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=inference-id] + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=task-type] ++ +-- +Available task types: + +* `completion` +-- + +[discrete] +[[infer-service-anthropic-api-request-body]] +==== {api-request-body-title} + +`service`:: +(Required, string) +The type of service supported for the specified task type. In this case, +`anthropic`. + +`service_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=service-settings] ++ +-- +These settings are specific to the `anthropic` service. +-- + +`api_key`::: +(Required, string) +A valid API key for the Anthropic API. + +`model_id`::: +(Required, string) +The name of the model to use for the {infer} task. +You can find the supported models at https://docs.anthropic.com/en/docs/about-claude/models#model-names[Anthropic models]. + +`rate_limit`::: +(Optional, object) +By default, the `anthropic` service sets the number of requests allowed per minute to `50`. +This helps to minimize the number of rate limit errors returned from Anthropic. +To modify this, set the `requests_per_minute` setting of this object in your service settings: ++ +-- +include::inference-shared.asciidoc[tag=request-per-minute-example] +-- + +`task_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=task-settings] ++ +.`task_settings` for the `completion` task type +[%collapsible%closed] +===== +`max_tokens`::: +(Required, integer) +The maximum number of tokens to generate before stopping. + +`temperature`::: +(Optional, float) +The amount of randomness injected into the response. ++ +For more details about the supported range, see the https://docs.anthropic.com/en/api/messages[Anthropic messages API]. + +`top_k`::: +(Optional, integer) +Specifies to only sample from the top K options for each subsequent token. ++ +Recommended for advanced use cases only. You usually only need to use `temperature`. 
++ +For more details, see the https://docs.anthropic.com/en/api/messages[Anthropic messages API]. + +`top_p`::: +(Optional, float) +Specifies to use Anthropic's nucleus sampling. ++ +In nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by `top_p`. You should either alter `temperature` or `top_p`, but not both. ++ +Recommended for advanced use cases only. You usually only need to use `temperature`. ++ +For more details, see the https://docs.anthropic.com/en/api/messages[Anthropic messages API]. +===== + +[discrete] +[[inference-example-anthropic]] +==== Anthropic service example + +The following example shows how to create an {infer} endpoint called +`anthropic_completion` to perform a `completion` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/completion/anthropic_completion +{ + "service": "anthropic", + "service_settings": { + "api_key": "", + "model_id": "" + }, + "task_settings": { + "max_tokens": 1024 + } +} +------------------------------------------------------------ +// TEST[skip:TBD] diff --git a/docs/reference/inference/service-cohere.asciidoc b/docs/reference/inference/service-cohere.asciidoc index 52d71e0bc02a5..84eae6e880617 100644 --- a/docs/reference/inference/service-cohere.asciidoc +++ b/docs/reference/inference/service-cohere.asciidoc @@ -131,6 +131,7 @@ Specify whether to return doc text within the results. `top_n`:: (Optional, integer) The number of most relevant documents to return, defaults to the number of the documents. +If this {infer} endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. ===== + .`task_settings` for the `text_embedding` task type diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc index 3b9b5b1928d7b..6fb0b4a38d0ef 100644 --- a/docs/reference/inference/service-elasticsearch.asciidoc +++ b/docs/reference/inference/service-elasticsearch.asciidoc @@ -1,7 +1,12 @@ [[infer-service-elasticsearch]] === Elasticsearch {infer} service -Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch` service. +Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch` +service. + +NOTE: If you use the E5 model through the `elasticsearch` service, the API +request will automatically download and deploy the model if it isn't downloaded +yet. [discrete] @@ -35,7 +40,7 @@ Available task types: `service`:: (Required, string) -The type of service supported for the specified task type. In this case, +The type of service supported for the specified task type. In this case, `elasticsearch`. `service_settings`:: @@ -58,7 +63,7 @@ The total number of allocations this model is assigned across machine learning n `num_threads`::: (Required, integer) -Sets the number of threads used by each model allocation during inference. This generally increases the speed per inference request. The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. +Sets the number of threads used by each model allocation during inference. This generally increases the speed per inference request. 
The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. Must be a power of 2. Max allowed value is 32. `task_settings`:: @@ -81,6 +86,9 @@ Returns the document instead of only the index. Defaults to `true`. The following example shows how to create an {infer} endpoint called `my-e5-model` to perform a `text_embedding` task type. +The API request below will automatically download the E5 model if it isn't +already downloaded and then deploy the model. + [source,console] ------------------------------------------------------------ PUT _inference/text_embedding/my-e5-model @@ -98,6 +106,14 @@ PUT _inference/text_embedding/my-e5-model Valid values are `.multilingual-e5-small` and `.multilingual-e5-small_linux-x86_64`. For further details, refer to the {ml-docs}/ml-nlp-e5.html[E5 model documentation]. +[NOTE] +==== +You might see a 502 bad gateway error in the response when using the {kib} Console. +This error usually just reflects a timeout, while the model downloads in the background. +You can check the download progress in the {ml-app} UI. +If using the Python client, you can set the `timeout` parameter to a higher value. +==== + [discrete] [[inference-example-eland]] ==== Models uploaded by Eland via the elasticsearch service @@ -107,16 +123,17 @@ The following example shows how to create an {infer} endpoint called [source,console] ------------------------------------------------------------ -PUT _inference/text_embedding/my-msmarco-minilm-model +PUT _inference/text_embedding/my-msmarco-minilm-model <1> { "service": "elasticsearch", "service_settings": { "num_allocations": 1, "num_threads": 1, - "model_id": "msmarco-MiniLM-L12-cos-v5" <1> + "model_id": "msmarco-MiniLM-L12-cos-v5" <2> } } ------------------------------------------------------------ // TEST[skip:TBD] -<1> The `model_id` must be the ID of a text embedding model which has already been -{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland]. \ No newline at end of file +<1> Provide an unique identifier for the inference endpoint. The `inference_id` must be unique and must not match the `model_id`. +<2> The `model_id` must be the ID of a text embedding model which has already been +{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland]. diff --git a/docs/reference/inference/service-elser.asciidoc b/docs/reference/inference/service-elser.asciidoc index 829ff4968c5be..34c0f7d0a9c53 100644 --- a/docs/reference/inference/service-elser.asciidoc +++ b/docs/reference/inference/service-elser.asciidoc @@ -3,6 +3,9 @@ Creates an {infer} endpoint to perform an {infer} task with the `elser` service. +NOTE: The API request will automatically download and deploy the ELSER model if +it isn't already downloaded. + [discrete] [[infer-service-elser-api-request]] @@ -34,7 +37,7 @@ Available task types: `service`:: (Required, string) -The type of service supported for the specified task type. In this case, +The type of service supported for the specified task type. In this case, `elser`. `service_settings`:: @@ -51,7 +54,7 @@ The total number of allocations this model is assigned across machine learning n `num_threads`::: (Required, integer) -Sets the number of threads used by each model allocation during inference. This generally increases the speed per inference request. 
The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. +Sets the number of threads used by each model allocation during inference. This generally increases the speed per inference request. The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. Must be a power of 2. Max allowed value is 32. @@ -63,6 +66,9 @@ The following example shows how to create an {infer} endpoint called `my-elser-model` to perform a `sparse_embedding` task type. Refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation] for more info. +The request below will automatically download the ELSER model if it isn't +already downloaded and then deploy the model. + [source,console] ------------------------------------------------------------ PUT _inference/sparse_embedding/my-elser-model @@ -92,4 +98,12 @@ Example response: "task_settings": {} } ------------------------------------------------------------ -// NOTCONSOLE \ No newline at end of file +// NOTCONSOLE + +[NOTE] +==== +You might see a 502 bad gateway error in the response when using the {kib} Console. +This error usually just reflects a timeout, while the model downloads in the background. +You can check the download progress in the {ml-app} UI. +If using the Python client, you can set the `timeout` parameter to a higher value. +==== diff --git a/docs/reference/ingest/apis/delete-geoip-database.asciidoc b/docs/reference/ingest/apis/delete-geoip-database.asciidoc new file mode 100644 index 0000000000000..957e59f0f0de4 --- /dev/null +++ b/docs/reference/ingest/apis/delete-geoip-database.asciidoc @@ -0,0 +1,55 @@ +[[delete-geoip-database-api]] +=== Delete geoip database configuration API +++++ +Delete geoip database configuration +++++ + +Deletes a geoip database configuration. + +[source,console] +---- +DELETE /_ingest/geoip/database/my-database-id +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + + +[[delete-geoip-database-api-request]] +==== {api-request-title} + +`DELETE /_ingest/geoip/database/` + +[[delete-geoip-database-api-prereqs]] +==== {api-prereq-title} + +* If the {es} {security-features} are enabled, you must have the +`manage` <> to use this API. + +[[delete-geoip-database-api-path-params]] +==== {api-path-parms-title} + +``:: ++ +-- +(Required, string) Database configuration ID used to limit the request. + +-- + + +[[delete-geoip-database-api-query-params]] +==== {api-query-parms-title} + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] + + +[[delete-geoip-database-api-example]] +==== {api-examples-title} + + +[[delete-geoip-database-api-specific-ex]] +===== Delete a specific geoip database configuration + +[source,console] +---- +DELETE /_ingest/geoip/database/example-database-id +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] diff --git a/docs/reference/ingest/apis/delete-pipeline.asciidoc b/docs/reference/ingest/apis/delete-pipeline.asciidoc index 6f50251dbf1cd..94ac87c61b56b 100644 --- a/docs/reference/ingest/apis/delete-pipeline.asciidoc +++ b/docs/reference/ingest/apis/delete-pipeline.asciidoc @@ -62,7 +62,7 @@ use a value of `*`. 
include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] -[[delete-pipeline-api-api-example]] +[[delete-pipeline-api-example]] ==== {api-examples-title} diff --git a/docs/reference/ingest/apis/geoip-stats-api.asciidoc b/docs/reference/ingest/apis/geoip-stats.asciidoc similarity index 100% rename from docs/reference/ingest/apis/geoip-stats-api.asciidoc rename to docs/reference/ingest/apis/geoip-stats.asciidoc diff --git a/docs/reference/ingest/apis/get-geoip-database.asciidoc b/docs/reference/ingest/apis/get-geoip-database.asciidoc new file mode 100644 index 0000000000000..f055e3e759db8 --- /dev/null +++ b/docs/reference/ingest/apis/get-geoip-database.asciidoc @@ -0,0 +1,80 @@ +[[get-geoip-database-api]] +=== Get geoip database configuration API +++++ +Get geoip database configuration +++++ + +Returns information about one or more geoip database configurations. + +[source,console] +---- +GET /_ingest/geoip/database/my-database-id +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + + + +[[get-geoip-database-api-request]] +==== {api-request-title} + +`GET /_ingest/geoip/database/` + +`GET /_ingest/geoip/database` + +[[get-geoip-database-api-prereqs]] +==== {api-prereq-title} + +* If the {es} {security-features} are enabled, you must have the + `manage` <> to use this API. + +[[get-geoip-database-api-path-params]] +==== {api-path-parms-title} + +``:: +(Optional, string) +Comma-separated list of database configuration IDs to retrieve. Wildcard (`*`) expressions are +supported. ++ +To get all database configurations, omit this parameter or use `*`. + + +[[get-geoip-database-api-query-params]] +==== {api-query-parms-title} + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] + + +[[get-geoip-database-api-example]] +==== {api-examples-title} + + +[[get-geoip-database-api-specific-ex]] +===== Get information for a specific geoip database configuration + +[source,console] +---- +GET /_ingest/geoip/database/my-database-id +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + +The API returns the following response: + +[source,console-result] +---- +{ + "databases" : [ + { + "id" : "my-database-id", + "version" : 1, + "modified_date_millis" : 1723040276114, + "database" : { + "name" : "GeoIP2-Domain", + "maxmind" : { + "account_id" : "1234567" + } + } + } + ] +} +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] diff --git a/docs/reference/ingest/apis/get-pipeline.asciidoc b/docs/reference/ingest/apis/get-pipeline.asciidoc index 71a261d97bdeb..f2a1155bca12b 100644 --- a/docs/reference/ingest/apis/get-pipeline.asciidoc +++ b/docs/reference/ingest/apis/get-pipeline.asciidoc @@ -65,7 +65,7 @@ To get all ingest pipelines, omit this parameter or use `*`. 
include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] -[[get-pipeline-api-api-example]] +[[get-pipeline-api-example]] ==== {api-examples-title} diff --git a/docs/reference/ingest/apis/index.asciidoc b/docs/reference/ingest/apis/index.asciidoc index 04fcd500a9721..e068f99ea0ad3 100644 --- a/docs/reference/ingest/apis/index.asciidoc +++ b/docs/reference/ingest/apis/index.asciidoc @@ -13,7 +13,7 @@ Use the following APIs to create, manage, and test ingest pipelines: * <> to create or update a pipeline * <> to retrieve a pipeline configuration * <> to delete a pipeline -* <> to test a pipeline +* <> and <> to test ingest pipelines [discrete] [[ingest-stat-apis]] @@ -21,12 +21,27 @@ Use the following APIs to create, manage, and test ingest pipelines: Use the following APIs to get statistics about ingest processing: -* <> to get download statistics for GeoIP2 databases used with +* <> to get download statistics for IP geolocation databases used with the <>. +[discrete] +[[ingest-geoip-database-apis]] +=== Ingest GeoIP Database APIs + +preview::["The commercial IP geolocation database download management APIs are in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] + +Use the following APIs to configure and manage commercial IP geolocation database downloads: + +* <> to create or update a database configuration +* <> to retrieve a database configuration +* <> to delete a database configuration + include::put-pipeline.asciidoc[] -include::delete-pipeline.asciidoc[] -include::geoip-stats-api.asciidoc[] include::get-pipeline.asciidoc[] +include::delete-pipeline.asciidoc[] include::simulate-pipeline.asciidoc[] include::simulate-ingest.asciidoc[] +include::geoip-stats.asciidoc[] +include::put-geoip-database.asciidoc[] +include::get-geoip-database.asciidoc[] +include::delete-geoip-database.asciidoc[] diff --git a/docs/reference/ingest/apis/put-geoip-database.asciidoc b/docs/reference/ingest/apis/put-geoip-database.asciidoc new file mode 100644 index 0000000000000..311c303002387 --- /dev/null +++ b/docs/reference/ingest/apis/put-geoip-database.asciidoc @@ -0,0 +1,72 @@ +[[put-geoip-database-api]] +=== Create or update geoip database configuration API +++++ +Create or update geoip database configuration +++++ + +Creates or updates an IP geolocation database configuration. + +IMPORTANT: The Maxmind `account_id` shown below requires a license key. Because the license key is sensitive information, +it is stored as a <> in {es} named `ingest.geoip.downloader.maxmind.license_key`. Only +one Maxmind license key is currently allowed per {es} cluster. A valid license key must be in the secure settings in order +to download from Maxmind. The license key setting does not take effect until all nodes are restarted. + +[source,console] +---- +PUT _ingest/geoip/database/my-database-id +{ + "name": "GeoIP2-Domain", + "maxmind": { + "account_id": "1025402" + } +} +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + +[[put-geoip-database-api-request]] +==== {api-request-title} + +`PUT /_ingest/geoip/database/` + +[[put-geoip-database-api-prereqs]] +==== {api-prereq-title} + +* If the {es} {security-features} are enabled, you must have the +`manage` <> to use this API. 
+ + +[[put-geoip-database-api-path-params]] +==== {api-path-parms-title} + +``:: ++ +__ +(Required, string) ID of the database configuration to create or update. + +[[put-geoip-database-api-query-params]] +==== {api-query-parms-title} + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] + +[[put-geoip-database-api-request-body]] +==== {api-request-body-title} + +// tag::geoip-database-object[] +`name`:: +(Required, string) +The provider-assigned name of the IP geolocation database to download. + +``:: +(Required, a provider object and its associated configuration) +The configuration necessary to identify which IP geolocation provider to use to download +the database, as well as any provider-specific configuration necessary for such downloading. ++ +At present, the only supported provider is `maxmind`, and the maxmind provider +requires that an `account_id` (string) is configured. +// end::geoip-database-object[] + +[[geoip-database-configuration-licensing]] +==== Licensing + +Downloading databases from third party providers is a commercial feature that requires an +appropriate license. For more information, refer to https://www.elastic.co/subscriptions. diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index 738ac234d6162..3a9ba58dedbf0 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -24,6 +24,9 @@ stats API>>. If your cluster can't connect to the Elastic GeoIP endpoint or you want to manage your own updates, see <>. +If you would like to have {es} download database files directly from Maxmind using your own provided +license key, see <>. + If {es} can't connect to the endpoint for 30 days all updated databases will become invalid. {es} will stop enriching documents with geoip data and will add `tags: ["_geoip_expired_database"]` field instead. @@ -36,9 +39,9 @@ field instead. [options="header"] |====== | Name | Required | Default | Description -| `field` | yes | - | The field to get the ip address from for the geographical lookup. +| `field` | yes | - | The field to get the IP address from for the geographical lookup. | `target_field` | no | geoip | The field that will hold the geographical information looked up from the MaxMind database. -| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb) or the name of a supported database file in the `ingest-geoip` config directory. +| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). | `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the geoip lookup. 
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document | `first_only` | no | `true` | If `true` only first found geoip data will be returned, even if `field` contains array @@ -64,12 +67,12 @@ depend on what has been found and which properties were configured in `propertie * If the GeoIP2 Domain database is used, then the following fields may be added under the `target_field`: `ip`, and `domain`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoIP2 ISP database is used, then the following fields may be added under the `target_field`: `ip`, `asn`, -`organization_name`, `network`, `isp`, `isp_organization`, `mobile_country_code`, and `mobile_network_code`. The fields actually added +`organization_name`, `network`, `isp`, `isp_organization_name`, `mobile_country_code`, and `mobile_network_code`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoIP2 Enterprise database is used, then the following fields may be added under the `target_field`: `ip`, `country_iso_code`, `country_name`, `continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `timezone`, `location`, `asn`, `organization_name`, `network`, `hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, -`residential_proxy`, `domain`, `isp`, `isp_organization`, `mobile_country_code`, `mobile_network_code`, `user_type`, and +`residential_proxy`, `domain`, `isp`, `isp_organization_name`, `mobile_country_code`, `mobile_network_code`, `user_type`, and `connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. preview::["Do not use the GeoIP2 Anonymous IP, GeoIP2 Connection Type, GeoIP2 Domain, GeoIP2 ISP, and GeoIP2 Enterprise databases in production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] diff --git a/docs/reference/mapping/fields/source-field.asciidoc b/docs/reference/mapping/fields/source-field.asciidoc index ec824e421e015..903b301ab1a96 100644 --- a/docs/reference/mapping/fields/source-field.asciidoc +++ b/docs/reference/mapping/fields/source-field.asciidoc @@ -6,11 +6,11 @@ at index time. The `_source` field itself is not indexed (and thus is not searchable), but it is stored so that it can be returned when executing _fetch_ requests, like <> or <>. -If disk usage is important to you then have a look at -<> which shrinks disk usage at the cost of -only supporting a subset of mappings and slower fetches or (not recommended) -<> which also shrinks disk -usage but disables many features. +If disk usage is important to you, then consider the following options: + +- Using <>, which reconstructs source content at the time of retrieval instead of storing it on disk. This shrinks disk usage, at the cost of slower access to `_source` in <> and <> queries. +- <>. This shrinks disk +usage but disables features that rely on `_source`. include::synthetic-source.asciidoc[] @@ -43,7 +43,7 @@ available then a number of features are not supported: * The <>, <>, and <> APIs. -* In the {kib} link:{kibana-ref}/discover.html[Discover] application, field data will not be displayed. 
+* In the {kib} link:{kibana-ref}/discover.html[Discover] application, field data will not be displayed. * On the fly <>. diff --git a/docs/reference/mapping/fields/synthetic-source.asciidoc b/docs/reference/mapping/fields/synthetic-source.asciidoc index a0e7aed177a9c..ccea38cf602da 100644 --- a/docs/reference/mapping/fields/synthetic-source.asciidoc +++ b/docs/reference/mapping/fields/synthetic-source.asciidoc @@ -28,45 +28,22 @@ PUT idx While this on the fly reconstruction is *generally* slower than saving the source documents verbatim and loading them at query time, it saves a lot of storage -space. +space. Additional latency can be avoided by not loading `_source` field in queries when it is not needed. + +[[synthetic-source-fields]] +===== Supported fields +Synthetic `_source` is supported by all field types. Depending on implementation details, field types have different properties when used with synthetic `_source`. + +<> construct synthetic `_source` using existing data, most commonly <> and <>. For these field types, no additional space is needed to store the contents of `_source` field. Due to the storage layout of <>, the generated `_source` field undergoes <> compared to original document. + +For all other field types, the original value of the field is stored as is, in the same way as the `_source` field in non-synthetic mode. In this case there are no modifications and field data in `_source` is the same as in the original document. Similarly, malformed values of fields that use <> or <> need to be stored as is. This approach is less storage efficient since data needed for `_source` reconstruction is stored in addition to other data required to index the field (like `doc_values`). [[synthetic-source-restrictions]] ===== Synthetic `_source` restrictions -There are a couple of restrictions to be aware of: +Synthetic `_source` cannot be used together with field mappings that use <>. -* When you retrieve synthetic `_source` content it undergoes minor -<> compared to the original JSON. -* Synthetic `_source` can be used with indices that contain only these field -types: - -** <> -** {plugins}/mapper-annotated-text-usage.html#annotated-text-synthetic-source[`annotated-text`] -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> +Some field types have additional restrictions. These restrictions are documented in the **synthetic `_source`** section of the field type's <>. [[synthetic-source-modifications]] ===== Synthetic `_source` modifications @@ -178,4 +155,40 @@ that ordering. [[synthetic-source-modifications-ranges]] ====== Representation of ranges -Range field vales (e.g. `long_range`) are always represented as inclusive on both sides with bounds adjusted accordingly. See <>. +Range field values (e.g. `long_range`) are always represented as inclusive on both sides with bounds adjusted accordingly. See <>. + +[[synthetic-source-precision-loss-for-point-types]] +====== Reduced precision of `geo_point` values +Values of `geo_point` fields are represented in synthetic `_source` with reduced precision. See <>. + + +[[synthetic-source-fields-native-list]] +===== Field types that support synthetic source with no storage overhead +The following field types support synthetic source using data from <> or <>, and require no additional storage space to construct the `_source` field. 
+ +NOTE: If you enable the <> or <> settings, then additional storage is required to store ignored field values for these types. + +** <> +** {plugins}/mapper-annotated-text-usage.html#annotated-text-synthetic-source[`annotated-text`] +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> diff --git a/docs/reference/mapping/params/copy-to.asciidoc b/docs/reference/mapping/params/copy-to.asciidoc index 10eebfb027736..b26ceac349a3e 100644 --- a/docs/reference/mapping/params/copy-to.asciidoc +++ b/docs/reference/mapping/params/copy-to.asciidoc @@ -64,16 +64,104 @@ Some important points: * It is the field _value_ which is copied, not the terms (which result from the analysis process). * The original <> field will not be modified to show the copied values. * The same value can be copied to multiple fields, with `"copy_to": [ "field_1", "field_2" ]` -* You cannot copy recursively via intermediary fields such as a `copy_to` on -`field_1` to `field_2` and `copy_to` on `field_2` to `field_3` expecting -indexing into `field_1` will eventuate in `field_3`, instead use copy_to -directly to multiple fields from the originating field. +* You cannot copy recursively using intermediary fields. +The following configuration will not copy data from `field_1` to `field_3`: ++ +[source,console] +---- +PUT bad_example_index +{ + "mappings": { + "properties": { + "field_1": { + "type": "text", + "copy_to": "field_2" + }, + "field_2": { + "type": "text", + "copy_to": "field_3" + }, + "field_3": { + "type": "text" + } + } + } +} +---- +Instead, copy to multiple fields from the source field: ++ +[source,console] +---- +PUT good_example_index +{ + "mappings": { + "properties": { + "field_1": { + "type": "text", + "copy_to": ["field_2", "field_3"] + }, + "field_2": { + "type": "text" + }, + "field_3": { + "type": "text" + } + } + } +} +---- + +NOTE: `copy_to` is not supported for field types where values take the form of objects, e.g. `date_range`. + +[float] +[[copy-to-dynamic-mapping]] +==== Dynamic mapping + +Consider the following points when using `copy_to` with dynamic mappings: + * If the target field does not exist in the index mappings, the usual <> behavior applies. By default, with <> set to `true`, a non-existent target field will be -dynamically added to the index mappings. If `dynamic` is set to `false`, the +dynamically added to the index mappings. +* If `dynamic` is set to `false`, the target field will not be added to the index mappings, and the value will not be -copied. If `dynamic` is set to `strict`, copying to a non-existent field will +copied. +* If `dynamic` is set to `strict`, copying to a non-existent field will result in an error. ++ +** If the target field is nested, then `copy_to` fields must specify the full path to the nested field. +Omitting the full path will lead to a `strict_dynamic_mapping_exception`. +Use `"copy_to": ["parent_field.child_field"]` to correctly target a nested field. 
++ +For example: ++ +[source,console] +-------------------------------------------------- +PUT /test_index +{ + "mappings": { + "dynamic": "strict", + "properties": { + "description": { + "properties": { + "notes": { + "type": "text", + "copy_to": [ "description.notes_raw"], <1> + "analyzer": "standard", + "search_analyzer": "standard" + }, + "notes_raw": { + "type": "keyword" + } + } + } + } + } +} +-------------------------------------------------- -NOTE: `copy_to` is _not_ supported for field types where values take the form of objects, e.g. `date_range` \ No newline at end of file +<1> The `notes` field is copied to the `notes_raw` field. Targeting `notes_raw` alone instead of `description.notes_raw` +would lead to a `strict_dynamic_mapping_exception`. ++ +In this example, `notes_raw` is not defined at the root of the mapping, but under the `description` field. +Without the fully qualified path, {es} would interpret the `copy_to` target as a root-level field, not as a nested field under `description`. \ No newline at end of file diff --git a/docs/reference/mapping/types/rank-features.asciidoc b/docs/reference/mapping/types/rank-features.asciidoc index b54e99ede3fae..25d5278ca220d 100644 --- a/docs/reference/mapping/types/rank-features.asciidoc +++ b/docs/reference/mapping/types/rank-features.asciidoc @@ -70,6 +70,15 @@ GET my-index-000001/_search } } } + +GET my-index-000001/_search +{ + "query": { <6> + "term": { + "topics": "economics" + } + } +} -------------------------------------------------- <1> Rank features fields must use the `rank_features` field type @@ -77,6 +86,7 @@ GET my-index-000001/_search <3> Rank features fields must be a hash with string keys and strictly positive numeric values <4> This query ranks documents by how much they are about the "politics" topic. <5> This query ranks documents inversely to the number of "1star" reviews they received. +<6> This query returns documents that store the "economics" feature in the "topics" field. NOTE: `rank_features` fields only support single-valued features and strictly diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc index 6ee30e6b9f831..76fd752d29e8c 100644 --- a/docs/reference/mapping/types/semantic-text.asciidoc +++ b/docs/reference/mapping/types/semantic-text.asciidoc @@ -52,8 +52,8 @@ Use the <> to create the endpoint. The `inference_id` will not be validated when the mapping is created, but when documents are ingested into the index. When the first document is indexed, the `inference_id` will be used to generate underlying indexing structures for the field. -WARNING: Removing an inference endpoint will cause ingestion of documents and semantic queries to fail on indices that define `semantic_text` fields with that inference endpoint as their `inference_id`. -Please check that inference endpoints are not used in `semantic_text` fields before removal. +WARNING: Removing an {infer} endpoint will cause ingestion of documents and semantic queries to fail on indices that define `semantic_text` fields with that {infer} endpoint as their `inference_id`. +Before removal, check if {infer} endpoints are used in `semantic_text` fields. [discrete] [[auto-text-chunking]] @@ -65,6 +65,9 @@ To allow for large amounts of text to be used in semantic search, `semantic_text Each chunk will include the text subpassage and the corresponding embedding generated from it. 
When querying, the individual passages will be automatically searched for each document, and the most relevant passage will be used to compute a score. +Documents are split into 250-word sections with a 100-word overlap so that each section shares 100 words with the previous section. +This overlap ensures continuity and prevents vital contextual information in the input text from being lost by a hard break. + [discrete] [[semantic-text-structure]] @@ -118,13 +121,19 @@ In case you want to customize data indexing, use the <> or <> field types and create an ingest pipeline with an <> to generate the embeddings. -<> walks you through the process. +<> walks you through the process. In +these cases, when you use `sparse_vector` or `dense_vector` field types instead +of the `semantic_text` field type to customize indexing, the +<> is not supported for querying the +field data. + [discrete] [[update-script]] ==== Updates to `semantic_text` fields -Updates that use scripts are not supported when the index contains a `semantic_text` field. +Updates that use scripts are not supported for an index that contains a `semantic_text` field. +Even if the script targets non-`semantic_text` fields, the update will fail when the index contains a `semantic_text` field. [discrete] diff --git a/docs/reference/migration/migrate_8_15.asciidoc b/docs/reference/migration/migrate_8_15.asciidoc index a183e68a50693..1961230da1bbf 100644 --- a/docs/reference/migration/migrate_8_15.asciidoc +++ b/docs/reference/migration/migrate_8_15.asciidoc @@ -16,5 +16,125 @@ coming::[8.15.0] [[breaking-changes-8.15]] === Breaking changes -There are no breaking changes in {es} 8.15. +The following changes in {es} 8.15 might affect your applications +and prevent them from operating normally. +Before upgrading to 8.15, review these changes and take the described steps +to mitigate the impact. + +[discrete] +[[breaking_815_cluster_and_node_setting_changes]] +==== Cluster and node setting changes + +[[change_skip_unavailable_remote_cluster_setting_default_value_to_true]] +.Change `skip_unavailable` remote cluster setting default value to true +[%collapsible] +==== +*Details* + +The default value of the `skip_unavailable` setting is now set to `true`. All existing and future remote clusters that do not define this setting will use the new default. This setting only affects cross-cluster searches using the _search or _async_search API. + +*Impact* + +Unavailable remote clusters in a cross-cluster search will no longer cause the search to fail unless skip_unavailable is configured to be `false` in elasticsearch.yml or via the `_cluster/settings` API. Unavailable clusters with `skip_unavailable`=`true` (either explicitly or by using the new default) are marked as SKIPPED in the search response metadata section and do not fail the entire search. If users want to ensure that a search returns a failure when a particular remote cluster is not available, `skip_unavailable` must now be set explicitly to `false`.
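+For example, the following request explicitly restores the previous behaviour for a single remote cluster. This is a sketch that assumes a remote cluster registered under the alias `cluster_one`; substitute the alias of your own remote cluster:
+
+[source,console]
+----
+PUT _cluster/settings
+{
+  "persistent": {
+    "cluster.remote.cluster_one.skip_unavailable": false
+  }
+}
+----
+// TEST[skip: requires a configured remote cluster]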
+==== + +[discrete] +[[breaking_815_rollup_changes]] +==== Rollup changes + +[[disallow_new_rollup_jobs_in_clusters_with_no_rollup_usage]] +.Disallow new rollup jobs in clusters with no rollup usage +[%collapsible] +==== +*Details* + +The put rollup API will fail with an error when a rollup job is created in a cluster with no rollup usage. + +*Impact* + +Clusters with no rollup usage (either no rollup job or index) cannot create new rollup jobs. +==== + +[discrete] +[[breaking_815_rest_api_changes]] +==== REST API changes + +[[interpret_timeout_1_as_infinite_ack_timeout]] +.Interpret `?timeout=-1` as infinite ack timeout +[%collapsible] +==== +*Details* + +Today {es} accepts the parameter `?timeout=-1` in many APIs, but interprets +this to mean the same as `?timeout=0`. From 8.15 onwards `?timeout=-1` will +mean to wait indefinitely, aligning the behaviour of this parameter with +other similar parameters such as `?master_timeout`. + +*Impact* + +Use `?timeout=0` to force relevant operations to time out immediately +instead of `?timeout=-1`. +==== + +[[replace_model_id_with_inference_id]] +.Replace `model_id` with `inference_id` in GET inference API +[%collapsible] +==== +*Details* + +From 8.15 onwards the <> response will return an +`inference_id` field instead of a `model_id`. + +*Impact* + +If your application uses the `model_id` in a GET inference API response, +switch it to use `inference_id` instead. +==== + + +[discrete] +[[deprecated-8.15]] +=== Deprecations + +The following functionality has been deprecated in {es} 8.15 +and will be removed in a future version. +While this won't have an immediate impact on your applications, +we strongly encourage you to take the described steps to update your code +after upgrading to 8.15. + +To find out if you are using any deprecated functionality, +enable <>. + +[discrete] +[[deprecations_815_cluster_and_node_setting]] +==== Cluster and node setting deprecations + +[[deprecate_absolute_size_values_for_indices_breaker_total_limit_setting]] +.Deprecate absolute size values for `indices.breaker.total.limit` setting +[%collapsible] +==== +*Details* + +Previously, the value of `indices.breaker.total.limit` could be specified as an absolute size in bytes. This setting controls the overall amount of memory the server is allowed to use before taking remedial actions. Setting this to a specific number of bytes led to strange behaviour when the node maximum heap size changed because the circuit breaker limit would remain unchanged. This would either leave the value too low, causing part of the heap to remain unused; or it would leave the value too high, causing the circuit breaker to be ineffective at preventing OOM errors. The only reasonable behaviour for this setting is that it scales with the size of the heap, and so absolute byte limits are now deprecated. + +*Impact* + +Users must change their configuration to specify a percentage instead of an absolute number of bytes for `indices.breaker.total.limit`, or else accept the default, which is already specified as a percentage. +==== + +[discrete] +[[deprecations_815_rest_api]] +==== REST API deprecations + +[[deprecate_text_expansion_weighted_tokens_queries]] +.Deprecate `text_expansion` and `weighted_tokens` queries +[%collapsible] +==== +*Details* + +The `text_expansion` and `weighted_tokens` queries have been replaced by `sparse_vector`.
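+As a sketch of the migration, consider a `text_expansion` query of the following shape. The `ml.tokens` field and the `my-elser-endpoint` {infer} endpoint are illustrative names, not defaults:
+
+[source,console]
+----
+GET my-index/_search
+{
+  "query": {
+    "text_expansion": {
+      "ml.tokens": {
+        "model_id": "my-elser-endpoint",
+        "model_text": "How is the weather in Jamaica?"
+      }
+    }
+  }
+}
+----
+// TEST[skip: requires an inference endpoint]
+
+An equivalent `sparse_vector` query looks like this:
+
+[source,console]
+----
+GET my-index/_search
+{
+  "query": {
+    "sparse_vector": {
+      "field": "ml.tokens",
+      "inference_id": "my-elser-endpoint",
+      "query": "How is the weather in Jamaica?"
+    }
+  }
+}
+----
+// TEST[skip: requires an inference endpoint]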
+ +*Impact* + +Please update your existing `text_expansion` and `weighted_tokens` queries to use `sparse_vector`. +==== + +[[deprecate_using_slm_privileges_to_access_ilm]] +.Deprecate using slm privileges to access ilm +[%collapsible] +==== +*Details* + +The `read_slm` privilege can get the ILM status, and the `manage_slm` privilege can start and stop ILM. Access to these APIs should be granted using the `read_ilm` and `manage_ilm` privileges instead. Access to ILM APIs will be removed from SLM privileges in a future major release, and is now deprecated. + +*Impact* + +Users that need access to the ILM status API should now use the `read_ilm` privilege. Users that need to start and stop ILM should use the `manage_ilm` privilege. +==== diff --git a/docs/reference/ml/anomaly-detection/functions/ml-geo-functions.asciidoc b/docs/reference/ml/anomaly-detection/functions/ml-geo-functions.asciidoc index 5c061daa1cd44..63a0f047db647 100644 --- a/docs/reference/ml/anomaly-detection/functions/ml-geo-functions.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/ml-geo-functions.asciidoc @@ -52,6 +52,12 @@ detects anomalies where the geographic location of a credit card transaction is unusual for a particular customer’s credit card. An anomaly might indicate fraud. +A "typical" value indicates a centroid of a cluster of previously observed +locations that is closest to the "actual" location at that time. For example, +there may be one centroid near the person's home that is associated with the +cluster of local grocery stores and restaurants, and another centroid near the +person's work associated with the cluster of lunch and coffee places. + IMPORTANT: The `field_name` that you supply must be a single string that contains two comma-separated numbers of the form `latitude,longitude`, a `geo_point` field, a `geo_shape` field that contains point values, or a diff --git a/docs/reference/modules/cluster/misc.asciidoc b/docs/reference/modules/cluster/misc.asciidoc index 3da5df4f16414..75eaca88c66b1 100644 --- a/docs/reference/modules/cluster/misc.asciidoc +++ b/docs/reference/modules/cluster/misc.asciidoc @@ -11,12 +11,12 @@ An entire cluster may be set to read-only with the following setting: (<>) Make the whole cluster read only (indices do not accept write operations), metadata is not allowed to be modified (create or delete - indices). + indices). Defaults to `false`. `cluster.blocks.read_only_allow_delete`:: (<>) Identical to `cluster.blocks.read_only` but allows to delete indices - to free up resources. + to free up resources. Defaults to `false`. WARNING: Don't rely on this setting to prevent changes to your cluster. Any user with access to the <> diff --git a/docs/reference/modules/gateway.asciidoc b/docs/reference/modules/gateway.asciidoc index d6ee730d5021c..bf7e6de64f093 100644 --- a/docs/reference/modules/gateway.asciidoc +++ b/docs/reference/modules/gateway.asciidoc @@ -4,11 +4,11 @@ The local gateway stores the cluster state and shard data across full cluster restarts. -The following _static_ settings, which must be set on every master node, +The following _static_ settings, which must be set on every <>, control how long a freshly elected master should wait before it tries to -recover the cluster state and the cluster's data. +recover the <> and the cluster's data. -NOTE: These settings only take effect on a full cluster restart. +NOTE: These settings only take effect during a <>.
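+For example, a cluster that is expected to have three data nodes might use settings along these lines in `elasticsearch.yml`. The values shown are an illustrative sketch, not recommendations:
+
+[source,yaml]
+----
+gateway.expected_data_nodes: 3
+gateway.recover_after_time: 10m
+gateway.recover_after_data_nodes: 2
+----
+
+With these values, recovery starts as soon as three data nodes have joined, or after ten minutes provided at least two data nodes have joined.
+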
`gateway.expected_data_nodes`:: (<>) diff --git a/docs/reference/modules/indices/search-settings.asciidoc b/docs/reference/modules/indices/search-settings.asciidoc index e43ec076578d4..003974815c4bd 100644 --- a/docs/reference/modules/indices/search-settings.asciidoc +++ b/docs/reference/modules/indices/search-settings.asciidoc @@ -33,6 +33,39 @@ a single response. Defaults to 65,536. + Requests that attempt to return more than this limit will return an error. +[[search-settings-only-allowed-scripts]] +`search.aggs.only_allowed_metric_scripts`:: +(<>, boolean) +Configures whether only explicitly allowed scripts can be used in +<>. +Defaults to `false`. ++ +Requests using scripts not contained in either +<> +or +<> +will return an error. + +[[search-settings-allowed-inline-scripts]] +`search.aggs.allowed_inline_metric_scripts`:: +(<>, list of strings) +List of inline scripts that can be used in scripted metrics aggregations when +<> +is set to `true`. +Defaults to an empty list. ++ +Requests using other inline scripts will return an error. + +[[search-settings-allowed-stored-scripts]] +`search.aggs.allowed_stored_metric_scripts`:: +(<>, list of strings) +List of ids of stored scripts that can be used in scripted metrics aggregations when +<> +is set to `true`. +Defaults to an empty list. ++ +Requests using other stored scripts will return an error. + [[indices-query-bool-max-nested-depth]] `indices.query.bool.max_nested_depth`:: (<>, integer) Maximum nested depth of queries. Defaults to `30`. diff --git a/docs/reference/query-dsl.asciidoc b/docs/reference/query-dsl.asciidoc index 4d5504e5fe7ae..2f8f07f21f648 100644 --- a/docs/reference/query-dsl.asciidoc +++ b/docs/reference/query-dsl.asciidoc @@ -72,14 +72,12 @@ include::query-dsl/match-all-query.asciidoc[] include::query-dsl/span-queries.asciidoc[] +include::query-dsl/vector-queries.asciidoc[] + include::query-dsl/special-queries.asciidoc[] include::query-dsl/term-level-queries.asciidoc[] -include::query-dsl/text-expansion-query.asciidoc[] - -include::query-dsl/sparse-vector-query.asciidoc[] - include::query-dsl/minimum-should-match.asciidoc[] include::query-dsl/multi-term-rewrite.asciidoc[] diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 63ba4046a395d..1e3380389d861 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -397,68 +397,3 @@ This query does *not* match a document containing the phrase `hot porridge is salty porridge`, because the intervals returned by the match query for `hot porridge` only cover the initial two terms in this document, and these do not overlap the intervals covering `salty`. - -Another restriction to be aware of is the case of `any_of` rules that contain -sub-rules which overlap. In particular, if one of the rules is a strict -prefix of the other, then the longer rule can never match, which can -cause surprises when used in combination with `max_gaps`. 
Consider the -following query, searching for `the` immediately followed by `big` or `big bad`, -immediately followed by `wolf`: - -[source,console] --------------------------------------------------- -POST _search -{ - "query": { - "intervals" : { - "my_text" : { - "all_of" : { - "intervals" : [ - { "match" : { "query" : "the" } }, - { "any_of" : { - "intervals" : [ - { "match" : { "query" : "big" } }, - { "match" : { "query" : "big bad" } } - ] } }, - { "match" : { "query" : "wolf" } } - ], - "max_gaps" : 0, - "ordered" : true - } - } - } - } -} --------------------------------------------------- - -Counter-intuitively, this query does *not* match the document `the big bad -wolf`, because the `any_of` rule in the middle only produces intervals -for `big` - intervals for `big bad` being longer than those for `big`, while -starting at the same position, and so being minimized away. In these cases, -it's better to rewrite the query so that all of the options are explicitly -laid out at the top level: - -[source,console] --------------------------------------------------- -POST _search -{ - "query": { - "intervals" : { - "my_text" : { - "any_of" : { - "intervals" : [ - { "match" : { - "query" : "the big bad wolf", - "ordered" : true, - "max_gaps" : 0 } }, - { "match" : { - "query" : "the big wolf", - "ordered" : true, - "max_gaps" : 0 } } - ] - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/query-dsl/query-string-query.asciidoc b/docs/reference/query-dsl/query-string-query.asciidoc index 319ede7c4ac05..b45247ace3735 100644 --- a/docs/reference/query-dsl/query-string-query.asciidoc +++ b/docs/reference/query-dsl/query-string-query.asciidoc @@ -30,7 +30,7 @@ If you don't need to support a query syntax, consider using the syntax, use the <> query, which is less strict. ==== - + [[query-string-query-ex-request]] ==== Example request @@ -83,7 +83,7 @@ could be expensive. There is a limit on the number of fields times terms that can be queried at once. It is defined by the `indices.query.bool.max_clause_count` -<>, which defaults to 4096. +<>. ==== -- diff --git a/docs/reference/query-dsl/semantic-query.asciidoc b/docs/reference/query-dsl/semantic-query.asciidoc index d0eb2da95ebc6..22b5e6c5e6aad 100644 --- a/docs/reference/query-dsl/semantic-query.asciidoc +++ b/docs/reference/query-dsl/semantic-query.asciidoc @@ -128,6 +128,10 @@ If you want to fine-tune a search on a `semantic_text` field, you need to know t You can find the task type using the <>, and check the `task_type` associated with the {infer} service. Depending on the `task_type`, use either the <> or the <> query for greater flexibility and customization. +NOTE: While it is possible to use the `sparse_vector` query or the `knn` query +on a `semantic_text` field, it is not supported to use the `semantic_query` on a +`sparse_vector` or `dense_vector` field type. + [discrete] [[search-sparse-inference]] diff --git a/docs/reference/query-dsl/span-multi-term-query.asciidoc b/docs/reference/query-dsl/span-multi-term-query.asciidoc index aefb3e4b75eb5..5a5f0e1f5ff99 100644 --- a/docs/reference/query-dsl/span-multi-term-query.asciidoc +++ b/docs/reference/query-dsl/span-multi-term-query.asciidoc @@ -39,7 +39,8 @@ GET /_search -------------------------------------------------- WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the -boolean query limit (defaults to 4096).To avoid an unbounded expansion you can set the <>. 
+To avoid an unbounded expansion you can set the <> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only, you can activate the <> field option of the `text` field instead. This will rewrite any prefix query on the field to a single term query that matches the indexed prefix. diff --git a/docs/reference/query-dsl/sparse-vector-query.asciidoc b/docs/reference/query-dsl/sparse-vector-query.asciidoc index 80616ff174e36..08dd7ab7f4470 100644 --- a/docs/reference/query-dsl/sparse-vector-query.asciidoc +++ b/docs/reference/query-dsl/sparse-vector-query.asciidoc @@ -1,5 +1,5 @@ [[query-dsl-sparse-vector-query]] -== Sparse vector query +=== Sparse vector query ++++ Sparse vector @@ -19,7 +19,7 @@ For example, a stored vector `{"feature_0": 0.12, "feature_1": 1.2, "feature_2": [discrete] [[sparse-vector-query-ex-request]] -=== Example request using an {nlp} model +==== Example request using an {nlp} model [source,console] ---- @@ -37,7 +37,7 @@ GET _search // TEST[skip: Requires inference] [discrete] -=== Example request using precomputed vectors +==== Example request using precomputed vectors [source,console] ---- @@ -55,7 +55,7 @@ GET _search [discrete] [[sparse-vector-field-params]] -=== Top level parameters for `sparse_vector` +==== Top level parameters for `sparse_vector` `field`:: (Required, string) The name of the field that contains the token-weight pairs to be searched against. @@ -120,7 +120,7 @@ NOTE: The default values for `tokens_freq_ratio_threshold` and `tokens_weight_th [discrete] [[sparse-vector-query-example]] -=== Example ELSER query +==== Example ELSER query The following is an example of the `sparse_vector` query that references the ELSER model to perform semantic search. For a more detailed description of how to perform semantic search by using ELSER and the `sparse_vector` query, refer to <>. @@ -241,7 +241,7 @@ GET my-index/_search [discrete] [[sparse-vector-query-with-pruning-config-and-rescore-example]] -=== Example ELSER query with pruning configuration and rescore +==== Example ELSER query with pruning configuration and rescore The following is an extension to the above example that adds a preview:[] pruning configuration to the `sparse_vector` query. The pruning configuration identifies non-significant tokens to prune from the query in order to improve query performance. diff --git a/docs/reference/query-dsl/special-queries.asciidoc b/docs/reference/query-dsl/special-queries.asciidoc index 90cd9a696a6d9..a6d35d4f9b707 100644 --- a/docs/reference/query-dsl/special-queries.asciidoc +++ b/docs/reference/query-dsl/special-queries.asciidoc @@ -17,10 +17,6 @@ or collection of documents. This query finds queries that are stored as documents that match with the specified document. -<>:: -A query that finds the _k_ nearest vectors to a query -vector, as measured by a similarity metric. - <>:: A query that computes scores based on the values of numeric features and is able to efficiently skip non-competitive hits. @@ -32,9 +28,6 @@ This query allows a script to act as a filter. Also see the <>:: A query that allows to modify the score of a sub-query with a script. -<>:: -A query that allows you to perform semantic search. - <>:: A query that accepts other queries as json or yaml string. 
@@ -50,20 +43,14 @@ include::mlt-query.asciidoc[] include::percolate-query.asciidoc[] -include::knn-query.asciidoc[] - include::rank-feature-query.asciidoc[] include::script-query.asciidoc[] include::script-score-query.asciidoc[] -include::semantic-query.asciidoc[] - include::wrapper-query.asciidoc[] include::pinned-query.asciidoc[] include::rule-query.asciidoc[] - -include::weighted-tokens-query.asciidoc[] diff --git a/docs/reference/query-dsl/text-expansion-query.asciidoc b/docs/reference/query-dsl/text-expansion-query.asciidoc index 1c51429b5aa22..8faecad1dbdb9 100644 --- a/docs/reference/query-dsl/text-expansion-query.asciidoc +++ b/docs/reference/query-dsl/text-expansion-query.asciidoc @@ -1,5 +1,5 @@ [[query-dsl-text-expansion-query]] -== Text expansion query +=== Text expansion query ++++ Text expansion @@ -12,7 +12,7 @@ The text expansion query uses a {nlp} model to convert the query text into a lis [discrete] [[text-expansion-query-ex-request]] -=== Example request +==== Example request [source,console] ---- @@ -32,14 +32,14 @@ GET _search [discrete] [[text-expansion-query-params]] -=== Top level parameters for `text_expansion` +==== Top level parameters for `text_expansion` ``::: (Required, object) The name of the field that contains the token-weight pairs the NLP model created based on the input text. [discrete] [[text-expansion-rank-feature-field-params]] -=== Top level parameters for `` +==== Top level parameters for `` `model_id`:::: (Required, string) The ID of the model to use to convert the query text into token-weight pairs. @@ -84,7 +84,7 @@ NOTE: The default values for `tokens_freq_ratio_threshold` and `tokens_weight_th [discrete] [[text-expansion-query-example]] -=== Example ELSER query +==== Example ELSER query The following is an example of the `text_expansion` query that references the ELSER model to perform semantic search. For a more detailed description of how to perform semantic search by using ELSER and the `text_expansion` query, refer to <>. @@ -208,7 +208,7 @@ GET my-index/_search [discrete] [[text-expansion-query-with-pruning-config-and-rescore-example]] -=== Example ELSER query with pruning configuration and rescore +==== Example ELSER query with pruning configuration and rescore The following is an extension to the above example that adds a preview:[] pruning configuration to the `text_expansion` query. The pruning configuration identifies non-significant tokens to prune from the query in order to improve query performance. diff --git a/docs/reference/query-dsl/vector-queries.asciidoc b/docs/reference/query-dsl/vector-queries.asciidoc new file mode 100644 index 0000000000000..fe9f380eeb621 --- /dev/null +++ b/docs/reference/query-dsl/vector-queries.asciidoc @@ -0,0 +1,37 @@ +[[vector-queries]] +== Vector queries + +Vector queries are specialized queries that work on vector fields to efficiently perform <>. + +<>:: +A query that finds the _k_ nearest vectors to a query vector for <> fields, as measured by a similarity metric. + +<>:: +A query used to search <> field types. + +<>:: +A query that allows you to perform semantic search on <> fields. + +[discrete] +=== Deprecated vector queries + +The following queries have been deprecated and will be removed in the near future. +Use the <> query instead. + +<>:: +A query that allows you to perform sparse vector search on <> or <> fields. + +<>:: +Allows to perform text expansion queries optimizing for performance. 
+ +include::knn-query.asciidoc[] + +include::sparse-vector-query.asciidoc[] + +include::semantic-query.asciidoc[] + +include::text-expansion-query.asciidoc[] + +include::weighted-tokens-query.asciidoc[] + + diff --git a/docs/reference/query-rules/apis/index.asciidoc b/docs/reference/query-rules/apis/index.asciidoc index f7303647f8515..53d5fc3dc4eee 100644 --- a/docs/reference/query-rules/apis/index.asciidoc +++ b/docs/reference/query-rules/apis/index.asciidoc @@ -1,8 +1,6 @@ [[query-rules-apis]] == Query rules APIs -preview::[] - ++++ Query rules APIs ++++ diff --git a/docs/reference/query-rules/apis/put-query-rule.asciidoc b/docs/reference/query-rules/apis/put-query-rule.asciidoc index 2b9a6ba892b84..9737673be009c 100644 --- a/docs/reference/query-rules/apis/put-query-rule.asciidoc +++ b/docs/reference/query-rules/apis/put-query-rule.asciidoc @@ -70,10 +70,10 @@ Matches all queries, regardless of input. -- - `metadata` (Optional, string) The metadata field to match against. This metadata will be used to match against `match_criteria` sent in the <>. -Required for all criteria types except `global`. +Required for all criteria types except `always`. - `values` (Optional, array of strings) The values to match against the metadata field. Only one value must match for the criteria to be met. -Required for all criteria types except `global`. +Required for all criteria types except `always`. `actions`:: (Required, object) The actions to take when the rule is matched. diff --git a/docs/reference/query-rules/apis/put-query-ruleset.asciidoc b/docs/reference/query-rules/apis/put-query-ruleset.asciidoc index 012060e1004ae..c164e9e140a4e 100644 --- a/docs/reference/query-rules/apis/put-query-ruleset.asciidoc +++ b/docs/reference/query-rules/apis/put-query-ruleset.asciidoc @@ -78,10 +78,10 @@ Matches all queries, regardless of input. -- - `metadata` (Optional, string) The metadata field to match against. This metadata will be used to match against `match_criteria` sent in the <>. -Required for all criteria types except `global`. +Required for all criteria types except `always`. - `values` (Optional, array of strings) The values to match against the metadata field. Only one value must match for the criteria to be met. -Required for all criteria types except `global`. +Required for all criteria types except `always`. Actions depend on the rule type. For `pinned` rules, actions follow the format specified by the <>. diff --git a/docs/reference/release-notes.asciidoc b/docs/reference/release-notes.asciidoc index 2e043834c9969..65bd11a29b053 100644 --- a/docs/reference/release-notes.asciidoc +++ b/docs/reference/release-notes.asciidoc @@ -7,6 +7,8 @@ This section summarizes the changes in each release. * <> +* <> +* <> * <> * <> * <> @@ -69,6 +71,8 @@ This section summarizes the changes in each release. -- include::release-notes/8.15.0.asciidoc[] +include::release-notes/8.14.3.asciidoc[] +include::release-notes/8.14.2.asciidoc[] include::release-notes/8.14.1.asciidoc[] include::release-notes/8.14.0.asciidoc[] include::release-notes/8.13.4.asciidoc[] diff --git a/docs/reference/release-notes/8.12.0.asciidoc b/docs/reference/release-notes/8.12.0.asciidoc index 4c0fc50584b9f..bfa99401f41a2 100644 --- a/docs/reference/release-notes/8.12.0.asciidoc +++ b/docs/reference/release-notes/8.12.0.asciidoc @@ -14,6 +14,13 @@ there are deleted documents in the segments, quantiles may fail to build and pre This issue is fixed in 8.12.1. 
+* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[breaking-8.12.0]] [float] === Breaking changes diff --git a/docs/reference/release-notes/8.12.1.asciidoc b/docs/reference/release-notes/8.12.1.asciidoc index 9aa9a11b3bf02..8ebe5cbac3852 100644 --- a/docs/reference/release-notes/8.12.1.asciidoc +++ b/docs/reference/release-notes/8.12.1.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.12.1]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.12.1]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.12.2.asciidoc b/docs/reference/release-notes/8.12.2.asciidoc index 2be8449b6c1df..44202ee8226eb 100644 --- a/docs/reference/release-notes/8.12.2.asciidoc +++ b/docs/reference/release-notes/8.12.2.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.12.2]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.12.2]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.13.0.asciidoc b/docs/reference/release-notes/8.13.0.asciidoc index dba4fdbe5f67e..65c77ff602e34 100644 --- a/docs/reference/release-notes/8.13.0.asciidoc +++ b/docs/reference/release-notes/8.13.0.asciidoc @@ -21,6 +21,13 @@ This affects clusters running version 8.10 or later, with an active downsampling https://www.elastic.co/guide/en/elasticsearch/reference/current/downsampling-ilm.html[configuration] or a configuration that was activated at some point since upgrading to version 8.10 or later. +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. 
+If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[breaking-8.13.0]] [float] === Breaking changes diff --git a/docs/reference/release-notes/8.13.1.asciidoc b/docs/reference/release-notes/8.13.1.asciidoc index 7b3dbff74cc6e..c95fb1e720651 100644 --- a/docs/reference/release-notes/8.13.1.asciidoc +++ b/docs/reference/release-notes/8.13.1.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.13.1]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.13.1]] [float] diff --git a/docs/reference/release-notes/8.13.2.asciidoc b/docs/reference/release-notes/8.13.2.asciidoc index 514118f5ea575..d4e2cc794b7e8 100644 --- a/docs/reference/release-notes/8.13.2.asciidoc +++ b/docs/reference/release-notes/8.13.2.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.13.2]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.13.2]] [float] diff --git a/docs/reference/release-notes/8.13.3.asciidoc b/docs/reference/release-notes/8.13.3.asciidoc index 9aee0dd815f6d..bbad07f36a31e 100644 --- a/docs/reference/release-notes/8.13.3.asciidoc +++ b/docs/reference/release-notes/8.13.3.asciidoc @@ -10,6 +10,16 @@ Also see <>. SQL:: * Limit how much space some string functions can use {es-pull}107333[#107333] +[[known-issues-8.13.3]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. 
+ [[bug-8.13.3]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.13.4.asciidoc b/docs/reference/release-notes/8.13.4.asciidoc index bf3f2f497d8fc..bb2fe5789d56f 100644 --- a/docs/reference/release-notes/8.13.4.asciidoc +++ b/docs/reference/release-notes/8.13.4.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.13.4]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.13.4]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.14.0.asciidoc b/docs/reference/release-notes/8.14.0.asciidoc index 42f2f86a123ed..034b1ce39be1b 100644 --- a/docs/reference/release-notes/8.14.0.asciidoc +++ b/docs/reference/release-notes/8.14.0.asciidoc @@ -12,6 +12,16 @@ Security:: * Apply stricter Document Level Security (DLS) rules for the validate query API with the rewrite parameter {es-pull}105709[#105709] * Apply stricter Document Level Security (DLS) rules for terms aggregations when min_doc_count is set to 0 {es-pull}105714[#105714] +[[known-issues-8.14.0]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.14.0]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.14.1.asciidoc b/docs/reference/release-notes/8.14.1.asciidoc index f161c7d08099c..0b5f5b0a4e804 100644 --- a/docs/reference/release-notes/8.14.1.asciidoc +++ b/docs/reference/release-notes/8.14.1.asciidoc @@ -4,6 +4,16 @@ Also see <>. +[[known-issues-8.14.1]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.14.1]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.14.2.asciidoc b/docs/reference/release-notes/8.14.2.asciidoc new file mode 100644 index 0000000000000..f52acf0b6a7e8 --- /dev/null +++ b/docs/reference/release-notes/8.14.2.asciidoc @@ -0,0 +1,48 @@ +[[release-notes-8.14.2]] +== {es} version 8.14.2 + +coming[8.14.2] + +Also see <>. 
+ +[[known-issues-8.14.2]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + +[[bug-8.14.2]] +[float] +=== Bug fixes + +Data streams:: +* Ensure a lazy rollover request will rollover the target data stream once. {es-pull}109636[#109636] +* [Data streams] Fix the description of the lazy rollover task {es-pull}109629[#109629] + +ES|QL:: +* Fix ESQL cancellation for exchange requests {es-pull}109695[#109695] +* Fix equals and hashcode for `SingleValueQuery.LuceneQuery` {es-pull}110035[#110035] +* Force execute inactive sink reaper {es-pull}109632[#109632] + +Infra/Scripting:: +* Check array size before returning array item in script doc values {es-pull}109824[#109824] (issue: {es-issue}104998[#104998]) + +Infra/Settings:: +* Guard file settings readiness on file settings support {es-pull}109500[#109500] + +Machine Learning:: +* Fix IndexOutOfBoundsException during inference {es-pull}109533[#109533] + +Mapping:: +* Re-define `index.mapper.dynamic` setting in 8.x for a better 7.x to 8.x upgrade if this setting is used. {es-pull}109341[#109341] + +Ranking:: +* Fix for from parameter when using `sub_searches` and rank {es-pull}106253[#106253] (issue: {es-issue}99011[#99011]) + +Search:: +* Add hexstring support byte painless scorers {es-pull}109492[#109492] +* Fix automatic tracking of collapse with `docvalue_fields` {es-pull}110103[#110103] diff --git a/docs/reference/release-notes/8.14.3.asciidoc b/docs/reference/release-notes/8.14.3.asciidoc new file mode 100644 index 0000000000000..6b8895e39de3e --- /dev/null +++ b/docs/reference/release-notes/8.14.3.asciidoc @@ -0,0 +1,33 @@ +[[release-notes-8.14.3]] +== {es} version 8.14.3 + + +Also see <>. + +[[known-issues-8.14.3]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. 
+ +[[bug-8.14.3]] +[float] +=== Bug fixes + +Cluster Coordination:: +* Ensure tasks preserve versions in `MasterService` {es-pull}109850[#109850] + +ES|QL:: +* Introduce compute listener {es-pull}110400[#110400] + +Mapping:: +* Automatically adjust `ignore_malformed` only for the @timestamp {es-pull}109948[#109948] + +TSDB:: +* Disallow index.time_series.end_time setting from being set or updated in normal indices {es-pull}110268[#110268] (issue: {es-issue}110265[#110265]) + + diff --git a/docs/reference/release-notes/8.15.0.asciidoc b/docs/reference/release-notes/8.15.0.asciidoc index 97f4a51a1142f..40d5cf02954bf 100644 --- a/docs/reference/release-notes/8.15.0.asciidoc +++ b/docs/reference/release-notes/8.15.0.asciidoc @@ -1,8 +1,509 @@ [[release-notes-8.15.0]] == {es} version 8.15.0 -coming[8.15.0] - Also see <>. +[[breaking-8.15.0]] +[float] +=== Breaking changes + +Cluster Coordination:: +* Interpret `?timeout=-1` as infinite ack timeout {es-pull}107675[#107675] + +Inference API:: +* Replace `model_id` with `inference_id` in GET inference API {es-pull}111366[#111366] + +Rollup:: +* Disallow new rollup jobs in clusters with no rollup usage {es-pull}108624[#108624] (issue: {es-issue}108381[#108381]) + +Search:: +* Change `skip_unavailable` remote cluster setting default value to true {es-pull}105792[#105792] + +[[bug-8.15.0]] +[float] +=== Bug fixes + +Aggregations:: +* Don't sample calls to `ReduceContext#consumeBucketsAndMaybeBreak` ins `InternalDateHistogram` and `InternalHistogram` during reduction {es-pull}110186[#110186] +* Fix `ClassCastException` in Significant Terms {es-pull}108429[#108429] (issue: {es-issue}108427[#108427]) +* Run terms concurrently when cardinality is only lower than shard size {es-pull}110369[#110369] (issue: {es-issue}105505[#105505]) + +Allocation:: +* Fix misc trappy allocation API timeouts {es-pull}109241[#109241] +* Fix trappy timeout in allocation explain API {es-pull}109240[#109240] + +Analysis:: +* Correct positioning for unique token filter {es-pull}109395[#109395] + +Authentication:: +* Add comma before charset parameter in WWW-Authenticate response header {es-pull}110906[#110906] +* Avoid NPE if `users_roles` file does not exist {es-pull}109606[#109606] +* Improve security-crypto threadpool overflow handling {es-pull}111369[#111369] + +Authorization:: +* Fix trailing slash in `security.put_privileges` specification {es-pull}110177[#110177] +* Fixes cluster state-based role mappings not recovered from disk {es-pull}109167[#109167] +* Handle unmatching remote cluster wildcards properly for `IndicesRequest.SingleIndexNoWildcards` requests {es-pull}109185[#109185] + +Autoscaling:: +* Expose `?master_timeout` in autoscaling APIs {es-pull}108759[#108759] + +CRUD:: +* Update checkpoints after post-replication actions, even on failure {es-pull}109908[#109908] + +Cluster Coordination:: +* Deserialize publish requests on generic thread-pool {es-pull}108814[#108814] (issue: {es-issue}106352[#106352]) +* Fail cluster state API if blocked {es-pull}109618[#109618] (issue: {es-issue}107503[#107503]) +* Use `scheduleUnlessShuttingDown` in `LeaderChecker` {es-pull}108643[#108643] (issue: {es-issue}108642[#108642]) + +Data streams:: +* Apm-data: set concrete values for `metricset.interval` {es-pull}109043[#109043] +* Ecs@mappings: reduce scope for `ecs_geo_point` {es-pull}108349[#108349] (issue: {es-issue}108338[#108338]) +* Include component templates in retention validaiton {es-pull}109779[#109779] + +Distributed:: +* Associate restore snapshot task to parent 
mount task {es-pull}108705[#108705] (issue: {es-issue}105830[#105830]) +* Don't detect `PlainActionFuture` deadlock on concurrent complete {es-pull}110361[#110361] (issues: {es-issue}110181[#110181], {es-issue}110360[#110360]) +* Handle nullable `DocsStats` and `StoresStats` {es-pull}109196[#109196] + +Downsampling:: +* Support flattened fields and multi-fields as dimensions in downsampling {es-pull}110066[#110066] (issue: {es-issue}99297[#99297]) + +ES|QL:: +* ESQL: Change "substring" function to not return null on empty string {es-pull}109174[#109174] +* ESQL: Fix Join references {es-pull}109989[#109989] +* ESQL: Fix LOOKUP attribute shadowing {es-pull}109807[#109807] (issue: {es-issue}109392[#109392]) +* ESQL: Fix Max doubles bug with negatives and add tests for Max and Min {es-pull}110586[#110586] +* ESQL: Fix `IpPrefix` function not handling correctly `ByteRefs` {es-pull}109205[#109205] (issue: {es-issue}109198[#109198]) +* ESQL: Fix equals `hashCode` for functions {es-pull}107947[#107947] (issue: {es-issue}104393[#104393]) +* ESQL: Fix variable shadowing when pushing down past Project {es-pull}108360[#108360] (issue: {es-issue}108008[#108008]) +* ESQL: Validate unique plan attribute names {es-pull}110488[#110488] (issue: {es-issue}110541[#110541]) +* ESQL: change from quoting from backtick to quote {es-pull}108395[#108395] +* ESQL: make named params objects truly per request {es-pull}110046[#110046] (issue: {es-issue}110028[#110028]) +* ES|QL: Fix DISSECT that overwrites input {es-pull}110201[#110201] (issue: {es-issue}110184[#110184]) +* ES|QL: limit query depth to 500 levels {es-pull}108089[#108089] (issue: {es-issue}107752[#107752]) +* ES|QL: reduce max expression depth to 400 {es-pull}111186[#111186] (issue: {es-issue}109846[#109846]) +* Fix ST_DISTANCE Lucene push-down for complex predicates {es-pull}110391[#110391] (issue: {es-issue}110349[#110349]) +* Fix `ClassCastException` with MV_EXPAND on missing field {es-pull}110096[#110096] (issue: {es-issue}109974[#109974]) +* Fix bug in union-types with type-casting in grouping key of STATS {es-pull}110476[#110476] (issues: {es-issue}109922[#109922], {es-issue}110477[#110477]) +* Fix for union-types for multiple columns with the same name {es-pull}110793[#110793] (issues: {es-issue}110490[#110490], {es-issue}109916[#109916]) +* [ESQL] Count_distinct(_source) should return a 400 {es-pull}110824[#110824] +* [ESQL] Fix parsing of large magnitude negative numbers {es-pull}110665[#110665] (issue: {es-issue}104323[#104323]) +* [ESQL] Migrate `SimplifyComparisonArithmetics` optimization {es-pull}109256[#109256] (issues: {es-issue}108388[#108388], {es-issue}108743[#108743]) + +Engine:: +* Async close of `IndexShard` {es-pull}108145[#108145] + +Highlighting:: +* Fix issue with returning incomplete fragment for plain highlighter {es-pull}110707[#110707] + +ILM+SLM:: +* Allow `read_slm` to call GET /_slm/status {es-pull}108333[#108333] + +Indices APIs:: +* Create a new `NodeRequest` for every `NodesDataTiersUsageTransport` use {es-pull}108379[#108379] + +Infra/Core:: +* Add a cluster listener to fix missing node features after upgrading from a version prior to 8.13 {es-pull}110710[#110710] (issue: {es-issue}109254[#109254]) +* Add bounds checking to parsing ISO8601 timezone offset values {es-pull}108672[#108672] +* Fix native preallocate to actually run {es-pull}110851[#110851] +* Ignore additional cpu.stat fields {es-pull}108019[#108019] (issue: {es-issue}107983[#107983]) +* Specify parse index when error occurs on multiple datetime parses 
{es-pull}108607[#108607] + +Infra/Metrics:: +* Provide document size reporter with `MapperService` {es-pull}109794[#109794] + +Infra/Node Lifecycle:: +* Expose `?master_timeout` on get-shutdown API {es-pull}108886[#108886] +* Fix serialization of put-shutdown request {es-pull}107862[#107862] (issue: {es-issue}107857[#107857]) +* Support wait indefinitely for search tasks to complete on node shutdown {es-pull}107426[#107426] + +Infra/REST API:: +* Add some missing timeout params to REST API specs {es-pull}108761[#108761] +* Consider `error_trace` supported by all endpoints {es-pull}109613[#109613] (issue: {es-issue}109612[#109612]) + +Ingest Node:: +* Fix Dissect with leading non-ascii characters {es-pull}111184[#111184] +* Fix enrich policy runner exception handling on empty segments response {es-pull}111290[#111290] +* GeoIP tasks should wait longer for master {es-pull}108410[#108410] +* Removing the use of Stream::peek from `GeoIpDownloader::cleanDatabases` {es-pull}110666[#110666] +* Simulate should succeed if `ignore_missing_pipeline` {es-pull}108106[#108106] (issue: {es-issue}107314[#107314]) + +Machine Learning:: +* Allow deletion of the ELSER inference service when reference in ingest {es-pull}108146[#108146] +* Avoid `InferenceRunner` deadlock {es-pull}109551[#109551] +* Correctly handle duplicate model ids for the `_cat` trained models api and usage statistics {es-pull}109126[#109126] +* Do not use global ordinals strategy if the leaf reader context cannot be obtained {es-pull}108459[#108459] +* Fix NPE in trained model assignment updater {es-pull}108942[#108942] +* Fix serialising inference delete response {es-pull}109384[#109384] +* Fix "stack use after scope" memory error {ml-pull}2673[#2673] +* Fix trailing slash in `ml.get_categories` specification {es-pull}110146[#110146] +* Handle any exception thrown by inference {ml-pull}2680[#2680] +* Increase response size limit for batched requests {es-pull}110112[#110112] +* Offload request to generic threadpool {es-pull}109104[#109104] (issue: {es-issue}109100[#109100]) +* Propagate accurate deployment timeout {es-pull}109534[#109534] (issue: {es-issue}109407[#109407]) +* Refactor TextEmbeddingResults to use primitives rather than objects {es-pull}108161[#108161] +* Require question to be non-null in `QuestionAnsweringConfig` {es-pull}107972[#107972] +* Start Trained Model Deployment API request query params now override body params {es-pull}109487[#109487] +* Suppress deprecation warnings from ingest pipelines when deleting trained model {es-pull}108679[#108679] (issue: {es-issue}105004[#105004]) +* Use default translog durability on AD results index {es-pull}108999[#108999] +* Use the multi node routing action for internal inference services {es-pull}109358[#109358] +* [Inference API] Extract optional long instead of integer in `RateLimitSettings#of` {es-pull}108602[#108602] +* [Inference API] Fix serialization for inference delete endpoint response {es-pull}110431[#110431] +* [Inference API] Replace `model_id` with `inference_id` in inference API except when stored {es-pull}111366[#111366] + +Mapping:: +* Fix off by one error when handling null values in range fields {es-pull}107977[#107977] (issue: {es-issue}107282[#107282]) +* Limit number of synonym rules that can be created {es-pull}109981[#109981] (issue: {es-issue}108785[#108785]) +* Propagate mapper builder context flags across nested mapper builder context creation {es-pull}109963[#109963] +* `DenseVectorFieldMapper` fixed typo {es-pull}108065[#108065] + +Network:: +* Use 
proper executor for failing requests when connection closes {es-pull}109236[#109236] (issue: {es-issue}109225[#109225]) +* `NoSuchRemoteClusterException` should not be thrown when a remote is configured {es-pull}107435[#107435] (issue: {es-issue}107381[#107381]) + +Packaging:: +* Adding override for lintian false positive on `libvec.so` {es-pull}108521[#108521] (issue: {es-issue}108514[#108514]) + +Ranking:: +* Fix score count validation in reranker response {es-pull}111424[#111424] (issue: {es-issue}111202[#111202]) + +Rollup:: +* Fix trailing slash in two rollup specifications {es-pull}110176[#110176] + +Search:: +* Adding score from `RankDoc` to `SearchHit` {es-pull}108870[#108870] +* Better handling of multiple rescorers clauses with LTR {es-pull}109071[#109071] +* Correct query profiling for conjunctions {es-pull}108122[#108122] (issue: {es-issue}108116[#108116]) +* Fix `DecayFunctions'` `toString` {es-pull}107415[#107415] (issue: {es-issue}100870[#100870]) +* Fix leak in collapsing search results {es-pull}110927[#110927] +* Fork freeing search/scroll contexts to GENERIC pool {es-pull}109481[#109481] + +Security:: +* Add permission to secure access to certain config files {es-pull}107827[#107827] +* Add permission to secure access to certain config files specified by settings {es-pull}108895[#108895] +* Fix trappy timeouts in security settings APIs {es-pull}109233[#109233] + +Snapshot/Restore:: +* Stricter failure handling in multi-repo get-snapshots request handling {es-pull}107191[#107191] + +TSDB:: +* Sort time series indices by time range in `GetDataStreams` API {es-pull}107967[#107967] (issue: {es-issue}102088[#102088]) + +Transform:: +* Always pick the user `maxPageSize` value {es-pull}109876[#109876] (issue: {es-issue}109844[#109844]) +* Exit gracefully when deleted {es-pull}107917[#107917] (issue: {es-issue}107266[#107266]) +* Fix NPE during destination index creation {es-pull}108891[#108891] (issue: {es-issue}108890[#108890]) +* Forward `indexServiceSafe` exception to listener {es-pull}108517[#108517] (issue: {es-issue}108418[#108418]) +* Halt Indexer on Stop/Abort API {es-pull}107792[#107792] +* Handle `IndexNotFoundException` {es-pull}108394[#108394] (issue: {es-issue}107263[#107263]) +* Prevent concurrent jobs during cleanup {es-pull}109047[#109047] +* Redirect `VersionConflict` to reset code {es-pull}108070[#108070] +* Reset max page size to settings value {es-pull}109449[#109449] (issue: {es-issue}109308[#109308]) + +Vector Search:: +* Ensure vector similarity correctly limits `inner_hits` returned for nested kNN {es-pull}111363[#111363] (issue: {es-issue}111093[#111093]) +* Ensure we return non-negative scores when scoring scalar dot-products {es-pull}108522[#108522] + +Watcher:: +* Avoiding running watch jobs in TickerScheduleTriggerEngine if it is paused {es-pull}110061[#110061] (issue: {es-issue}105933[#105933]) + +[[deprecation-8.15.0]] +[float] +=== Deprecations + +ILM+SLM:: +* Deprecate using slm privileges to access ilm {es-pull}110540[#110540] + +Infra/Settings:: +* `ParseHeapRatioOrDeprecatedByteSizeValue` for `indices.breaker.total.limit` {es-pull}110236[#110236] + +Machine Learning:: +* Deprecate `text_expansion` and `weighted_tokens` queries {es-pull}109880[#109880] + +[[enhancement-8.15.0]] +[float] +=== Enhancements + +Aggregations:: +* Aggs: Scripted metric allow list {es-pull}109444[#109444] +* Enable inter-segment concurrency for low cardinality numeric terms aggs {es-pull}108306[#108306] +* Increase size of big arrays only when there is an actual value 
in the aggregators {es-pull}107764[#107764] +* Increase size of big arrays only when there is an actual value in the aggregators (Analytics module) {es-pull}107813[#107813] +* Optimise `BinaryRangeAggregator` for single value fields {es-pull}108016[#108016] +* Optimise cardinality aggregations for single value fields {es-pull}107892[#107892] +* Optimise composite aggregations for single value fields {es-pull}107897[#107897] +* Optimise few metric aggregations for single value fields {es-pull}107832[#107832] +* Optimise histogram aggregations for single value fields {es-pull}107893[#107893] +* Optimise multiterms aggregation for single value fields {es-pull}107937[#107937] +* Optimise terms aggregations for single value fields {es-pull}107930[#107930] +* Speed up collecting zero document string terms {es-pull}110922[#110922] + +Allocation:: +* Log shard movements {es-pull}105829[#105829] +* Support effective watermark thresholds in node stats API {es-pull}107244[#107244] (issue: {es-issue}106676[#106676]) + +Application:: +* Add Create or update query rule API call {es-pull}109042[#109042] +* Rename rule query and add support for multiple rulesets {es-pull}108831[#108831] +* Support multiple associated groups for TopN {es-pull}108409[#108409] (issue: {es-issue}108018[#108018]) +* [Connector API] Change `UpdateConnectorFiltering` API to have better defaults {es-pull}108612[#108612] + +Authentication:: +* Expose API Key cache metrics {es-pull}109078[#109078] + +Authorization:: +* Cluster state role mapper file settings service {es-pull}107886[#107886] +* Cluster-state based Security role mapper {es-pull}107410[#107410] +* Introduce role description field {es-pull}107088[#107088] +* [Osquery] Extend `kibana_system` role with an access to new `osquery_manager` index {es-pull}108849[#108849] + +Data streams:: +* Add metrics@custom component template to metrics-*-* index template {es-pull}109540[#109540] (issue: {es-issue}109475[#109475]) +* Apm-data: enable plugin by default {es-pull}108860[#108860] +* Apm-data: ignore malformed fields, and too many dynamic fields {es-pull}108444[#108444] +* Apm-data: improve default pipeline performance {es-pull}108396[#108396] (issue: {es-issue}108290[#108290]) +* Apm-data: improve indexing resilience {es-pull}108227[#108227] +* Apm-data: increase priority above Fleet templates {es-pull}108885[#108885] +* Apm-data: increase version for templates {es-pull}108340[#108340] +* Apm-data: set codec: best_compression for logs-apm.* data streams {es-pull}108862[#108862] +* Remove `default_field: message` from metrics index templates {es-pull}110651[#110651] + +Distributed:: +* Add `wait_for_completion` parameter to delete snapshot request {es-pull}109462[#109462] (issue: {es-issue}101300[#101300]) +* Improve mechanism for extracting the result of a `PlainActionFuture` {es-pull}110019[#110019] (issue: {es-issue}108125[#108125]) + +ES|QL:: +* Add `BlockHash` for 3 `BytesRefs` {es-pull}108165[#108165] +* Allow `LuceneSourceOperator` to early terminate {es-pull}108820[#108820] +* Check if `CsvTests` required capabilities exist {es-pull}108684[#108684] +* ESQL: Add aggregates node level reduction {es-pull}107876[#107876] +* ESQL: Add more time span units {es-pull}108300[#108300] +* ESQL: Implement LOOKUP, an "inline" enrich {es-pull}107987[#107987] (issue: {es-issue}107306[#107306]) +* ESQL: Renamed `TopList` to Top {es-pull}110347[#110347] +* ESQL: Union Types Support {es-pull}107545[#107545] (issue: {es-issue}100603[#100603]) +* ESQL: add REPEAT string function 
{es-pull}109220[#109220] +* ES|QL Add primitive float support to the Compute Engine {es-pull}109746[#109746] (issue: {es-issue}109178[#109178]) +* ES|QL Add primitive float variants of all aggregators to the compute engine {es-pull}109781[#109781] +* ES|QL: vectorize eval {es-pull}109332[#109332] +* Optimize ST_DISTANCE filtering with Lucene circle intersection query {es-pull}110102[#110102] (issue: {es-issue}109972[#109972]) +* Optimize for single value in ordinals grouping {es-pull}108118[#108118] +* Rewrite away type converting functions that do not convert types {es-pull}108713[#108713] (issue: {es-issue}107716[#107716]) +* ST_DISTANCE Function {es-pull}108764[#108764] (issue: {es-issue}108212[#108212]) +* Support metrics counter types in ESQL {es-pull}107877[#107877] +* [ESQL] CBRT function {es-pull}108574[#108574] +* [ES|QL] Convert string to datetime when the other size of an arithmetic operator is `date_period` or `time_duration` {es-pull}108455[#108455] +* [ES|QL] Support Named and Positional Parameters in `EsqlQueryRequest` {es-pull}108421[#108421] (issue: {es-issue}107029[#107029]) +* [ES|QL] `weighted_avg` {es-pull}109993[#109993] + +Engine:: +* Drop shards close timeout when stopping node. {es-pull}107978[#107978] (issue: {es-issue}107938[#107938]) +* Update translog `writeLocation` for `flushListener` after commit {es-pull}109603[#109603] + +Geo:: +* Optimize `GeoBounds` and `GeoCentroid` aggregations for single value fields {es-pull}107663[#107663] + +Health:: +* Log details of non-green indicators in `HealthPeriodicLogger` {es-pull}108266[#108266] + +Highlighting:: +* Unified Highlighter to support matched_fields {es-pull}107640[#107640] (issue: {es-issue}5172[#5172]) + +Infra/Core:: +* Add allocation explain output for THROTTLING shards {es-pull}109563[#109563] +* Create custom parser for ISO-8601 datetimes {es-pull}106486[#106486] (issue: {es-issue}102063[#102063]) +* Extend ISO8601 datetime parser to specify forbidden fields, allowing it to be used on more formats {es-pull}108606[#108606] +* add Elastic-internal stable bridge api for use by Logstash {es-pull}108171[#108171] + +Infra/Metrics:: +* Add auto-sharding APM metrics {es-pull}107593[#107593] +* Add request metric to `RestController` to track success/failure (by status code) {es-pull}109957[#109957] +* Allow RA metrics to be reported upon parsing completed or accumulated {es-pull}108726[#108726] +* Provide the `DocumentSizeReporter` with index mode {es-pull}108947[#108947] +* Return noop instance `DocSizeObserver` for updates with scripts {es-pull}108856[#108856] + +Ingest Node:: +* Add `continent_code` support to the geoip processor {es-pull}108780[#108780] (issue: {es-issue}85820[#85820]) +* Add support for the 'Connection Type' database to the geoip processor {es-pull}108683[#108683] +* Add support for the 'Domain' database to the geoip processor {es-pull}108639[#108639] +* Add support for the 'ISP' database to the geoip processor {es-pull}108651[#108651] +* Adding `hits_time_in_millis` and `misses_time_in_millis` to enrich cache stats {es-pull}107579[#107579] +* Adding `user_type` support for the enterprise database for the geoip processor {es-pull}108687[#108687] +* Adding human readable times to geoip stats {es-pull}107647[#107647] +* Include doc size info in ingest stats {es-pull}107240[#107240] (issue: {es-issue}106386[#106386]) +* Make ingest byte stat names more descriptive {es-pull}108786[#108786] +* Return ingest byte stats even when 0-valued {es-pull}108796[#108796] +* Test pipeline run after reroute 
{es-pull}108693[#108693] + +Logs:: +* Introduce a node setting controlling the activation of the `logs` index mode in logs@settings component template {es-pull}109025[#109025] (issue: {es-issue}108762[#108762]) +* Support index sorting with nested fields {es-pull}110251[#110251] (issue: {es-issue}107349[#107349]) + +Machine Learning:: +* Add Anthropic messages integration to Inference API {es-pull}109893[#109893] +* Add `sparse_vector` query {es-pull}108254[#108254] +* Add model download progress to the download task status {es-pull}107676[#107676] +* Add rate limiting support for the Inference API {es-pull}107706[#107706] +* Add the rerank task to the Elasticsearch internal inference service {es-pull}108452[#108452] +* Default the HF service to cosine similarity {es-pull}109967[#109967] +* GA the update trained model action {es-pull}108868[#108868] +* Handle the "JSON memory allocator bytes" field {es-pull}109653[#109653] +* Inference Processor: skip inference when all fields are missing {es-pull}108131[#108131] +* Log 'No statistics at.. ' message as a warning {ml-pull}2684[#2684] +* Optimise frequent item sets aggregation for single value fields {es-pull}108130[#108130] +* Sentence Chunker {es-pull}110334[#110334] +* [Inference API] Add Amazon Bedrock Support to Inference API {es-pull}110248[#110248] +* [Inference API] Add Mistral Embeddings Support to Inference API {es-pull}109194[#109194] +* [Inference API] Check for related pipelines on delete inference endpoint {es-pull}109123[#109123] + +Mapping:: +* Add ignored field values to synthetic source {es-pull}107567[#107567] +* Apply FLS to the contents of `IgnoredSourceFieldMapper` {es-pull}109931[#109931] +* Binary field enables doc values by default for index mode with synthe… {es-pull}107739[#107739] (issue: {es-issue}107554[#107554]) +* Feature/annotated text store defaults {es-pull}107922[#107922] (issue: {es-issue}107734[#107734]) +* Handle `ignore_above` in synthetic source for flattened fields {es-pull}110214[#110214] +* Opt in keyword field into fallback synthetic source if needed {es-pull}110016[#110016] +* Opt in number fields into fallback synthetic source when doc values a… {es-pull}110160[#110160] +* Reflect latest changes in synthetic source documentation {es-pull}109501[#109501] +* Store source for fields in objects with `dynamic` override {es-pull}108911[#108911] +* Store source for nested objects {es-pull}108818[#108818] +* Support synthetic source for `geo_point` when `ignore_malformed` is used {es-pull}109651[#109651] +* Support synthetic source for `scaled_float` and `unsigned_long` when `ignore_malformed` is used {es-pull}109506[#109506] +* Support synthetic source for date fields when `ignore_malformed` is used {es-pull}109410[#109410] +* Support synthetic source together with `ignore_malformed` in histogram fields {es-pull}109882[#109882] +* Track source for arrays of objects {es-pull}108417[#108417] (issue: {es-issue}90708[#90708]) +* Track synthetic source for disabled objects {es-pull}108051[#108051] + +Network:: +* Detect long-running tasks on network threads {es-pull}109204[#109204] + +Ranking:: +* Enabling profiling for `RankBuilders` and adding tests for RRF {es-pull}109470[#109470] + +Relevance:: +* [Query Rules] Add API calls to get or delete individual query rules within a ruleset {es-pull}109554[#109554] +* [Query Rules] Require Enterprise License for Query Rules {es-pull}109634[#109634] + +Search:: +* Add AVX-512 optimised vector distance functions for int7 on x64 {es-pull}109084[#109084] +* Add 
`SparseVectorStats` {es-pull}108793[#108793] +* Add `_name` support for top level `knn` clauses {es-pull}107645[#107645] (issues: {es-issue}106254[#106254], {es-issue}107448[#107448]) +* Add a SIMD (AVX2) optimised vector distance function for int7 on x64 {es-pull}108088[#108088] +* Add min/max range of the `event.ingested` field to cluster state for searchable snapshots {es-pull}106252[#106252] +* Add per-field KNN vector format to Index Segments API {es-pull}107216[#107216] +* Add support for hiragana_uppercase & katakana_uppercase token filters in kuromoji analysis plugin {es-pull}106553[#106553] +* Adding support for explain in rrf {es-pull}108682[#108682] +* Allow rescorer with field collapsing {es-pull}107779[#107779] (issue: {es-issue}27243[#27243]) +* Cut over stored fields to ZSTD for compression {es-pull}103374[#103374] +* Limit the value in prefix query {es-pull}108537[#108537] (issue: {es-issue}108486[#108486]) +* Make dense vector field type updatable {es-pull}106591[#106591] +* Multivalue Sparse Vector Support {es-pull}109007[#109007] + +Security:: +* Add bulk delete roles API {es-pull}110383[#110383] +* Remote cluster - API key security model - cluster privileges {es-pull}107493[#107493] + +Snapshot/Restore:: +* Denser in-memory representation of `ShardBlobsToDelete` {es-pull}109848[#109848] +* Log repo UUID at generation/registration time {es-pull}109672[#109672] +* Make repository analysis API available to non-operators {es-pull}110179[#110179] (issue: {es-issue}100318[#100318]) +* Track `RequestedRangeNotSatisfiedException` separately in S3 Metrics {es-pull}109657[#109657] + +Stats:: +* DocsStats: Add human readable bytesize {es-pull}109720[#109720] + +TSDB:: +* Optimise `time_series` aggregation for single value fields {es-pull}107990[#107990] +* Support `ignore_above` on keyword dimensions {es-pull}110337[#110337] + +Vector Search:: +* Adding hamming distance function to painless for `dense_vector` fields {es-pull}109359[#109359] +* Support k parameter for knn query {es-pull}110233[#110233] (issue: {es-issue}108473[#108473]) + +[[feature-8.15.0]] +[float] +=== New features + +Aggregations:: +* Opt `scripted_metric` out of parallelization {es-pull}109597[#109597] + +Application:: +* [Connector API] Add claim sync job endpoint {es-pull}109480[#109480] + +ES|QL:: +* ESQL: Add `ip_prefix` function {es-pull}109070[#109070] (issue: {es-issue}99064[#99064]) +* ESQL: Introduce a casting operator, `::` {es-pull}107409[#107409] +* ESQL: `top_list` aggregation {es-pull}109386[#109386] (issue: {es-issue}109213[#109213]) +* ESQL: add Arrow dataframes output format {es-pull}109873[#109873] +* Reapply "ESQL: Expose "_ignored" metadata field" {es-pull}108871[#108871] + +Infra/REST API:: +* Add a capabilities API to check node and cluster capabilities {es-pull}106820[#106820] + +Ingest Node:: +* Directly download commercial ip geolocation databases from providers {es-pull}110844[#110844] +* Mark the Redact processor as Generally Available {es-pull}110395[#110395] + +Logs:: +* Introduce logs index mode as Tech Preview {es-pull}108896[#108896] (issue: {es-issue}108896[#108896]) + +Machine Learning:: +* Add support for Azure AI Studio embeddings and completions to the inference service. 
{es-pull}108472[#108472] + +Mapping:: +* Add `semantic_text` field type and `semantic` query {es-pull}110338[#110338] +* Add generic fallback implementation for synthetic source {es-pull}108222[#108222] +* Add synthetic source support for `geo_shape` via fallback implementation {es-pull}108881[#108881] +* Add synthetic source support for binary fields {es-pull}107549[#107549] +* Enable fallback synthetic source by default {es-pull}109370[#109370] (issue: {es-issue}106460[#106460]) +* Enable fallback synthetic source for `point` and `shape` {es-pull}109312[#109312] +* Enable fallback synthetic source for `token_count` {es-pull}109044[#109044] +* Implement synthetic source support for annotated text field {es-pull}107735[#107735] +* Implement synthetic source support for range fields {es-pull}107081[#107081] +* Support arrays in fallback synthetic source implementation {es-pull}108878[#108878] +* Support synthetic source for `aggregate_metric_double` when ignore_malf… {es-pull}108746[#108746] + +Ranking:: +* Add text similarity reranker retriever {es-pull}109813[#109813] + +Relevance:: +* Mark Query Rules as GA {es-pull}110004[#110004] + +Search:: +* Add new int4 quantization to dense_vector {es-pull}109317[#109317] +* Adding RankFeature search phase implementation {es-pull}108538[#108538] +* Adding aggregations support for the `_ignored` field {es-pull}101373[#101373] (issue: {es-issue}59946[#59946]) +* Update Lucene version to 9.11 {es-pull}109219[#109219] + +Security:: +* Query Roles API {es-pull}108733[#108733] + +Transform:: +* Introduce _transform/_node_stats API {es-pull}107279[#107279] + +Vector Search:: +* Adds new `bit` `element_type` for `dense_vectors` {es-pull}110059[#110059] + +[[upgrade-8.15.0]] +[float] +=== Upgrades + +Infra/Plugins:: +* Update ASM to 9.7 for plugin scanner {es-pull}108822[#108822] (issue: {es-issue}108776[#108776]) + +Ingest Node:: +* Bump Tika dependency to 2.9.2 {es-pull}108144[#108144] + +Network:: +* Upgrade to Netty 4.1.109 {es-pull}108155[#108155] + +Search:: +* Upgrade to Lucene-9.11.1 {es-pull}110234[#110234] + +Security:: +* Upgrade bouncy castle (non-fips) to 1.78.1 {es-pull}108223[#108223] + +Snapshot/Restore:: +* Bump jackson version in modules:repository-azure {es-pull}109717[#109717] + diff --git a/docs/reference/release-notes/highlights.asciidoc b/docs/reference/release-notes/highlights.asciidoc index ead1596c64fdd..66c85789dd447 100644 --- a/docs/reference/release-notes/highlights.asciidoc +++ b/docs/reference/release-notes/highlights.asciidoc @@ -44,6 +44,90 @@ faster indexing and similar retrieval latencies. {es-pull}103374[#103374] +[discrete] +[[stricter_failure_handling_in_multi_repo_get_snapshots_request_handling]] +=== Stricter failure handling in multi-repo get-snapshots request handling +If a multi-repo get-snapshots request encounters a failure in one of the +targeted repositories then earlier versions of Elasticsearch would proceed +as if the faulty repository did not exist, except for a per-repository +failure report in a separate section of the response body. This makes it +impossible to paginate the results properly in the presence of failures. In +versions 8.15.0 and later this API's failure handling behaviour has been +made stricter, reporting an overall failure if any targeted repository's +contents cannot be listed. + +{es-pull}107191[#107191] + +[discrete] +[[introduce_logs_index_mode_as_tech_preview]] +=== Introduce `logsdb` index mode as Tech Preview +This change introduces a new index mode named `logsdb`. 
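+For example, the new mode can be enabled through an index template (an illustrative sketch; the template name and index pattern are placeholders):
+
+[source,js]
+----
+PUT _index_template/my-logs-template <1>
+{
+  "index_patterns": ["my-logs-*"],
+  "template": {
+    "settings": {
+      "index.mode": "logsdb" <2>
+    }
+  }
+}
+----
+// NOTCONSOLE
+<1> The template name and index pattern are placeholders; use values that match your own logs indices.
+<2> Enables the new `logsdb` index mode for indices created from this template.
+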
+When the new index mode is enabled, the following storage saving features are enabled automatically:
+
+* Synthetic source, which omits storing the `_source` field. When `_source` or part of it is requested, it is synthesized on the fly at runtime.
+* Index sorting. By default, indices are sorted by the `host.name` and `@timestamp` fields at index time. This can be overridden if other sorting fields yield a better compression rate.
+* More space efficient compression for fields with doc values enabled. These are the same codecs used
+  when the `time_series` index mode is enabled.
+
+The `index.mode` index setting should be set to `logsdb` in index templates, or when creating a plain index.
+Benchmarks and other tests have shown that logs data sets use around 2.5 times less storage with the new index mode enabled compared to not configuring it.
+The new `logsdb` index mode is a tech preview feature.
+
+{es-pull}108896[#108896]
+
+[discrete]
+[[add_new_int4_quantization_to_dense_vector]]
+=== Add new int4 quantization to dense_vector
+New int4 (half-byte) scalar quantization support via two new index types: `int4_hnsw` and `int4_flat`.
+This gives an 8x reduction from `float32` with some accuracy loss. In addition to requiring less memory, this
+improves query and merge speed significantly when compared to raw vectors.
+
+{es-pull}109317[#109317]
+
+[discrete]
+[[mark_query_rules_as_ga]]
+=== Mark Query Rules as GA
+This PR marks query rules as Generally Available. The query rules APIs are no longer
+in tech preview.
+
+{es-pull}110004[#110004]
+
+[discrete]
+[[adds_new_bit_element_type_for_dense_vectors]]
+=== Adds new `bit` `element_type` for `dense_vectors`
+This adds `bit` vector support via `element_type: bit` for
+`dense_vector` fields. The new element type works for indexed and non-indexed
+vectors. Additionally, it works with the `hnsw` and `flat` index types. No
+quantization based codec works with this element type, which is
+consistent with `byte` vectors.
+
+`bit` vectors accept up to `32768` dimensions and expect vectors
+that are being indexed to be encoded either as a hexadecimal string or a
+`byte[]` array where each element of the `byte` array represents `8`
+bits of the vector.
+
+`bit` vectors support script usage and regular query usage. When
+indexed, all comparisons are `xor` and `popcount` summations (that is,
+hamming distance), and the scores are transformed and normalized given
+the vector dimensions.
+
+For scripts, `l1norm` is the same as `hamming` distance and `l2norm` is
+`sqrt(l1norm)`. `dotProduct` and `cosineSimilarity` are not supported.
+
+Note that the number of dimensions for this `element_type` must always be
+divisible by `8`, and the `byte[]` vectors provided at index time must
+have length `dim/8`, where each byte element represents `8` bits of
+the vector.
+
+{es-pull}110059[#110059]
+
+[discrete]
+[[redact_processor_generally_available]]
+=== The Redact processor is Generally Available
+The Redact processor uses the Grok rules engine to obscure text in the input document matching the given Grok patterns. The Redact processor was initially released as Technical Preview in `8.7.0`, and is now released as Generally Available.
+
+{es-pull}110395[#110395]
+
+
 // end::notable-highlights[]


@@ -61,6 +145,17 @@ set the JVM property `es.datetime.java_time_parsers=true` on all ES nodes.
 {es-pull}106486[#106486]
 
+[discrete]
+[[new_custom_parser_for_more_iso_8601_date_formats]]
+=== New custom parser for more ISO-8601 date formats
+Following on from #106486, this extends the custom ISO-8601 datetime parser to cover the `strict_year`,
+`strict_year_month`, `strict_date_time`, `strict_date_time_no_millis`, `strict_date_hour_minute_second`,
+`strict_date_hour_minute_second_millis`, and `strict_date_hour_minute_second_fraction` date formats.
+As before, the parser will use the existing java.time parser if there are parsing issues, and the
+`es.datetime.java_time_parsers=true` JVM property will force the use of the old parsers regardless.
+
+{es-pull}108606[#108606]
+
 [discrete]
 [[preview_support_for_connection_type_domain_isp_databases_in_geoip_processor]]
 === Preview: Support for the 'Connection Type, 'Domain', and 'ISP' databases in the geoip processor
@@ -86,3 +181,23 @@ But, here are some particular highlights:
 
 {es-pull}109219[#109219]
 
+[discrete]
+[[synthetic_source_improvements]]
+=== Synthetic `_source` improvements
+There are multiple improvements to synthetic `_source` functionality:
+
+* Synthetic `_source` is now supported for all field types, including `nested` and `object`. `object` fields are supported with `enabled` set to `false`.
+
+* Synthetic `_source` can be enabled together with the `ignore_malformed` and `ignore_above` parameters for all field types that support them.
+
+{es-pull}109501[#109501]
+
+[discrete]
+[[index_sorting_on_indexes_with_nested_fields]]
+=== Index sorting on indexes with nested fields
+Index sorting is now supported for indexes with mappings containing nested objects.
+However, the index sort spec (as specified by `index.sort.field`) still can't contain
+any nested fields.
+
+{es-pull}110251[#110251]
+
diff --git a/docs/reference/rest-api/security.asciidoc b/docs/reference/rest-api/security.asciidoc
index 04cd838c45600..82cf38e52bd80 100644
--- a/docs/reference/rest-api/security.asciidoc
+++ b/docs/reference/rest-api/security.asciidoc
@@ -50,6 +50,7 @@ Use the following APIs to add, remove, update, and retrieve roles in the native
 * <>
 * <>
 * <>
+* <>
 
 [discrete]
 [[security-token-apis]]
@@ -192,6 +193,7 @@ include::security/get-app-privileges.asciidoc[]
 include::security/get-builtin-privileges.asciidoc[]
 include::security/get-role-mappings.asciidoc[]
 include::security/get-roles.asciidoc[]
+include::security/query-role.asciidoc[]
 include::security/get-service-accounts.asciidoc[]
 include::security/get-service-credentials.asciidoc[]
 include::security/get-settings.asciidoc[]
diff --git a/docs/reference/rest-api/security/get-roles.asciidoc b/docs/reference/rest-api/security/get-roles.asciidoc
index 3eb5a735194c6..3cc2f95c6ea7e 100644
--- a/docs/reference/rest-api/security/get-roles.asciidoc
+++ b/docs/reference/rest-api/security/get-roles.asciidoc
@@ -38,7 +38,10 @@ API cannot retrieve roles that are defined in roles files.
 ==== {api-response-body-title}
 
 A successful call returns an array of roles with the JSON representation of the
-role.
+role. The returned role format is a simple extension of the <> format,
+only adding an extra field, `transient_metadata.enabled`.
+This field is `false` if the role has been automatically disabled, for example when the license
+level does not allow some permissions that the role grants.
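+
+For example, a role that has been automatically disabled might be returned as follows
+(an illustrative sketch; the role name and the other fields shown depend on the role definition):
+
+[source,js]
+----
+{
+  "my_restricted_role": { <1>
+    "cluster": [ "all" ],
+    "indices": [ ],
+    "applications": [ ],
+    "run_as": [ ],
+    "metadata": { },
+    "transient_metadata": {
+      "enabled": false <2>
+    }
+  }
+}
+----
+// NOTCONSOLE
+<1> The role name and the other fields are illustrative placeholders.
+<2> `enabled` is `false` because the role has been automatically disabled.
+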
 [[security-api-get-role-response-codes]]
 ==== {api-response-codes-title}
 
diff --git a/docs/reference/rest-api/security/query-role.asciidoc b/docs/reference/rest-api/security/query-role.asciidoc
new file mode 100644
index 0000000000000..937bd263140fc
--- /dev/null
+++ b/docs/reference/rest-api/security/query-role.asciidoc
@@ -0,0 +1,283 @@
+[role="xpack"]
+[[security-api-query-role]]
+=== Query Role API
+
+++++
+Query Role
+++++
+
+Retrieves roles with <> in a <> fashion.
+
+[[security-api-query-role-request]]
+==== {api-request-title}
+
+`GET /_security/_query/role`
+
+`POST /_security/_query/role`
+
+[[security-api-query-role-prereqs]]
+==== {api-prereq-title}
+
+* To use this API, you must have at least the `read_security` cluster privilege.
+
+[[security-api-query-role-desc]]
+==== {api-description-title}
+
+The role management APIs are generally the preferred way to manage roles, rather than using
+<>.
+The query roles API does not retrieve roles that are defined in roles files, nor <> ones.
+You can optionally filter the results with a query, and the results can be paginated and sorted.
+
+[[security-api-query-role-request-body]]
+==== {api-request-body-title}
+
+You can specify the following parameters in the request body:
+
+`query`::
+(Optional, string) A <> to filter which roles to return.
+The query supports a subset of query types, including
+<>, <>,
+<>, <>,
+<>, <>,
+<>, <>,
+<>, <>,
+and <>.
++
+You can query the following values associated with a role.
++
+.Valid values for `query`
+[%collapsible%open]
+====
+`name`::
+(keyword) The <> of the role.
+
+`description`::
+(text) The <> of the role.
+
+`metadata`::
+(flattened) Metadata field associated with the <>, such as `metadata.app_tag`.
+Note that metadata is internally indexed as a <> field type.
+This means that all sub-fields act like `keyword` fields when querying and sorting.
+It also implies that it is not possible to refer to a subset of metadata fields using wildcard patterns,
+e.g. `metadata.field*`, even for query types that support field name patterns.
+Lastly, all the metadata fields can be searched together when simply mentioning the
+`metadata` field (i.e. not followed by any dot and sub-field name).
+
+`applications`::
+The list of <> that the role grants.
+
+`application`:::
+(keyword) The name of the application associated with the privileges and resources.
+
+`privileges`:::
+(keyword) The names of the privileges that the role grants.
+
+`resources`:::
+(keyword) The resources to which the privileges apply.
+
+====
+
+include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=from]
++
+By default, you cannot page through more than 10,000 hits using the `from` and
+`size` parameters. To page through more hits, use the
+<> parameter.
+
+`size`::
+(Optional, integer) The number of hits to return. Must not be negative and defaults to `10`.
++
+By default, you cannot page through more than 10,000 hits using the `from` and
+`size` parameters. To page through more hits, use the
+<> parameter.
+
+`sort`::
+(Optional, object) <>. You can sort on role fields such as `name`.
+In addition, sort can also be applied to the `_doc` field to sort by index order.
+
+`search_after`::
+(Optional, array) <> definition.
+
+
+[[security-api-query-role-response-body]]
+==== {api-response-body-title}
+
+This API returns the following top-level fields:
+
+`total`::
+The total number of roles found.
+
+`count`::
+The number of roles returned in the response.
+
+`roles`::
+A list of roles that match the query.
+The returned role format is an extension of the <> format. +It adds the `transient_metadata.enabled` and the `_sort` fields. +`transient_metadata.enabled` is set to `false` in case the role is automatically disabled, +for example when the role grants privileges that are not allowed by the installed license. +`_sort` is present when the search query sorts on some field. +It contains the array of values that have been used for sorting. + +[[security-api-query-role-example]] +==== {api-examples-title} + +The following request lists all roles, sorted by the role name: + +[source,console] +---- +POST /_security/_query/role +{ + "sort": ["name"] +} +---- +// TEST[setup:admin_role,user_role] + +A successful call returns a JSON structure that contains the information +retrieved for one or more roles: + +[source,console-result] +---- +{ + "total": 2, + "count": 2, + "roles": [ <1> + { + "name" : "my_admin_role", + "cluster" : [ + "all" + ], + "indices" : [ + { + "names" : [ + "index1", + "index2" + ], + "privileges" : [ + "all" + ], + "field_security" : { + "grant" : [ + "title", + "body" + ] + }, + "allow_restricted_indices" : false + } + ], + "applications" : [ ], + "run_as" : [ + "other_user" + ], + "metadata" : { + "version" : 1 + }, + "transient_metadata" : { + "enabled" : true + }, + "description" : "Grants full access to all management features within the cluster.", + "_sort" : [ + "my_admin_role" + ] + }, + { + "name" : "my_user_role", + "cluster" : [ ], + "indices" : [ + { + "names" : [ + "index1", + "index2" + ], + "privileges" : [ + "all" + ], + "field_security" : { + "grant" : [ + "title", + "body" + ] + }, + "allow_restricted_indices" : false + } + ], + "applications" : [ ], + "run_as" : [ ], + "metadata" : { + "version" : 1 + }, + "transient_metadata" : { + "enabled" : true + }, + "description" : "Grants user access to some indicies.", + "_sort" : [ + "my_user_role" + ] + } + ] +} +---- +// TEST[continued] + +<1> The list of roles that were retrieved for this request + +Similarly, the following request can be used to query only the user access role, +given its description: + +[source,console] +---- +POST /_security/_query/role +{ + "query": { + "match": { + "description": { + "query": "user access" + } + } + }, + "size": 1 <1> +} +---- +// TEST[continued] + +<1> Return only the best matching role + +[source,console-result] +---- +{ + "total": 2, + "count": 1, + "roles": [ + { + "name" : "my_user_role", + "cluster" : [ ], + "indices" : [ + { + "names" : [ + "index1", + "index2" + ], + "privileges" : [ + "all" + ], + "field_security" : { + "grant" : [ + "title", + "body" + ] + }, + "allow_restricted_indices" : false + } + ], + "applications" : [ ], + "run_as" : [ ], + "metadata" : { + "version" : 1 + }, + "transient_metadata" : { + "enabled" : true + }, + "description" : "Grants user access to some indicies." + } + ] +} +---- diff --git a/docs/reference/rest-api/security/query-user.asciidoc b/docs/reference/rest-api/security/query-user.asciidoc index 952e0f40f2a3a..23852f0f2eed7 100644 --- a/docs/reference/rest-api/security/query-user.asciidoc +++ b/docs/reference/rest-api/security/query-user.asciidoc @@ -66,13 +66,6 @@ The email of the user. Specifies whether the user is enabled. ==== -[[security-api-query-user-query-params]] -==== {api-query-parms-title} - -`with_profile_uid`:: -(Optional, boolean) Determines whether to retrieve the <> `uid`, -if exists, for the users. Defaults to `false`. 
- include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=from] + By default, you cannot page through more than 10,000 hits using the `from` and @@ -93,6 +86,12 @@ In addition, sort can also be applied to the `_doc` field to sort by index order `search_after`:: (Optional, array) <> definition. +[[security-api-query-user-query-params]] +==== {api-query-parms-title} + +`with_profile_uid`:: +(Optional, boolean) Determines whether to retrieve the <> `uid`, +if exists, for the users. Defaults to `false`. [[security-api-query-user-response-body]] ==== {api-response-body-title} @@ -191,7 +190,7 @@ Use the user information retrieve the user with a query: [source,console] ---- -GET /_security/_query/user +POST /_security/_query/user { "query": { "prefix": { @@ -231,7 +230,7 @@ To retrieve the user `profile_uid` as part of the response: [source,console] -------------------------------------------------- -GET /_security/_query/user?with_profile_uid=true +POST /_security/_query/user?with_profile_uid=true { "query": { "prefix": { @@ -272,7 +271,7 @@ Use a `bool` query to issue complex logical conditions and use [source,js] ---- -GET /_security/_query/user +POST /_security/_query/user { "query": { "bool": { diff --git a/docs/reference/scripting/security.asciidoc b/docs/reference/scripting/security.asciidoc index 0f322d08726b9..249a705e92817 100644 --- a/docs/reference/scripting/security.asciidoc +++ b/docs/reference/scripting/security.asciidoc @@ -9,8 +9,8 @@ security in a defense in depth strategy for scripting. The second layer of security is the https://www.oracle.com/java/technologies/javase/seccodeguide.html[Java Security Manager]. As part of its startup sequence, {es} enables the Java Security Manager to limit the actions that -portions of the code can take. <> uses -the Java Security Manager as an additional layer of defense to prevent scripts +portions of the code can take. <> uses +the Java Security Manager as an additional layer of defense to prevent scripts from doing things like writing files and listening to sockets. {es} uses @@ -18,22 +18,28 @@ from doing things like writing files and listening to sockets. https://www.chromium.org/developers/design-documents/sandbox/osx-sandboxing-design[Seatbelt] in macOS, and https://msdn.microsoft.com/en-us/library/windows/desktop/ms684147[ActiveProcessLimit] -on Windows as additional security layers to prevent {es} from forking or +on Windows as additional security layers to prevent {es} from forking or running other processes. +Finally, scripts used in +<> +can be restricted to a defined list of scripts, or forbidden altogether. +This can prevent users from running particularly slow or resource intensive aggregation +queries. + You can modify the following script settings to restrict the type of scripts -that are allowed to run, and control the available +that are allowed to run, and control the available {painless}/painless-contexts.html[contexts] that scripts can run in. To -implement additional layers in your defense in depth strategy, follow the +implement additional layers in your defense in depth strategy, follow the <>. [[allowed-script-types-setting]] [discrete] === Allowed script types setting -{es} supports two script types: `inline` and `stored`. By default, {es} is -configured to run both types of scripts. To limit what type of scripts are run, -set `script.allowed_types` to `inline` or `stored`. To prevent any scripts from +{es} supports two script types: `inline` and `stored`. 
By default, {es} is +configured to run both types of scripts. To limit what type of scripts are run, +set `script.allowed_types` to `inline` or `stored`. To prevent any scripts from running, set `script.allowed_types` to `none`. IMPORTANT: If you use {kib}, set `script.allowed_types` to both or just `inline`. @@ -61,3 +67,48 @@ For example, to allow scripts to run only in `scoring` and `update` contexts: ---- script.allowed_contexts: score, update ---- + +[[allowed-script-in-aggs-settings]] +[discrete] +=== Allowed scripts in scripted metrics aggregations + +By default, all scripts are permitted in +<>. +To restrict the set of allowed scripts, set +<> +to `true` and provide the allowed scripts using +<> +and/or +<>. + +To disallow certain script types, omit the corresponding script list +(`search.aggs.allowed_inline_metric_scripts` or +`search.aggs.allowed_stored_metric_scripts`) or set it to an empty array. +When both script lists are not empty, the given stored scripts and the given inline scripts +will be allowed. + +The following example permits only 4 specific stored scripts to be used, and no inline scripts: + +[source,yaml] +---- +search.aggs.only_allowed_metric_scripts: true +search.aggs.allowed_inline_metric_scripts: [] +search.aggs.allowed_stored_metric_scripts: + - script_id_1 + - script_id_2 + - script_id_3 + - script_id_4 +---- + +Conversely, the next example allows specific inline scripts but no stored scripts: + +[source,yaml] +---- +search.aggs.only_allowed_metric_scripts: true +search.aggs.allowed_inline_metric_scripts: + - 'state.transactions = []' + - 'state.transactions.add(doc.some_field.value)' + - 'long sum = 0; for (t in state.transactions) { sum += t } return sum' + - 'long sum = 0; for (a in states) { sum += a } return sum' +search.aggs.allowed_stored_metric_scripts: [] +---- diff --git a/docs/reference/search/multi-search-template-api.asciidoc b/docs/reference/search/multi-search-template-api.asciidoc index c8eea52a6fd9b..b1c9518b1f2bc 100644 --- a/docs/reference/search/multi-search-template-api.asciidoc +++ b/docs/reference/search/multi-search-template-api.asciidoc @@ -22,9 +22,6 @@ PUT _scripts/my-search-template }, "from": "{{from}}", "size": "{{size}}" - }, - "params": { - "query_string": "My query string" } } } diff --git a/docs/reference/search/render-search-template-api.asciidoc b/docs/reference/search/render-search-template-api.asciidoc index 1f259dddf6879..0c782f26068e6 100644 --- a/docs/reference/search/render-search-template-api.asciidoc +++ b/docs/reference/search/render-search-template-api.asciidoc @@ -22,9 +22,6 @@ PUT _scripts/my-search-template }, "from": "{{from}}", "size": "{{size}}" - }, - "params": { - "query_string": "My query string" } } } diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 590df272cc89e..bf97da15a1ccf 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -28,6 +28,9 @@ A <> that replaces the functionality of a <> that produces top documents from <>. +`text_similarity_reranker`:: +A <> that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model. + [[standard-retriever]] ==== Standard Retriever @@ -74,23 +77,48 @@ Collapses the top documents by a specified key into a single top document per ke When a retriever tree contains a compound retriever (a retriever with two or more child retrievers) *only* the query element is allowed. 
-===== Example
+[discrete]
+[[standard-retriever-example]]
+==== Example
 
 [source,js]
 ----
-GET /index/_search
+GET /restaurants/_search
 {
-  "retriever": {
-    "standard": {
-      "query" { ... },
-      "filter" { ... },
-      "min_score": ...
+  "retriever": { <1>
+    "standard": { <2>
+      "query": { <3>
+        "bool": { <4>
+          "should": [ <5>
+            {
+              "match": { <6>
+                "region": "Austria"
+              }
+            }
+          ],
+          "filter": [ <7>
+            {
+              "term": { <8>
+                "year": "2019" <9>
+              }
+            }
+          ]
         }
-      },
-      "size": ...
+      }
+    }
+  }
 }
 ----
 // NOTCONSOLE
+<1> Opens the `retriever` object.
+<2> The `standard` retriever is used for defining traditional {es} queries.
+<3> The entry point for defining the search query.
+<4> The `bool` object allows for combining multiple query clauses logically.
+<5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will have increased relevancy scores.
+<6> The `match` object finds documents where the `region` field contains the word "Austria."
+<7> The `filter` array provides filtering conditions that must be met but do not contribute to the relevancy score.
+<8> The `term` object is used for exact matches, in this case, filtering documents by the `year` field.
+<9> The exact value to match in the `year` field.
 
 [[knn-retriever]]
 ==== kNN Retriever
@@ -139,29 +167,39 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=knn-similarity]
 
 The parameters `query_vector` and `query_vector_builder` cannot be used together.
 
-===== Example:
+[discrete]
+[[knn-retriever-example]]
+==== Example
 
 [source,js]
 ----
-GET /index/_search
+GET my-embeddings/_search
 {
-  "retriever": {
-    "knn": {
-      "field": ...,
-      "query_vector": ...,
-      "k": ...,
-      "num_candidates": ...
-    }
+  "retriever": {
+    "knn": { <1>
+      "field": "vector", <2>
+      "query_vector": [10, 22, 77], <3>
+      "k": 10, <4>
+      "num_candidates": 10 <5>
     }
+  }
 }
 ----
 // NOTCONSOLE
+<1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity.
+<2> Specifies the field name that contains the vectors.
+<3> The query vector against which document vectors are compared in the `knn` search.
+<4> The number of nearest neighbors to return as top hits. This value must be fewer than or equal to `num_candidates`.
+<5> The size of the initial candidate set from which the final `k` nearest neighbors are selected.
+
 [[rrf-retriever]]
 ==== RRF Retriever
 
-An <> retriever returns top documents based on the RRF formula
+An <> retriever returns top documents based on the RRF formula,
 equally weighting two or more child retrievers.
+Reciprocal rank fusion (RRF) is a method for combining multiple result
+sets with different relevance indicators into a single result set.
 
 ===== Parameters
 
@@ -177,29 +215,272 @@ An RRF retriever is a compound retriever. Child retrievers may not use elements
 that are restricted by having a compound retriever as part of the
 retriever tree.
-===== Example +[discrete] +[[rrf-retriever-example-hybrid]] +==== Example: Hybrid search + +A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF: + +[source,js] +---- +GET /restaurants/_search +{ + "retriever": { + "rrf": { <1> + "retrievers": [ <2> + { + "standard": { <3> + "query": { + "multi_match": { + "query": "San Francisco", + "fields": [ + "city", + "region" + ] + } + } + } + }, + { + "knn": { <4> + "field": "vector", + "query_vector": [10, 22, 77], + "k": 10, + "num_candidates": 10 + } + } + ], + "rank_constant": 0.3, <5> + "rank_window_size": 50 <6> + } + } +} +---- +// NOTCONSOLE +<1> Defines a retriever tree with an RRF retriever. +<2> The sub-retriever array. +<3> The first sub-retriever is a `standard` retriever. +<4> The second sub-retriever is a `knn` retriever. +<5> The rank constant for the RRF retriever. +<6> The rank window size for the RRF retriever. + +[discrete] +[[rrf-retriever-example-hybrid-sparse]] +==== Example: Hybrid search with sparse vectors + +A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF: + +[source,js] +---- +GET movies/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "sparse_vector": { + "field": "plot_embedding", + "inference_id": "my-elser-model", + "query": "films that explore psychological depths" + } + } + } + }, + { + "standard": { + "query": { + "multi_match": { + "query": "crime", + "fields": [ + "plot", + "title" + ] + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [10, 22, 77], + "k": 10, + "num_candidates": 10 + } + } + ] + } + } +} +---- +// NOTCONSOLE + +[[text-similarity-reranker-retriever]] +==== Text Similarity Re-ranker Retriever + +The `text_similarity_reranker` retriever uses an NLP model to improve search results by reordering the top-k documents based on their semantic similarity to the query. + +[TIP] +==== +Refer to <> for a high level overview of semantic reranking. +==== + +===== Prerequisites + +To use `text_similarity_reranker` you must first set up a `rerank` task using the <>. +The `rerank` task should be set up with a machine learning model that can compute text similarity. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es}. + +Currently you can: + +* Integrate directly with the <> using the `rerank` task type +* Integrate directly with the <> using the `rerank` task type +* Upload a model to {es} with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland] using the `text_similarity` NLP task type. +** Then set up an <> with the `rerank` task type +** Refer to the <> on this page for a step-by-step guide. + +===== Parameters + +`field`:: +(Required, `string`) ++ +The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the `inferenceText`. + +`inference_id`:: +(Required, `string`) ++ +Unique identifier of the inference endpoint created using the {infer} API. + +`inference_text`:: +(Required, `string`) ++ +The text snippet used as the basis for similarity comparison. + +`rank_window_size`:: +(Optional, `int`) ++ +The number of top documents to consider in the re-ranking process. Defaults to `10`. + +`min_score`:: +(Optional, `float`) ++ +Sets a minimum threshold score for including documents in the re-ranked results. 
Documents with similarity scores below this threshold will be excluded. Note that score calculations vary depending on the model used.
+
+===== Restrictions
+
+A text similarity re-ranker retriever is a compound retriever. Child retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree.
+
+[discrete]
+[[text-similarity-reranker-retriever-example-cohere]]
+==== Example: Cohere Rerank
+
+This example enables out-of-the-box semantic search by reranking top documents using the Cohere Rerank API. This approach eliminates the need to generate and store embeddings for all indexed documents.
+This requires a <> using the `rerank` task type.
 
 [source,js]
 ----
 GET /index/_search
 {
-  "retriever": {
-    "rrf": {
-      "retrievers": [
-        {
-          "standard" { ... }
-        },
-        {
-          "knn": { ... }
-        }
-      ],
-      "rank_constant": ...
-      "rank_window_size": ...
+  "retriever": {
+    "text_similarity_reranker": {
+      "retriever": {
+        "standard": {
+          "query": {
+            "match_phrase": {
+              "text": "landmark in Paris"
+            }
+          }
+        }
+      },
+      "field": "text",
+      "inference_id": "my-cohere-rerank-model",
+      "inference_text": "Most famous landmark in Paris",
+      "rank_window_size": 100,
+      "min_score": 0.5
+    }
+  }
+}
+----
+// NOTCONSOLE
+
+[discrete]
+[[text-similarity-reranker-retriever-example-eland]]
+==== Example: Semantic reranking with a Hugging Face model
+
+The following example uses the `cross-encoder/ms-marco-MiniLM-L-6-v2` model from Hugging Face to rerank search results based on semantic similarity.
+The model must be uploaded to {es} using https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch[Eland].
+
+[TIP]
+====
+Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es}.
+====
+
+Follow these steps to load the model and create a semantic reranker.
+
+. Install Eland using `pip`
++
+[source,sh]
+----
+python -m pip install eland[pytorch]
+----
++
+. Upload the model to {es} using Eland. This example assumes you have an Elastic Cloud deployment and an API key. Refer to the https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch-auth[Eland documentation] for more authentication options.
++
+[source,sh]
+----
+eland_import_hub_model \
+  --cloud-id $CLOUD_ID \
+  --es-api-key $ES_API_KEY \
+  --hub-model-id cross-encoder/ms-marco-MiniLM-L-6-v2 \
+  --task-type text_similarity \
+  --clear-previous \
+  --start
+----
++
+. Create an inference endpoint for the `rerank` task
++
+[source,js]
+----
+PUT _inference/rerank/my-msmarco-minilm-model
+{
+  "service": "elasticsearch",
+  "service_settings": {
+    "num_allocations": 1,
+    "num_threads": 1,
+    "model_id": "cross-encoder__ms-marco-minilm-l-6-v2"
+  }
+}
+----
+// NOTCONSOLE
++
+. Define a `text_similarity_rerank` retriever.
++
+[source,js]
+----
+POST movies/_search
+{
+  "retriever": {
+    "text_similarity_reranker": {
+      "retriever": {
+        "standard": {
+          "query": {
+            "match": {
+              "genre": "drama"
+            }
+          }
+        }
       }
+      },
+      "field": "plot",
+      "inference_id": "my-msmarco-minilm-model",
+      "inference_text": "films that explore psychological depths"
     }
+  }
 }
 ----
 // NOTCONSOLE
++
+This retriever uses a standard `match` query to search the `movies` index for films tagged with the genre "drama".
+It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}.
==== Using `from` and `size` with a retriever tree diff --git a/docs/reference/search/search-template-api.asciidoc b/docs/reference/search/search-template-api.asciidoc index 038396e558607..c60b5281c05e5 100644 --- a/docs/reference/search/search-template-api.asciidoc +++ b/docs/reference/search/search-template-api.asciidoc @@ -21,9 +21,6 @@ PUT _scripts/my-search-template }, "from": "{{from}}", "size": "{{size}}" - }, - "params": { - "query_string": "My query string" } } } diff --git a/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc b/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc index 4a5efe09ea5a0..6b9b13b124e9f 100644 --- a/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc +++ b/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc @@ -1,24 +1,24 @@ -[cols="^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^"] |==== -| 18+^h| Remote cluster version +| 19+^h| Remote cluster version h| Local cluster version - | 6.8 | 7.1–7.16 | 7.17 | 8.0 | 8.1 | 8.2 | 8.3 | 8.4 | 8.5 | 8.6 | 8.7 | 8.8 | 8.9 | 8.10 | 8.11 | 8.12 | 8.13 | 8.14 -| 6.8 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} -| 7.1–7.16 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} -| 7.17 | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.0 | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.1 | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.2 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.3 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.4 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.5 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.6 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.7 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | 
{no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.8 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.9 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.10 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.11 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.12 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.13 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.14 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} + | 6.8 | 7.1–7.16 | 7.17 | 8.0 | 8.1 | 8.2 | 8.3 | 8.4 | 8.5 | 8.6 | 8.7 | 8.8 | 8.9 | 8.10 | 8.11 | 8.12 | 8.13 | 8.14 | 8.15 +| 6.8 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} +| 7.1–7.16 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} +| 7.17 | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.0 | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.1 | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.2 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.3 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | 
{yes-icon} | {yes-icon} | {yes-icon} +| 8.4 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.5 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.6 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.7 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.8 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.9 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.10 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.11 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.12 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.13 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.14 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.15 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} |==== diff --git a/docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc b/docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc index 6525147839412..0f4640ebdf347 100644 --- a/docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc +++ b/docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc @@ -4,8 +4,6 @@ Deploy and manage LTR models ++++ -preview::["The Learning To Rank feature is in technical preview and may be changed or removed in a future release. 
Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] - NOTE: This feature was introduced in version 8.12.0 and is only available to certain subscription levels. For more information, see {subscriptions}. diff --git a/docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc b/docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc index 2e9693eff0451..f14219e24bc11 100644 --- a/docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc +++ b/docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc @@ -4,8 +4,6 @@ Search using LTR ++++ -preview::["The Learning To Rank feature is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] - NOTE: This feature was introduced in version 8.12.0 and is only available to certain subscription levels. For more information, see {subscriptions}. diff --git a/docs/reference/search/search-your-data/learning-to-rank.asciidoc b/docs/reference/search/search-your-data/learning-to-rank.asciidoc index 08fad9db9c0f6..ebd6d67fe42da 100644 --- a/docs/reference/search/search-your-data/learning-to-rank.asciidoc +++ b/docs/reference/search/search-your-data/learning-to-rank.asciidoc @@ -1,8 +1,6 @@ [[learning-to-rank]] == Learning To Rank -preview::["The Learning To Rank feature is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] - NOTE: This feature was introduced in version 8.12.0 and is only available to certain subscription levels. For more information, see {subscriptions}. 
diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index a81598273dfd3..edd1546dd0854 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -362,7 +362,7 @@ Perl:: Python:: - See https://elasticsearch-py.readthedocs.org/en/master/helpers.html[elasticsearch.helpers.*] + See https://elasticsearch-py.readthedocs.io/en/stable/helpers.html[elasticsearch.helpers.*] JavaScript:: diff --git a/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc new file mode 100644 index 0000000000000..87ed52e365370 --- /dev/null +++ b/docs/reference/search/search-your-data/retrievers-reranking/index.asciidoc @@ -0,0 +1,8 @@ +[[retrievers-reranking-overview]] +== Retrievers and reranking + +* <> +* <> + +include::retrievers-overview.asciidoc[] +include::semantic-reranking.asciidoc[] diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc similarity index 75% rename from docs/reference/search/search-your-data/retrievers-overview.asciidoc rename to docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc index 92cd085583916..99659ae76e092 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-reranking/retrievers-overview.asciidoc @@ -1,7 +1,5 @@ [[retrievers-overview]] -== Retrievers - -// Will move to a top level "Retrievers and reranking" section once reranking is live +=== Retrievers preview::[] @@ -15,33 +13,32 @@ For implementation details, including notable restrictions, check out the [discrete] [[retrievers-overview-types]] -=== Retriever types +==== Retriever types Retrievers come in various types, each tailored for different search operations. The following retrievers are currently available: -* <>. -Returns top documents from a traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. -Mimics a traditional query but in the context of a retriever framework. -This ensures backward compatibility as existing `_search` requests remain supported. -That way you can transition to the new abstraction at your own pace without mixing syntaxes. -* <>. -Returns top documents from a <>, in the context of a retriever framework. -* <>. -Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm. -Allows you to combine multiple result sets with different relevance indicators into a single result set. -An RRF retriever is a *compound retriever*, where its `filter` element is propagated to its sub retrievers. +* <>. Returns top documents from a +traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. +Mimics a traditional query but in the context of a retriever framework. This +ensures backward compatibility as existing `_search` requests remain supported. +That way you can transition to the new abstraction at your own pace without +mixing syntaxes. +* <>. Returns top documents from a <>, +in the context of a retriever framework. +* <>. Combines and ranks multiple first-stage retrievers using +the reciprocal rank fusion (RRF) algorithm. 
Allows you to combine multiple result sets +with different relevance indicators into a single result set. +An RRF retriever is a *compound retriever*, where its `filter` element is +propagated to its sub retrievers. + Sub retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. See the <> for detailed examples and information on how to use the RRF retriever. - -[NOTE] -==== -Stay tuned for more retriever types in future releases! -==== +* <>. Used for <>. +Requires first creating a `rerank` task using the <>. [discrete] -=== What makes retrievers useful? +==== What makes retrievers useful? Here's an overview of what makes retrievers useful and how they differ from regular queries. @@ -73,7 +70,7 @@ When using compound retrievers, only the query element is allowed, which enforce [discrete] [[retrievers-overview-example]] -=== Example +==== Example The following example demonstrates how using retrievers simplify the composability of queries for RRF ranking. @@ -154,25 +151,23 @@ GET example-index/_search [discrete] [[retrievers-overview-glossary]] -=== Glossary +==== Glossary Here are some important terms: -* *Retrieval Pipeline*. -Defines the entire retrieval and ranking logic to produce top hits. -* *Retriever Tree*. -A hierarchical structure that defines how retrievers interact. -* *First-stage Retriever*. -Returns an initial set of candidate documents. -* *Compound Retriever*. -Builds on one or more retrievers, enhancing document retrieval and ranking logic. -* *Combiners*. -Compound retrievers that merge top hits from multiple sub-retrievers. -//* NOT YET *Rerankers*. Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. +* *Retrieval Pipeline*. Defines the entire retrieval and ranking logic to +produce top hits. +* *Retriever Tree*. A hierarchical structure that defines how retrievers interact. +* *First-stage Retriever*. Returns an initial set of candidate documents. +* *Compound Retriever*. Builds on one or more retrievers, +enhancing document retrieval and ranking logic. +* *Combiners*. Compound retrievers that merge top hits +from multiple sub-retrievers. +* *Rerankers*. Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. [discrete] [[retrievers-overview-play-in-search]] -=== Retrievers in action +==== Retrievers in action The Search Playground builds Elasticsearch queries using the retriever abstraction. It automatically detects the fields and types in your index and builds a retriever tree based on your selections. @@ -180,7 +175,9 @@ It automatically detects the fields and types in your index and builds a retriev You can use the Playground to experiment with different retriever configurations and see how they affect search results. Refer to the {kibana-ref}/playground.html[Playground documentation] for more information. -// Content coming in https://github.com/elastic/kibana/pull/182692 - +[discrete] +[[retrievers-overview-api-reference]] +==== API reference +For implementation details, including notable restrictions, check out the <> in the Search API docs. 
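To make the composed syntax concrete, here is a minimal sketch of an `rrf` retriever that wraps a `standard` retriever and a `knn` retriever; the index name `my-index`, the `text` and `vector` fields, the example query, and the three-element query vector are illustrative placeholders rather than values taken from this change.

[source,console]
----
GET my-index/_search
{
  "retriever": {
    "rrf": { <1>
      "retrievers": [
        {
          "standard": { <2>
            "query": {
              "match": {
                "text": "solar eclipse"
              }
            }
          }
        },
        {
          "knn": { <3>
            "field": "vector",
            "query_vector": [0.12, -0.34, 0.56],
            "k": 10,
            "num_candidates": 50
          }
        }
      ],
      "rank_window_size": 50,
      "rank_constant": 20
    }
  }
}
----
<1> The compound `rrf` retriever merges the ranked lists produced by its sub retrievers using reciprocal rank fusion.
<2> A `standard` retriever wrapping an ordinary `match` query.
<3> A `knn` retriever running a vector search; the query vector shown here is a placeholder and must match the dimensionality of the mapped `vector` field.

Because RRF combines results by rank position rather than by raw score, the scores of the sub retrievers do not need to be normalized against one another.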
\ No newline at end of file diff --git a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc new file mode 100644 index 0000000000000..add2d7455983e --- /dev/null +++ b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc @@ -0,0 +1,142 @@ +[[semantic-reranking]] +=== Semantic reranking + +preview::[] + +[TIP] +==== +This overview focuses more on the high-level concepts and use cases for semantic reranking. For full implementation details on how to set up and use semantic reranking in {es}, see the <> in the Search API docs. +==== + +Rerankers improve the relevance of results from earlier-stage retrieval mechanisms. +_Semantic_ rerankers use machine learning models to reorder search results based on their semantic similarity to a query. + +First-stage retrievers and rankers must be very fast and efficient because they process either the entire corpus, or all matching documents. +In a multi-stage pipeline, you can progressively use more computationally intensive ranking functions and techniques, as they will operate on smaller result sets at each step. +This helps avoid query latency degradation and keeps costs manageable. + +Semantic reranking requires relatively large and complex machine learning models and operates in real-time in response to queries. +This technique makes sense on a small _top-k_ result set, as one of the final steps in a pipeline. +This is a powerful technique for improving search relevance that works equally well with keyword, semantic, or hybrid retrieval algorithms. + +The next sections provide more details on the benefits, use cases, and model types used for semantic reranking. +The final sections include a practical, high-level overview of how to implement <> and links to the full reference documentation. + +[discrete] +[[semantic-reranking-use-cases]] +==== Use cases + +Semantic reranking enables a variety of use cases: + +* *Lexical (BM25) retrieval results reranking* +** Out-of-the-box semantic search by adding a simple API call to any lexical/BM25 retrieval pipeline. +** Adds semantic search capabilities on top of existing indices without reindexing, perfect for quick improvements. +** Ideal for environments with complex existing indices. + +* *Semantic retrieval results reranking* +** Improves results from semantic retrievers using ELSER sparse vector embeddings or dense vector embeddings by using more powerful models. +** Adds a refinement layer on top of hybrid retrieval with <>. + +* *General applications* +** Supports automatic and transparent chunking, eliminating the need for pre-chunking at index time. +** Provides explicit control over document relevance in retrieval-augmented generation (RAG) use cases or other scenarios involving language model (LLM) inputs. + +Now that we've outlined the value of semantic reranking, we'll explore the specific models that power this process and how they differ. + +[discrete] +[[semantic-reranking-models]] +==== Cross-encoder and bi-encoder models + +At a high level, two model types are used for semantic reranking: cross-encoders and bi-encoders. + +NOTE: In this version, {es} *only supports cross-encoders* for semantic reranking. + +* A *cross-encoder model* can be thought of as a more powerful, all-in-one solution, because it generates query-aware document representations. +It takes the query and document texts as a single, concatenated input.
+* A *bi-encoder model* takes as input either document or query text. +Documents and query embeddings are computed separately, so they aren't aware of each other. +** To compute a ranking score, an external operation is required. This typically involves computing dot-product or cosine similarity between the query and document embeddings. + +In brief, cross-encoders provide high accuracy but are more resource-intensive. +Bi-encoders are faster and more cost-effective but less precise. + +In future versions, {es} will also support bi-encoders. +If you're interested in a more detailed analysis of the practical differences between cross-encoders and bi-encoders, untoggle the next section. + +.Comparisons between cross-encoder and bi-encoder +[%collapsible] +============== +The following is a non-exhaustive list of considerations when choosing between cross-encoders and bi-encoders for semantic reranking: + +* Because a cross-encoder model simultaneously processes both query and document texts, it can better infer their relevance, making it more effective as a reranker than a bi-encoder. +* Cross-encoder models are generally larger and more computationally intensive, resulting in higher latencies and increased computational costs. +* There are significantly fewer open-source cross-encoders, while bi-encoders offer a wide variety of sizes, languages, and other trade-offs. +* The effectiveness of cross-encoders can also improve the relevance of semantic retrievers. +For example, their ability to take word order into account can improve on dense or sparse embedding retrieval. +* When trained in tandem with specific retrievers (like lexical/BM25), cross-encoders can “correct” typical errors made by those retrievers. +* Cross-encoders output scores that are consistent across queries. +This enables you to maintain high relevance in result sets, by setting a minimum score threshold for all queries. +For example, this is important when using results in a RAG workflow or if you're otherwise feeding results to LLMs. +Note that similarity scores from bi-encoders/embedding similarities are _query-dependent_, meaning you cannot set universal cut-offs. +* Bi-encoders rerank using embeddings. You can improve your reranking latency by creating embeddings at ingest-time. These embeddings can be stored for reranking without being indexed for retrieval, reducing your memory footprint. +============== + +[discrete] +[[semantic-reranking-in-es]] +==== Semantic reranking in {es} + +In {es}, semantic rerankers are implemented using the {es} <> and a <>. + +To use semantic reranking in {es}, you need to: + +. *Choose a reranking model*. +Currently you can: + +** Integrate directly with the <> using the `rerank` task type +** Integrate directly with the <> using the `rerank` task type +** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model using Eland. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es} for semantic reranking. +*** Then set up an <> with the `rerank` task type +. *Create a `rerank` task using the <>*. +The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the reranking task. +. *Define a `text_similarity_reranker` retriever in your search request*. 
+The retriever syntax makes it simple to configure both the retrieval and reranking of search results in a single API call. + +.*Example search request* with semantic reranker +[%collapsible] +============== +The following example shows a search request that uses a semantic reranker to reorder the top-k documents based on their semantic similarity to the query. +[source,console] +---- +POST _search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "match": { + "text": "How often does the moon hide the sun?" + } + } + } + }, + "field": "text", + "inference_id": "my-cohere-rerank-model", + "inference_text": "How often does the moon hide the sun?", + "rank_window_size": 100, + "min_score": 0.5 + } + } +} +---- +// TEST[skip:TBD] +============== + +[discrete] +[[semantic-reranking-learn-more]] +==== Learn more + +* Read the <> for syntax and implementation details +* Learn more about the <> abstraction +* Learn more about the Elastic <> +* Check out our https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/cohere-elasticsearch.ipynb[Python notebook] for using Cohere with {es} \ No newline at end of file diff --git a/docs/reference/search/search-your-data/search-template.asciidoc b/docs/reference/search/search-your-data/search-template.asciidoc index 7a7f09f4a37a7..489a03c0a6a2a 100644 --- a/docs/reference/search/search-your-data/search-template.asciidoc +++ b/docs/reference/search/search-your-data/search-template.asciidoc @@ -42,9 +42,6 @@ PUT _scripts/my-search-template }, "from": "{{from}}", "size": "{{size}}" - }, - "params": { - "query_string": "My query string" } } } diff --git a/docs/reference/search/search-your-data/search-with-synonyms.asciidoc b/docs/reference/search/search-your-data/search-with-synonyms.asciidoc index 596af695b7910..61d3a1d8f925b 100644 --- a/docs/reference/search/search-your-data/search-with-synonyms.asciidoc +++ b/docs/reference/search/search-your-data/search-with-synonyms.asciidoc @@ -82,6 +82,19 @@ If an index is created referencing a nonexistent synonyms set, the index will re The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. ====== +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + {es} uses synonyms as part of the <>. You can use two types of <> to include synonyms: diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc index e1c1618410f2f..a885df2f2179e 100644 --- a/docs/reference/search/search-your-data/search-your-data.asciidoc +++ b/docs/reference/search/search-your-data/search-your-data.asciidoc @@ -45,7 +45,7 @@ results directly in the Kibana Search UI. 
include::search-api.asciidoc[] include::knn-search.asciidoc[] include::semantic-search.asciidoc[] -include::retrievers-overview.asciidoc[] +include::retrievers-reranking/index.asciidoc[] include::learning-to-rank.asciidoc[] include::search-across-clusters.asciidoc[] include::search-with-synonyms.asciidoc[] diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc index 6ecfea0a02dbc..f74bc65e31bf0 100644 --- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc @@ -11,12 +11,12 @@ IMPORTANT: For the easiest way to perform semantic search in the {stack}, refer The following examples use Cohere's `embed-english-v3.0` model, the `all-mpnet-base-v2` model from HuggingFace, and OpenAI's `text-embedding-ada-002` second generation embedding model. You can use any Cohere and OpenAI models, they are all supported by the {infer} API. -For a list of supported models available on HuggingFace, refer to -<>. +For a list of recommended models available on HuggingFace, refer to <>. Azure based examples use models available through https://ai.azure.com/explore/models?selectedTask=embeddings[Azure AI Studio] or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI]. Mistral examples use the `mistral-embed` model from https://docs.mistral.ai/getting-started/models/[the Mistral API]. +Amazon Bedrock examples use the `amazon.titan-embed-text-v1` model from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[the Amazon Bedrock base models]. Click the name of the service you want to use on any of the widgets below to review the corresponding instructions. @@ -39,8 +39,7 @@ include::{es-ref-dir}/tab-widgets/inference-api/infer-api-task-widget.asciidoc[] ==== Create the index mapping The mapping of the destination index - the index that contains the embeddings that the model will create based on your input text - must be created. -The destination index must have a field with the <> -field type to index the output of the used model. +The destination index must have a field with the <> field type for most models and the <> field type for the sparse vector models like in the case of the `elser` service to index the output of the used model. include::{es-ref-dir}/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc[] @@ -48,8 +47,7 @@ include::{es-ref-dir}/tab-widgets/inference-api/infer-api-mapping-widget.asciido [[infer-service-inference-ingest-pipeline]] ==== Create an ingest pipeline with an inference processor -Create an <> with an -<> and use the model you created above to infer against the data that is being ingested in the pipeline. +Create an <> with an <> and use the model you created above to infer against the data that is being ingested in the pipeline. include::{es-ref-dir}/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc[] @@ -102,8 +100,8 @@ POST _tasks//_cancel ==== Semantic search After the data set has been enriched with the embeddings, you can query the data using {ref}/knn-search.html#knn-semantic-search[semantic search]. -Pass a -`query_vector_builder` to the k-nearest neighbor (kNN) vector search API, and provide the query text and the model you have used to create the embeddings. 
+In the case of dense vector models, pass a `query_vector_builder` to the k-nearest neighbor (kNN) vector search API, and provide the query text and the model you have used to create the embeddings. +In the case of a sparse vector model like ELSER, use a `sparse_vector` query, and provide the query text with the model you have used to create the embeddings. NOTE: If you cancelled the reindexing process, you run the query on only a part of the data, which affects the quality of your results. diff --git a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc index c2dabedb0336c..2b8b6c9c25afe 100644 --- a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc @@ -24,7 +24,6 @@ This tutorial uses the <> for demonstra To use the `semantic_text` field type, you must have an {infer} endpoint deployed in your cluster using the <>. - [discrete] [[semantic-text-infer-endpoint]] ==== Create the {infer} endpoint @@ -48,6 +47,13 @@ be used and ELSER creates sparse vectors. The `inference_id` is `my-elser-endpoint`. <2> The `elser` service is used in this example. +[NOTE] +==== +You might see a 502 bad gateway error in the response when using the {kib} Console. +This error usually just reflects a timeout, while the model downloads in the background. +You can check the download progress in the {ml-app} UI. +If using the Python client, you can set the `timeout` parameter to a higher value. +==== [discrete] [[semantic-text-index-mapping]] diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 15985088a6ff7..501d645665a02 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -141,7 +141,7 @@ When unspecified, the pre-filter phase is executed if any of these conditions is - The primary sort of the query targets an indexed field. [[search-preference]] -tag::search-preference[] +// tag::search-preference[] `preference`:: (Optional, string) Nodes and shards used for the search. By default, {es} selects from eligible @@ -178,7 +178,7 @@ Any string that does not start with `_`. If the cluster state and selected shards do not change, searches using the same `` value are routed to the same shards in the same order. ==== -end::search-preference[] +// end::search-preference[] [[search-api-query-params-q]] diff --git a/docs/reference/security/authorization/managing-roles.asciidoc b/docs/reference/security/authorization/managing-roles.asciidoc index 253aa33822234..535d70cbc5e9c 100644 --- a/docs/reference/security/authorization/managing-roles.asciidoc +++ b/docs/reference/security/authorization/managing-roles.asciidoc @@ -13,7 +13,9 @@ A role is defined by the following JSON structure: "indices": [ ... ], <4> "applications": [ ... ], <5> "remote_indices": [ ... ], <6> - "remote_cluster": [ ... ] <7> + "remote_cluster": [ ... ], <7> + "metadata": { ... }, <8> + "description": "..." <9> } ----- // NOTCONSOLE @@ -40,6 +42,16 @@ A role is defined by the following JSON structure: <>. This field is optional (missing `remote_cluster` privileges effectively means no additional cluster permissions for any API key based remote clusters). +<8> Metadata field associated with the role, such as `metadata.app_tag`. + Metadata is internally indexed as a <> field type. + This means that all sub-fields act like `keyword` fields when querying and sorting.
+ Metadata values can be simple values, but also lists and maps. + This field is optional. +<9> A string value with the description text of the role. + The maximum length of it is `1000` chars. + The field is internally indexed as a <> field type + (with default values for all parameters). + This field is optional. [[valid-role-name]] NOTE: Role names must be at least 1 and no more than 507 characters. They can diff --git a/docs/reference/security/authorization/privileges.asciidoc b/docs/reference/security/authorization/privileges.asciidoc index cc44c97a08129..145bd8ebc06bb 100644 --- a/docs/reference/security/authorization/privileges.asciidoc +++ b/docs/reference/security/authorization/privileges.asciidoc @@ -2,7 +2,7 @@ === Security privileges :frontmatter-description: A list of privileges that can be assigned to user roles. :frontmatter-tags-products: [elasticsearch] -:frontmatter-tags-content-type: [reference] +:frontmatter-tags-content-type: [reference] :frontmatter-tags-user-goals: [secure] This section lists the privileges that you can assign to a role. @@ -198,6 +198,10 @@ All {slm} ({slm-init}) actions, including creating and updating policies and starting and stopping {slm-init}. + This privilege is not available in {serverless-full}. ++ +deprecated:[8.15] Also grants the permission to start and stop {Ilm}, using +the {ref}/ilm-start.html[ILM start] and {ref}/ilm-stop.html[ILM stop] APIs. +In a future major release, this privilege will not grant any {Ilm} permissions. `manage_token`:: All security-related operations on tokens that are generated by the {es} Token @@ -278,13 +282,17 @@ status of {Ilm} This privilege is not available in {serverless-full}. `read_pipeline`:: -Read-only access to ingest pipline (get, simulate). +Read-only access to ingest pipeline (get, simulate). `read_slm`:: All read-only {slm-init} actions, such as getting policies and checking the {slm-init} status. + This privilege is not available in {serverless-full}. ++ +deprecated:[8.15] Also grants the permission to get the {Ilm} status, using +the {ref}/ilm-get-status.html[ILM get status API]. In a future major release, +this privilege will not grant any {Ilm} permissions. `read_security`:: All read-only security-related operations, such as getting users, user profiles, diff --git a/docs/reference/setup/logging-config.asciidoc b/docs/reference/setup/logging-config.asciidoc index 7b36b6382c9bf..e382bbdacb464 100644 --- a/docs/reference/setup/logging-config.asciidoc +++ b/docs/reference/setup/logging-config.asciidoc @@ -140,19 +140,41 @@ documentation]. [[configuring-logging-levels]] === Configuring logging levels -Each Java package in the {es-repo}[{es} source code] has a related logger. For -example, the `org.elasticsearch.discovery` package has -`logger.org.elasticsearch.discovery` for logs related to the -<> process. - -To get more or less verbose logs, use the <> to change the related logger's log level. Each logger -accepts Log4j 2's built-in log levels, from least to most verbose: `OFF`, -`FATAL`, `ERROR`, `WARN`, `INFO`, `DEBUG`, and `TRACE`. The default log level is -`INFO`. Messages logged at higher verbosity levels (`DEBUG` and `TRACE`) are -only intended for expert use. To prevent leaking sensitive information in logs, -{es} will reject setting certain loggers to higher verbosity levels unless -<> is enabled. 
+Log4J 2 log messages include a _level_ field, which is one of the following (in +order of increasing verbosity): + +* `FATAL` +* `ERROR` +* `WARN` +* `INFO` +* `DEBUG` +* `TRACE` + +By default {es} includes all messages at levels `INFO`, `WARN`, `ERROR` and +`FATAL` in its logs, but filters out messages at levels `DEBUG` and `TRACE`. +This is the recommended configuration. Do not filter out messages at `INFO` or +higher log levels or else you may not be able to understand your cluster's +behaviour or troubleshoot common problems. Do not enable logging at levels +`DEBUG` or `TRACE` unless you are following instructions elsewhere in this +manual which call for more detailed logging, or you are an expert user who will +be reading the {es} source code to determine the meaning of the logs. + +Messages are logged by a hierarchy of loggers which matches the hierarchy of +Java packages and classes in the {es-repo}[{es} source code]. Every logger has +a corresponding <> which can be used +to control the verbosity of its logs. The setting's name is the fully-qualified +name of the package or class, prefixed with `logger.`. + +You may set each logger's verbosity to the name of a log level, for instance +`DEBUG`, which means that messages from this logger at levels up to the +specified one will be included in the logs. You may also use the value `OFF` to +suppress all messages from the logger. + +For example, the `org.elasticsearch.discovery` package contains functionality +related to the <> process, and you can +control the verbosity of its logs with the `logger.org.elasticsearch.discovery` +setting. To enable `DEBUG` logging for this package, use the +<> as follows: [source,console] ---- @@ -164,8 +186,8 @@ PUT /_cluster/settings } ---- -To reset a logger's verbosity to its default level, set the logger setting to -`null`: +To reset this package's log verbosity to its default level, set the logger +setting to `null`: [source,console] ---- @@ -211,6 +233,14 @@ formatting the same information in different ways, renaming the logger or adjusting the log level for specific messages. Do not rely on the contents of the application logs remaining precisely the same between versions. +NOTE: To prevent leaking sensitive information in logs, {es} suppresses certain +log messages by default even at the highest verbosity levels. To disable this +protection on a node, set the Java system property +`es.insecure_network_trace_enabled` to `true`. This feature is primarily +intended for test systems which do not contain any sensitive information. If you +set this property on a system which contains sensitive information, you must +protect your logs from unauthorized access. + [discrete] [[deprecation-logging]] === Deprecation logging diff --git a/docs/reference/setup/restart-cluster.asciidoc b/docs/reference/setup/restart-cluster.asciidoc index 9488c6632836b..a3bf7723cb5a9 100644 --- a/docs/reference/setup/restart-cluster.asciidoc +++ b/docs/reference/setup/restart-cluster.asciidoc @@ -11,7 +11,7 @@ time, so the service remains uninterrupted. [WARNING] ==== Nodes exceeding the low watermark threshold will be slow to restart. Reduce the disk -usage below the <> before to restarting nodes. +usage below the <> before restarting nodes. 
==== [discrete] diff --git a/docs/reference/snapshot-restore/apis/get-snapshot-status-api.asciidoc b/docs/reference/snapshot-restore/apis/get-snapshot-status-api.asciidoc index d8b03cbc0e880..e677408da3f25 100644 --- a/docs/reference/snapshot-restore/apis/get-snapshot-status-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/get-snapshot-status-api.asciidoc @@ -4,7 +4,7 @@ Get snapshot status ++++ -Retrieves a detailed description of the current state for each shard participating in the snapshot. +Retrieves a detailed description of the current state for each shard participating in the snapshot. Note that this API should only be used to obtain detailed shard-level information for ongoing snapshots. If this detail is not needed, or you want to obtain information about one or more existing snapshots, use the <>. //// [source,console] @@ -172,13 +172,8 @@ Indicates the current snapshot state. `STARTED`:: The snapshot is currently running. -`PARTIAL`:: - The global cluster state was stored, but data of at least one shard was not stored successfully. - The <> section of the response contains more detailed information about shards - that were not processed correctly. - `SUCCESS`:: - The snapshot finished and all shards were stored successfully. + The snapshot completed. ==== -- diff --git a/docs/reference/synonyms/apis/synonyms-apis.asciidoc b/docs/reference/synonyms/apis/synonyms-apis.asciidoc index c9de52939b2fe..dbbc26c36d3df 100644 --- a/docs/reference/synonyms/apis/synonyms-apis.asciidoc +++ b/docs/reference/synonyms/apis/synonyms-apis.asciidoc @@ -21,6 +21,23 @@ These filters are applied as part of the <> process by the << NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set. If you need to manage more synonym rules, you can create multiple synonyms sets. +WARNING: Synonyms sets must exist before they can be added to indices. +If an index is created referencing a nonexistent synonyms set, the index will remain in a partially created and inoperable state. +The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + [discrete] [[synonyms-sets-apis]] === Synonyms sets APIs diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc index c8a42c4d0585a..997dbbe8a20e6 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-ingest-cohere"> Cohere + +
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc index a239c79e5a6d1..6adf3d2ebbf46 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc @@ -35,6 +35,32 @@ and the `output_field` that will contain the {infer} results. // end::cohere[] +// tag::elser[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/elser_embeddings +{ + "processors": [ + { + "inference": { + "model_id": "elser_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. + +// end::elser[] + // tag::hugging-face[] [source,console] @@ -164,3 +190,29 @@ PUT _ingest/pipeline/mistral_embeddings and the `output_field` that will contain the {infer} results. // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/amazon_bedrock_embeddings +{ + "processors": [ + { + "inference": { + "model_id": "amazon_bedrock_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc index 80c7c7ef23ee3..4e3a453a7bbea 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-mapping-cohere"> Cohere + +
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc index a1bce38a02ad2..abeeb87f03e75 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc @@ -31,6 +31,34 @@ the {infer} pipeline configuration in the next step. // end::cohere[] +// tag::elser[] + +[source,console] +-------------------------------------------------- +PUT elser-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "sparse_vector" <2> + }, + "content": { <3> + "type": "text" <4> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated tokens. It must be referenced +in the {infer} pipeline configuration in the next step. +<2> The field to contain the tokens is a `sparse_vector` field for ELSER. +<3> The name of the field from which to create the sparse vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<4> The field type which is text in this example. + +// end::elser[] + // tag::hugging-face[] [source,console] @@ -207,3 +235,38 @@ the {infer} pipeline configuration in the next step. <6> The field type which is text in this example. // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +-------------------------------------------------- +PUT amazon-bedrock-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "dense_vector", <2> + "dims": 1024, <3> + "element_type": "float", + "similarity": "dot_product" <4> + }, + "content": { <5> + "type": "text" <6> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated tokens. It must be referenced +in the {infer} pipeline configuration in the next step. +<2> The field to contain the tokens is a `dense_vector` field. +<3> The output dimensions of the model. This value may be different depending on the underlying model used. +See the https://docs.aws.amazon.com/bedrock/latest/userguide/titan-multiemb-models.html[Amazon Titan model] or the https://docs.cohere.com/reference/embed[Cohere Embeddings model] documentation. +<4> For Amazon Bedrock embeddings, the `dot_product` function should be used to +calculate similarity for Amazon Titan models, or `cosine` for Cohere models. +<5> The name of the field from which to create the dense vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<6> The field type which is text in this example. + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc index 4face6a105819..45cb9fc51b9f1 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-reindex-cohere"> Cohere + +
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc index 927e47ea4d67c..d961ec8bd39bd 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc @@ -25,6 +25,29 @@ may affect the throughput of the reindexing process. // end::cohere[] +// tag::elser[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "elser-embeddings", + "pipeline": "elser_embeddings" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. + +// end::elser[] + // tag::hugging-face[] [source,console] @@ -154,3 +177,26 @@ number makes the update of the reindexing process quicker which enables you to follow the progress closely and detect errors early. // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "amazon-bedrock-embeddings", + "pipeline": "amazon_bedrock_embeddings" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc index 9981eb90d4929..c867b39b88e3b 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-requirements-cohere"> Cohere + +
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc index 435e53bbc0bc0..603cd85a8f93d 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc @@ -5,6 +5,13 @@ the Cohere service. // end::cohere[] +// tag::elser[] + +ELSER is a model trained by Elastic. If you have an {es} deployment, there is no +further requirement for using the {infer} API with the `elser` service. + +// end::elser[] + // tag::hugging-face[] A https://huggingface.co/[HuggingFace account] is required to use the {infer} @@ -39,3 +46,9 @@ You can apply for access to Azure OpenAI by completing the form at https://aka.m * An API key generated for your account // end::mistral[] + +// tag::amazon-bedrock[] +* An AWS Account with https://aws.amazon.com/bedrock/[Amazon Bedrock] access +* A pair of access and secret keys used to access Amazon Bedrock + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc index 6a67b28f91601..fa4a11c59a158 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-search-cohere"> Cohere + +
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc index 523c2301e75ff..f23ed1dfef05d 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc @@ -72,6 +72,67 @@ query from the `cohere-embeddings` index sorted by their proximity to the query: // end::cohere[] +// tag::elser[] + +[source,console] +-------------------------------------------------- +GET elser-embeddings/_search +{ + "query":{ + "sparse_vector":{ + "field": "content_embedding", + "inference_id": "elser_embeddings", + "query": "How to avoid muscle soreness after running?" + } + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `cohere-embeddings` index sorted by their proximity to the query: + +[source,consol-result] +-------------------------------------------------- +"hits": [ + { + "_index": "elser-embeddings", + "_id": "ZLGc_pABZbBmsu5_eCoH", + "_score": 21.472063, + "_source": { + "id": 2258240, + "content": "You may notice some muscle aches while you are exercising. This is called acute soreness. More often, you may begin to feel sore about 12 hours after exercising, and the discomfort usually peaks at 48 to 72 hours after exercise. This is called delayed-onset muscle soreness.It is thought that, during this time, your body is repairing the muscle, making it stronger and bigger.You may also notice the muscles feel better if you exercise lightly. This is normal.his is called delayed-onset muscle soreness. It is thought that, during this time, your body is repairing the muscle, making it stronger and bigger. You may also notice the muscles feel better if you exercise lightly. This is normal." + } + }, + { + "_index": "elser-embeddings", + "_id": "ZbGc_pABZbBmsu5_eCoH", + "_score": 21.421381, + "_source": { + "id": 2258242, + "content": "Photo Credit Jupiterimages/Stockbyte/Getty Images. That stiff, achy feeling you get in the days after exercise is a normal physiological response known as delayed onset muscle soreness. You can take it as a positive sign that your muscles have felt the workout, but the pain may also turn you off to further exercise.ou are more likely to develop delayed onset muscle soreness if you are new to working out, if you’ve gone a long time without exercising and start up again, if you have picked up a new type of physical activity or if you have recently boosted the intensity, length or frequency of your exercise sessions." + } + }, + { + "_index": "elser-embeddings", + "_id": "ZrGc_pABZbBmsu5_eCoH", + "_score": 20.542095, + "_source": { + "id": 2258248, + "content": "They found that stretching before and after exercise has no effect on muscle soreness. Exercise might cause inflammation, which leads to an increase in the production of immune cells (comprised mostly of macrophages and neutrophils). Levels of these immune cells reach a peak 24-48 hours after exercise.These cells, in turn, produce bradykinins and prostaglandins, which make the pain receptors in your body more sensitive. Whenever you move, these pain receptors are stimulated.hey found that stretching before and after exercise has no effect on muscle soreness. 
Exercise might cause inflammation, which leads to an increase in the production of immune cells (comprised mostly of macrophages and neutrophils). Levels of these immune cells reach a peak 24-48 hours after exercise." + } + }, + (...) + ] +-------------------------------------------------- +// NOTCONSOLE + +// end::elser[] + // tag::hugging-face[] [source,console] @@ -405,3 +466,68 @@ query from the `mistral-embeddings` index sorted by their proximity to the query // NOTCONSOLE // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +-------------------------------------------------- +GET amazon-bedrock-embeddings/_search +{ + "knn": { + "field": "content_embedding", + "query_vector_builder": { + "text_embedding": { + "model_id": "amazon_bedrock_embeddings", + "model_text": "Calculate fuel cost" + } + }, + "k": 10, + "num_candidates": 100 + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `amazon-bedrock-embeddings` index sorted by their proximity to the query: + +[source,consol-result] +-------------------------------------------------- +"hits": [ + { + "_index": "amazon-bedrock-embeddings", + "_id": "DDd5OowBHxQKHyc3TDSC", + "_score": 0.83704096, + "_source": { + "id": 862114, + "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes." + } + }, + { + "_index": "amazon-bedrock-embeddings", + "_id": "ajd5OowBHxQKHyc3TDSC", + "_score": 0.8345704, + "_source": { + "id": 820622, + "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances." + } + }, + { + "_index": "amazon-bedrock-embeddings", + "_id": "Djd5OowBHxQKHyc3TDSC", + "_score": 0.8327426, + "_source": { + "id": 8202683, + "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel." + } + }, + (...) 
+ ] +-------------------------------------------------- +// NOTCONSOLE + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc index 1f3ad645d7c29..f12be341d866d 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc @@ -7,7 +7,13 @@ id="infer-api-task-cohere"> Cohere - + +
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc index 18fa3ba541bff..b186b2c58ccc5 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc @@ -28,9 +28,29 @@ NOTE: When using this model the recommended similarity measure to use in the embeddings are normalized to unit length in which case the `dot_product` and the `cosine` measures are equivalent. +// end::cohere[] +// tag::elser[] -// end::cohere[] +[source,console] +------------------------------------------------------------ +PUT _inference/sparse_embedding/elser_embeddings <1> +{ + "service": "elser", + "service_settings": { + "num_allocations": 1, + "num_threads": 1 + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `sparse_embedding` in the path and the `inference_id` which +is the unique identifier of the {infer} endpoint is `elser_embeddings`. + +You don't need to download and deploy the ELSER model upfront, the API request +above will download the model if it's not downloaded yet and then deploy it. + +// end::elser[] // tag::hugging-face[] @@ -177,3 +197,29 @@ PUT _inference/text_embedding/mistral_embeddings <1> <3> The Mistral embeddings model name, for example `mistral-embed`. // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/amazon_bedrock_embeddings <1> +{ + "service": "amazonbedrock", + "service_settings": { + "access_key": "", <2> + "secret_key": "", <3> + "region": "", <4> + "provider": "", <5> + "model": "" <6> + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `text_embedding` in the path and the `inference_id` which is the unique identifier of the {infer} endpoint is `amazon_bedrock_embeddings`. +<2> The access key can be found on your AWS IAM management page for the user account to access Amazon Bedrock. +<3> The secret key should be the paired key for the specified access key. +<4> Specify the region that your model is hosted in. +<5> Specify the model provider. +<6> The model ID or ARN of the model to use. + +// end::amazon-bedrock[] diff --git a/docs/reference/upgrade/disable-shard-alloc.asciidoc b/docs/reference/upgrade/disable-shard-alloc.asciidoc index a93b6dfc6c60b..f69a673095257 100644 --- a/docs/reference/upgrade/disable-shard-alloc.asciidoc +++ b/docs/reference/upgrade/disable-shard-alloc.asciidoc @@ -17,3 +17,7 @@ PUT _cluster/settings } -------------------------------------------------- // TEST[skip:indexes don't assign] + +You can also consider <> when restarting +large clusters to reduce initial strain while nodes are processing +<>. 
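As a companion to the shard-allocation note above, the step usually paired with it is re-enabling allocation once the restarted nodes have rejoined the cluster. A minimal sketch, assuming the standard cluster settings API, is to reset the setting to its default by passing `null`:

[source,console]
----
PUT _cluster/settings
{
  "persistent": {
    "cluster.routing.allocation.enable": null <1>
  }
}
----
<1> Passing `null` removes the override and restores the default value (`all`), allowing primary and replica shards to be allocated again.

Until allocation is re-enabled, replicas belonging to the restarted nodes remain unassigned and cluster health typically stays yellow.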
\ No newline at end of file diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index cd408ba75aa10..02313c5ed82a2 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -84,6 +84,11 @@ + + + + + diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index e6441136f3d4b..2c3521197d7c4 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 515ab9d5f1822..efe2ff3449216 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=f8b4f4772d302c8ff580bc40d0f56e715de69b163546944f787c87abf209c961 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-all.zip +distributionSha256Sum=258e722ec21e955201e31447b0aed14201765a3bfbae296a46cf60b70e66db70 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/gradlew b/gradlew index b740cf13397ab..f5feea6d6b116 100755 --- a/gradlew +++ b/gradlew @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# SPDX-License-Identifier: Apache-2.0 +# ############################################################################## # @@ -84,7 +86,8 @@ done # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s +' "$PWD" ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum diff --git a/gradlew.bat b/gradlew.bat index 7101f8e4676fc..9b42019c7915b 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -13,6 +13,8 @@ @rem See the License for the specific language governing permissions and @rem limitations under the License. 
@rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem @if "%DEBUG%"=="" @echo off @rem ########################################################################## diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index f3f53f1b3c5ea..3c01e490369de 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -203,7 +203,7 @@ public Map parse(String inputString) { DissectKey key = dissectPair.key(); byte[] delimiter = dissectPair.delimiter().getBytes(StandardCharsets.UTF_8); // start dissection after the first delimiter - int i = leadingDelimiter.length(); + int i = leadingDelimiter.getBytes(StandardCharsets.UTF_8).length; int valueStart = i; int lookAheadMatches; // start walking the input string byte by byte, look ahead for matches where needed diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java index 431b26fc1155d..2893e419a84a3 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -211,6 +211,18 @@ public void testMatchUnicode() { assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲")); assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲")); + assertMatch( + "Zürich, the %{adjective} city in Switzerland", + "Zürich, the largest city in Switzerland", + Arrays.asList("adjective"), + Arrays.asList("largest") + ); + assertMatch( + "Zürich, the %{one} city in Switzerland; Zürich, the %{two} city in Switzerland", + "Zürich, the largest city in Switzerland; Zürich, the LARGEST city in Switzerland", + Arrays.asList("one", "two"), + Arrays.asList("largest", "LARGEST") + ); } public void testMatchRemainder() { diff --git a/libs/preallocate/build.gradle b/libs/preallocate/build.gradle index a490c7168516e..2bc802daee1d2 100644 --- a/libs/preallocate/build.gradle +++ b/libs/preallocate/build.gradle @@ -11,6 +11,11 @@ dependencies { implementation project(':libs:elasticsearch-core') implementation project(':libs:elasticsearch-logging') implementation "net.java.dev.jna:jna:${versions.jna}" + + testImplementation "junit:junit:${versions.junit}" + testImplementation(project(":test:framework")) { + exclude group: 'org.elasticsearch', module: 'elasticsearch-preallocate' + } } tasks.named('forbiddenApisMain').configure { diff --git a/libs/preallocate/src/main/java/module-info.java b/libs/preallocate/src/main/java/module-info.java index 89c85d95ab2f0..4e980b083701a 100644 --- a/libs/preallocate/src/main/java/module-info.java +++ b/libs/preallocate/src/main/java/module-info.java @@ -11,7 +11,7 @@ requires org.elasticsearch.logging; requires com.sun.jna; - exports org.elasticsearch.preallocate to org.elasticsearch.blobcache, com.sun.jna; + exports org.elasticsearch.preallocate to org.elasticsearch.blobcache, com.sun.jna, org.elasticsearch.server; provides org.elasticsearch.jdk.ModuleQualifiedExportsService with org.elasticsearch.preallocate.PreallocateModuleExportsService; } diff --git a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/AbstractPosixPreallocator.java 
b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/AbstractPosixPreallocator.java index e841b38c0059e..cfc5855c71f8d 100644 --- a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/AbstractPosixPreallocator.java +++ b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/AbstractPosixPreallocator.java @@ -8,21 +8,27 @@ package org.elasticsearch.preallocate; -import com.sun.jna.FunctionMapper; import com.sun.jna.Library; import com.sun.jna.Native; +import com.sun.jna.NativeLibrary; import com.sun.jna.NativeLong; import com.sun.jna.Platform; import com.sun.jna.Structure; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + import java.io.IOException; import java.security.AccessController; import java.security.PrivilegedAction; +import java.util.Arrays; +import java.util.List; import java.util.Locale; -import java.util.Map; abstract class AbstractPosixPreallocator implements Preallocator { + static final Logger logger = LogManager.getLogger(AbstractPosixPreallocator.class); + /** * Constants relating to posix libc. * @@ -35,7 +41,7 @@ protected record PosixConstants(int SIZEOF_STAT, int STAT_ST_SIZE_OFFSET, int O_ private static final int O_WRONLY = 1; - static final class Stat64 extends Structure implements Structure.ByReference { + public static final class Stat64 extends Structure implements Structure.ByReference { public byte[] _ignore1; public NativeLong st_size = new NativeLong(0); public byte[] _ignore2; @@ -44,6 +50,11 @@ static final class Stat64 extends Structure implements Structure.ByReference { this._ignore1 = new byte[stSizeOffset]; this._ignore2 = new byte[sizeof - stSizeOffset - 8]; } + + @Override + protected List getFieldOrder() { + return Arrays.asList("_ignore1", "st_size", "_ignore2"); + } } private interface NativeFunctions extends Library { @@ -58,6 +69,10 @@ private interface FStat64Function extends Library { int fstat64(int fd, Stat64 stat); } + private interface FXStatFunction extends Library { + int __fxstat(int version, int fd, Stat64 stat); + } + public static final boolean NATIVES_AVAILABLE; private static final NativeFunctions functions; private static final FStat64Function fstat64; @@ -67,18 +82,29 @@ private interface FStat64Function extends Library { try { return Native.load(Platform.C_LIBRARY_NAME, NativeFunctions.class); } catch (final UnsatisfiedLinkError e) { + logger.warn("Failed to load posix functions for preallocate"); return null; } }); fstat64 = AccessController.doPrivileged((PrivilegedAction) () -> { try { + // JNA lazily finds symbols, so even though we try to bind two different functions below, if fstat64 + // isn't found, we won't know until runtime when calling the function. To force resolution of the + // symbol we get a function object directly from the native library. We don't use it, we just want to + // see if it will throw UnsatisfiedLinkError + NativeLibrary.getInstance(Platform.C_LIBRARY_NAME).getFunction("fstat64"); return Native.load(Platform.C_LIBRARY_NAME, FStat64Function.class); } catch (final UnsatisfiedLinkError e) { + // fstat has a long history in linux from the 32-bit architecture days. On some modern linux systems, + // fstat64 doesn't exist as a symbol in glibc. Instead, the compiler replaces fstat64 calls with + // the internal __fxstat method. 
Here we fall back to __fxstat, and statically bind the special + // "version" argument so that the call site looks the same as that of fstat64 try { - // on Linux fstat64 isn't available as a symbol, but instead uses a special __ name - var options = Map.of(Library.OPTION_FUNCTION_MAPPER, (FunctionMapper) (lib, method) -> "__fxstat64"); - return Native.load(Platform.C_LIBRARY_NAME, FStat64Function.class, options); + var fxstat = Native.load(Platform.C_LIBRARY_NAME, FXStatFunction.class); + int version = System.getProperty("os.arch").equals("aarch64") ? 0 : 1; + return (fd, stat) -> fxstat.__fxstat(version, fd, stat); } catch (UnsatisfiedLinkError e2) { + logger.warn("Failed to load __fxstat for preallocate"); return null; } } @@ -124,12 +150,20 @@ public void close() throws IOException { @Override public boolean useNative() { - return false; + return NATIVES_AVAILABLE; } @Override public NativeFileHandle open(String path) throws IOException { - int fd = functions.open(path, O_WRONLY, constants.O_CREAT); + // We pass down O_CREAT, so open will create the file if it does not exist. + // From the open man page (https://www.man7.org/linux/man-pages/man2/open.2.html): + // - The mode parameter is needed when specifying O_CREAT + // - The effective mode is modified by the process's umask: in the absence of a default ACL, the mode of the created file is + // (mode & ~umask). + // We choose to pass down 0666 (r/w permission for user/group/others) to mimic what the JDK does for its open operations; + // see for example the fileOpen implementation in libjava: + // https://github.com/openjdk/jdk/blob/98562166e4a4c8921709014423c6cbc993aa0d97/src/java.base/unix/native/libjava/io_util_md.c#L105 + int fd = functions.open(path, O_WRONLY | constants.O_CREAT, 0666); if (fd < 0) { throw newIOException(String.format(Locale.ROOT, "Could not open file [%s] for preallocation", path)); } diff --git a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/MacOsPreallocator.java b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/MacOsPreallocator.java index 149cf80527bd0..f80d6cbafd5cd 100644 --- a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/MacOsPreallocator.java +++ b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/MacOsPreallocator.java @@ -7,18 +7,27 @@ */ package org.elasticsearch.preallocate; +import com.sun.jna.Library; +import com.sun.jna.Memory; import com.sun.jna.Native; import com.sun.jna.NativeLong; import com.sun.jna.Platform; -import com.sun.jna.Structure; +import java.lang.invoke.MethodHandles; import java.security.AccessController; import java.security.PrivilegedAction; -import java.util.Arrays; -import java.util.List; final class MacOsPreallocator extends AbstractPosixPreallocator { + static { + try { + MethodHandles.lookup().ensureInitialized(Natives.class); + logger.info("Initialized macos natives: " + Natives.NATIVES_AVAILABLE); + } catch (IllegalAccessException unexpected) { + throw new AssertionError(unexpected); + } + } + MacOsPreallocator() { super(new PosixConstants(144, 96, 512)); } @@ -31,21 +40,25 @@ public boolean useNative() { @Override public int preallocate(final int fd, final long currentSize /* unused */ , final long fileSize) { // the Structure.ByReference constructor requires access to declared members - final Natives.Fcntl.FStore fst = AccessController.doPrivileged((PrivilegedAction) Natives.Fcntl.FStore::new); - fst.fst_flags = Natives.Fcntl.F_ALLOCATECONTIG; - fst.fst_posmode = Natives.Fcntl.F_PEOFPOSMODE; - fst.fst_offset = new 
NativeLong(0); - fst.fst_length = new NativeLong(fileSize); + final Natives.Fcntl.FStore fst = new Natives.Fcntl.FStore(); + fst.setFlags(Natives.Fcntl.F_ALLOCATECONTIG); + fst.setPosmode(Natives.Fcntl.F_PEOFPOSMODE); + fst.setOffset(0); + fst.setLength(fileSize); // first, try allocating contiguously - if (Natives.fcntl(fd, Natives.Fcntl.F_PREALLOCATE, fst) != 0) { + logger.info("Calling fcntl for preallocate"); + if (Natives.functions.fcntl(fd, Natives.Fcntl.F_PREALLOCATE, fst.memory) != 0) { + logger.warn("Failed to get contiguous preallocate, trying non-contiguous"); // that failed, so let us try allocating non-contiguously - fst.fst_flags = Natives.Fcntl.F_ALLOCATEALL; - if (Natives.fcntl(fd, Natives.Fcntl.F_PREALLOCATE, fst) != 0) { + fst.setFlags(Natives.Fcntl.F_ALLOCATEALL); + if (Natives.functions.fcntl(fd, Natives.Fcntl.F_PREALLOCATE, fst.memory) != 0) { + logger.warn("Failed to get non-contiguous preallocate"); // i'm afraid captain dale had to bail return Native.getLastError(); } } - if (Natives.ftruncate(fd, new NativeLong(fileSize)) != 0) { + if (Natives.functions.ftruncate(fd, new NativeLong(fileSize)) != 0) { + logger.warn("Failed to ftruncate"); return Native.getLastError(); } return 0; @@ -53,17 +66,20 @@ public int preallocate(final int fd, final long currentSize /* unused */ , final private static class Natives { - static boolean NATIVES_AVAILABLE; + static final boolean NATIVES_AVAILABLE; + static final NativeFunctions functions; static { - NATIVES_AVAILABLE = AccessController.doPrivileged((PrivilegedAction) () -> { + NativeFunctions nativeFunctions = AccessController.doPrivileged((PrivilegedAction) () -> { try { - Native.register(Natives.class, Platform.C_LIBRARY_NAME); + return Native.load(Platform.C_LIBRARY_NAME, NativeFunctions.class); } catch (final UnsatisfiedLinkError e) { - return false; + logger.warn("Failed to load macos native preallocate functions"); + return null; } - return true; }); + functions = nativeFunctions; + NATIVES_AVAILABLE = nativeFunctions != null; } static class Fcntl { @@ -79,25 +95,37 @@ static class Fcntl { @SuppressWarnings("unused") private static final int F_VOLPOSMODE = 4; // allocate from the volume offset - public static final class FStore extends Structure implements Structure.ByReference { - public int fst_flags = 0; - public int fst_posmode = 0; - public NativeLong fst_offset = new NativeLong(0); - public NativeLong fst_length = new NativeLong(0); - @SuppressWarnings("unused") - public NativeLong fst_bytesalloc = new NativeLong(0); - - @Override - protected List getFieldOrder() { - return Arrays.asList("fst_flags", "fst_posmode", "fst_offset", "fst_length", "fst_bytesalloc"); + public static final class FStore { + final Memory memory = new Memory(32); + + public void setFlags(int flags) { + memory.setInt(0, flags); + } + + public void setPosmode(int posmode) { + memory.setInt(4, posmode); + } + + public void setOffset(long offset) { + memory.setLong(8, offset); + } + + public void setLength(long length) { + memory.setLong(16, length); + } + + public long getBytesalloc() { + return memory.getLong(24); + } } } - static native int fcntl(int fd, int cmd, Fcntl.FStore fst); + private interface NativeFunctions extends Library { + int fcntl(int fd, int cmd, Object...
args); - static native int ftruncate(int fd, NativeLong length); + int ftruncate(int fd, NativeLong length); + } } } diff --git a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/Preallocate.java b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/Preallocate.java index 8f7214e0877ba..bd6dc50f48af3 100644 --- a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/Preallocate.java +++ b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/Preallocate.java @@ -15,6 +15,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.RandomAccessFile; +import java.lang.invoke.MethodHandles; import java.lang.reflect.Field; import java.nio.file.Files; import java.nio.file.Path; @@ -24,12 +25,27 @@ public class Preallocate { private static final Logger logger = LogManager.getLogger(Preallocate.class); - private static final boolean IS_LINUX; - private static final boolean IS_MACOS; + static final boolean IS_LINUX; + static final boolean IS_MACOS; static { String osName = System.getProperty("os.name"); IS_LINUX = osName.startsWith("Linux"); IS_MACOS = osName.startsWith("Mac OS X"); + + // make sure the allocator native methods are initialized + Class clazz = null; + if (IS_LINUX) { + clazz = LinuxPreallocator.class; + } else if (IS_MACOS) { + clazz = MacOsPreallocator.class; + } + if (clazz != null) { + try { + MethodHandles.lookup().ensureInitialized(clazz); + } catch (IllegalAccessException unexpected) { + throw new AssertionError(unexpected); + } + } } public static void preallocate(final Path cacheFile, final long fileSize) throws IOException { @@ -47,7 +63,9 @@ private static void preallocate(final Path cacheFile, final long fileSize, final boolean success = false; try { if (prealloactor.useNative()) { - try (NativeFileHandle openFile = prealloactor.open(cacheFile.toAbsolutePath().toString())) { + var absolutePath = cacheFile.toAbsolutePath(); + Files.createDirectories(absolutePath.getParent()); + try (NativeFileHandle openFile = prealloactor.open(absolutePath.toString())) { long currentSize = openFile.getSize(); if (currentSize < fileSize) { logger.info("pre-allocating cache file [{}] ({} bytes) using native methods", cacheFile, fileSize); diff --git a/libs/preallocate/src/test/java/org/elasticsearch/preallocate/PreallocateTests.java b/libs/preallocate/src/test/java/org/elasticsearch/preallocate/PreallocateTests.java new file mode 100644 index 0000000000000..04d7e6adee556 --- /dev/null +++ b/libs/preallocate/src/test/java/org/elasticsearch/preallocate/PreallocateTests.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.preallocate; + +import org.elasticsearch.common.filesystem.FileSystemNatives; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.OptionalLong; + +import static org.hamcrest.Matchers.equalTo; + +public class PreallocateTests extends ESTestCase { + + @Before + public void setup() { + assumeFalse("no preallocate on windows", System.getProperty("os.name").startsWith("Windows")); + assumeFalse("preallocate not supported on encrypted block devices", "encryption-at-rest".equals(System.getenv("BUILDKITE_LABEL"))); + } + + public void testPreallocate() throws Exception { + Path cacheFile = createTempFile(); + long size = 1024 * 1024; // 1 MB + Preallocate.preallocate(cacheFile, size); + OptionalLong foundSize = FileSystemNatives.allocatedSizeInBytes(cacheFile); + assertTrue(foundSize.isPresent()); + assertThat(foundSize.getAsLong(), equalTo(size)); + } + + public void testPreallocateNonExistingFile() throws IOException { + Path file = createTempDir().resolve("test-preallocate"); + long size = 1024 * 1024; // 1 MB + Preallocate.preallocate(file, size); + OptionalLong foundSize = FileSystemNatives.allocatedSizeInBytes(file); + assertTrue(foundSize.isPresent()); + assertThat(foundSize.getAsLong(), equalTo(size)); + } + + public void testPreallocateNonExistingDirectory() throws IOException { + Path file = createTempDir().resolve("intermediate-dir").resolve("test-preallocate"); + long size = 1024 * 1024; // 1 MB + Preallocate.preallocate(file, size); + OptionalLong foundSize = FileSystemNatives.allocatedSizeInBytes(file); + assertTrue(foundSize.isPresent()); + assertThat(foundSize.getAsLong(), equalTo(size)); + } +} diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/AbstractXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/AbstractXContentParser.java index be100e1a6d120..9672c73ef56df 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/AbstractXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/AbstractXContentParser.java @@ -151,11 +151,8 @@ public int intValue(boolean coerce) throws IOException { protected abstract int doIntValue() throws IOException; - private static BigInteger LONG_MAX_VALUE_AS_BIGINTEGER = BigInteger.valueOf(Long.MAX_VALUE); - private static BigInteger LONG_MIN_VALUE_AS_BIGINTEGER = BigInteger.valueOf(Long.MIN_VALUE); - // weak bounds on the BigDecimal representation to allow for coercion - private static BigDecimal BIGDECIMAL_GREATER_THAN_LONG_MAX_VALUE = BigDecimal.valueOf(Long.MAX_VALUE).add(BigDecimal.ONE); - private static BigDecimal BIGDECIMAL_LESS_THAN_LONG_MIN_VALUE = BigDecimal.valueOf(Long.MIN_VALUE).subtract(BigDecimal.ONE); + private static final BigInteger LONG_MAX_VALUE_AS_BIGINTEGER = BigInteger.valueOf(Long.MAX_VALUE); + private static final BigInteger LONG_MIN_VALUE_AS_BIGINTEGER = BigInteger.valueOf(Long.MIN_VALUE); /** Return the long that {@code stringValue} stores or throws an exception if the * stored value cannot be converted to a long that stores the exact same @@ -170,11 +167,21 @@ private static long toLong(String stringValue, boolean coerce) { final BigInteger bigIntegerValue; try { final BigDecimal bigDecimalValue = new BigDecimal(stringValue); - if (bigDecimalValue.compareTo(BIGDECIMAL_GREATER_THAN_LONG_MAX_VALUE) >= 0 - || bigDecimalValue.compareTo(BIGDECIMAL_LESS_THAN_LONG_MIN_VALUE) <= 0) { + // long 
can have a maximum of 19 digits - any more than that cannot be a long + // the scale is stored as the negation, so negative scale -> big number + if (bigDecimalValue.scale() < -19) { throw new IllegalArgumentException("Value [" + stringValue + "] is out of range for a long"); } - bigIntegerValue = coerce ? bigDecimalValue.toBigInteger() : bigDecimalValue.toBigIntegerExact(); + // large scale -> very small number + if (bigDecimalValue.scale() > 19) { + if (coerce) { + bigIntegerValue = BigInteger.ZERO; + } else { + throw new ArithmeticException("Number has a decimal part"); + } + } else { + bigIntegerValue = coerce ? bigDecimalValue.toBigInteger() : bigDecimalValue.toBigIntegerExact(); + } } catch (ArithmeticException e) { throw new IllegalArgumentException("Value [" + stringValue + "] has a decimal part"); } catch (NumberFormatException e) { diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java index c8df9929d007b..b9cb7df84a8e4 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java @@ -31,6 +31,7 @@ import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.in; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; import static org.junit.internal.matchers.ThrowableMessageMatcher.hasMessage; @@ -74,6 +75,44 @@ public void testFloat() throws IOException { } } + public void testLongCoercion() throws IOException { + XContentType xContentType = randomFrom(XContentType.values()); + + try (XContentBuilder builder = XContentBuilder.builder(xContentType.xContent())) { + builder.startObject(); + builder.field("decimal", "5.5"); + builder.field("expInRange", "5e18"); + builder.field("expTooBig", "2e100"); + builder.field("expTooSmall", "2e-100"); + builder.endObject(); + + try (XContentParser parser = createParser(xContentType.xContent(), BytesReference.bytes(builder))) { + assertThat(parser.nextToken(), is(XContentParser.Token.START_OBJECT)); + + assertThat(parser.nextToken(), is(XContentParser.Token.FIELD_NAME)); + assertThat(parser.currentName(), is("decimal")); + assertThat(parser.nextToken(), is(XContentParser.Token.VALUE_STRING)); + assertThat(parser.longValue(), equalTo(5L)); + + assertThat(parser.nextToken(), is(XContentParser.Token.FIELD_NAME)); + assertThat(parser.currentName(), is("expInRange")); + assertThat(parser.nextToken(), is(XContentParser.Token.VALUE_STRING)); + assertThat(parser.longValue(), equalTo((long) 5e18)); + + assertThat(parser.nextToken(), is(XContentParser.Token.FIELD_NAME)); + assertThat(parser.currentName(), is("expTooBig")); + assertThat(parser.nextToken(), is(XContentParser.Token.VALUE_STRING)); + expectThrows(IllegalArgumentException.class, parser::longValue); + + // too small goes to zero + assertThat(parser.nextToken(), is(XContentParser.Token.FIELD_NAME)); + assertThat(parser.currentName(), is("expTooSmall")); + assertThat(parser.nextToken(), is(XContentParser.Token.VALUE_STRING)); + assertThat(parser.longValue(), equalTo(0L)); + } + } + } + public void testReadList() throws IOException { assertThat(readList("{\"foo\": [\"bar\"]}"), contains("bar")); assertThat(readList("{\"foo\": [\"bar\",\"baz\"]}"), contains("bar", "baz")); diff --git 
a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/LogsDataStreamIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/LogsDataStreamIT.java index f95d9a0b0431f..52ce2a7a33ea6 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/LogsDataStreamIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/LogsDataStreamIT.java @@ -165,7 +165,7 @@ public void testLogsIndexModeDataStreamIndexing() throws IOException, ExecutionE client(), "logs-composable-template", LOGS_OR_STANDARD_MAPPING, - Map.of("index.mode", "logs"), + Map.of("index.mode", "logsdb"), List.of("logs-*-*") ); final String dataStreamName = generateDataStreamName("logs"); @@ -188,7 +188,7 @@ public void testIndexModeLogsAndStandardSwitching() throws IOException, Executio ); createDataStream(client(), dataStreamName); for (int i = 0; i < randomIntBetween(5, 10); i++) { - final IndexMode indexMode = i % 2 == 0 ? IndexMode.LOGS : IndexMode.STANDARD; + final IndexMode indexMode = i % 2 == 0 ? IndexMode.LOGSDB : IndexMode.STANDARD; indexModes.add(indexMode); updateComposableIndexTemplate( client(), @@ -206,7 +206,7 @@ public void testIndexModeLogsAndStandardSwitching() throws IOException, Executio public void testIndexModeLogsAndTimeSeriesSwitching() throws IOException, ExecutionException, InterruptedException { final String dataStreamName = generateDataStreamName("custom"); final List indexPatterns = List.of("custom-*-*"); - final Map logsSettings = Map.of("index.mode", "logs"); + final Map logsSettings = Map.of("index.mode", "logsdb"); final Map timeSeriesSettings = Map.of("index.mode", "time_series", "index.routing_path", "host.name"); putComposableIndexTemplate(client(), "custom-composable-template", LOGS_OR_STANDARD_MAPPING, logsSettings, indexPatterns); @@ -221,13 +221,13 @@ public void testIndexModeLogsAndTimeSeriesSwitching() throws IOException, Execut rolloverDataStream(dataStreamName); indexLogOrStandardDocuments(client(), randomIntBetween(10, 20), randomIntBetween(32, 64), dataStreamName); - assertDataStreamBackingIndicesModes(dataStreamName, List.of(IndexMode.LOGS, IndexMode.TIME_SERIES, IndexMode.LOGS)); + assertDataStreamBackingIndicesModes(dataStreamName, List.of(IndexMode.LOGSDB, IndexMode.TIME_SERIES, IndexMode.LOGSDB)); } public void testInvalidIndexModeTimeSeriesSwitchWithoutRoutingPath() throws IOException, ExecutionException, InterruptedException { final String dataStreamName = generateDataStreamName("custom"); final List indexPatterns = List.of("custom-*-*"); - final Map logsSettings = Map.of("index.mode", "logs"); + final Map logsSettings = Map.of("index.mode", "logsdb"); final Map timeSeriesSettings = Map.of("index.mode", "time_series"); putComposableIndexTemplate(client(), "custom-composable-template", LOGS_OR_STANDARD_MAPPING, logsSettings, indexPatterns); @@ -249,7 +249,7 @@ public void testInvalidIndexModeTimeSeriesSwitchWithoutRoutingPath() throws IOEx public void testInvalidIndexModeTimeSeriesSwitchWithoutDimensions() throws IOException, ExecutionException, InterruptedException { final String dataStreamName = generateDataStreamName("custom"); final List indexPatterns = List.of("custom-*-*"); - final Map logsSettings = Map.of("index.mode", "logs"); + final Map logsSettings = Map.of("index.mode", "logsdb"); final Map timeSeriesSettings = Map.of("index.mode", "time_series", "index.routing_path", "host.name"); putComposableIndexTemplate(client(), 
"custom-composable-template", LOGS_OR_STANDARD_MAPPING, logsSettings, indexPatterns); diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java index d3ec5b29ff5b9..780864db8b629 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java @@ -72,7 +72,7 @@ private static void waitForLogs(RestClient client) throws Exception { "template": { "settings": { "index": { - "mode": "logs" + "mode": "logsdb" } }, "mappings": { @@ -161,7 +161,7 @@ public void testLogsIndexing() throws IOException { randomIp(randomBoolean()) ) ); - assertDataStreamBackingIndexMode("logs", 0); + assertDataStreamBackingIndexMode("logsdb", 0); rolloverDataStream(client, DATA_STREAM_NAME); indexDocument( client, @@ -175,7 +175,7 @@ public void testLogsIndexing() throws IOException { randomIp(randomBoolean()) ) ); - assertDataStreamBackingIndexMode("logs", 1); + assertDataStreamBackingIndexMode("logsdb", 1); } public void testLogsStandardIndexModeSwitch() throws IOException { @@ -193,7 +193,7 @@ public void testLogsStandardIndexModeSwitch() throws IOException { randomIp(randomBoolean()) ) ); - assertDataStreamBackingIndexMode("logs", 0); + assertDataStreamBackingIndexMode("logsdb", 0); putTemplate(client, "custom-template", STANDARD_TEMPLATE); rolloverDataStream(client, DATA_STREAM_NAME); @@ -225,7 +225,7 @@ public void testLogsStandardIndexModeSwitch() throws IOException { randomIp(randomBoolean()) ) ); - assertDataStreamBackingIndexMode("logs", 2); + assertDataStreamBackingIndexMode("logsdb", 2); } private void assertDataStreamBackingIndexMode(final String indexMode, int backingIndex) throws IOException { diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeDisabledRestTestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeDisabledRestTestIT.java index dcd2457b88f18..fada21224e3b2 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeDisabledRestTestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeDisabledRestTestIT.java @@ -50,7 +50,7 @@ public void setup() throws Exception { public void testLogsSettingsIndexModeDisabled() throws IOException { assertOK(createDataStream(client, "logs-custom-dev")); final String indexMode = (String) getSetting(client, getDataStreamBackingIndex(client, "logs-custom-dev", 0), "index.mode"); - assertThat(indexMode, Matchers.not(equalTo(IndexMode.LOGS.getName()))); + assertThat(indexMode, Matchers.not(equalTo(IndexMode.LOGSDB.getName()))); } } diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeEnabledRestTestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeEnabledRestTestIT.java index 832267cebf97c..a4277748ea9bd 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeEnabledRestTestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeEnabledRestTestIT.java @@ -179,7 +179,7 @@ public void testCreateDataStream() throws IOException { 
assertOK(putComponentTemplate(client, "logs@custom", MAPPINGS)); assertOK(createDataStream(client, "logs-custom-dev")); final String indexMode = (String) getSetting(client, getDataStreamBackingIndex(client, "logs-custom-dev", 0), "index.mode"); - assertThat(indexMode, equalTo(IndexMode.LOGS.getName())); + assertThat(indexMode, equalTo(IndexMode.LOGSDB.getName())); } public void testBulkIndexing() throws IOException { diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java new file mode 100644 index 0000000000000..cc757c413713d --- /dev/null +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java @@ -0,0 +1,196 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip; + +import fixture.geoip.EnterpriseGeoIpHttpFixture; + +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.bulk.BulkItemResponse; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.ingest.PutPipelineRequest; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.MockSecureSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.reindex.ReindexPlugin; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.transport.RemoteTransportException; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.junit.ClassRule; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_LICENSE_KEY_SETTING; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; + +public class EnterpriseGeoIpDownloaderIT extends ESIntegTestCase { + + private static final String DATABASE_TYPE = "GeoIP2-City"; + + @ClassRule + public static final EnterpriseGeoIpHttpFixture fixture = new 
EnterpriseGeoIpHttpFixture(DATABASE_TYPE); + + protected String getEndpoint() { + return fixture.getAddress(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + MockSecureSettings secureSettings = new MockSecureSettings(); + secureSettings.setString(MAXMIND_LICENSE_KEY_SETTING.getKey(), "license_key"); + Settings.Builder builder = Settings.builder(); + builder.setSecureSettings(secureSettings) + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), true); + // note: this is using the enterprise fixture for the regular downloader, too, as + // a slightly hacky way of making the regular downloader not actually download any files + builder.put(GeoIpDownloader.ENDPOINT_SETTING.getKey(), getEndpoint()); + return builder.build(); + } + + @SuppressWarnings("unchecked") + protected Collection> nodePlugins() { + // the reindex plugin is (somewhat surprisingly) necessary in order to be able to delete-by-query, + // which modules/ingest-geoip does to delete old chunks + return CollectionUtils.appendToCopyNoNullElements(super.nodePlugins(), IngestGeoIpPlugin.class, ReindexPlugin.class); + } + + @SuppressWarnings("unchecked") + public void testEnterpriseDownloaderTask() throws Exception { + /* + * This test starts the enterprise geoip downloader task, and creates a database configuration. Then it creates an ingest + * pipeline that references that database, and ingests a single document using that pipeline. It then asserts that the document + * was updated with information from the database. + * Note that the "enterprise database" is actually just a geolite database being loaded by the GeoIpHttpFixture. + */ + EnterpriseGeoIpDownloader.DEFAULT_MAXMIND_ENDPOINT = getEndpoint(); + final String pipelineName = "enterprise_geoip_pipeline"; + final String indexName = "enterprise_geoip_test_index"; + final String sourceField = "ip"; + final String targetField = "ip-city"; + + startEnterpriseGeoIpDownloaderTask(); + configureDatabase(DATABASE_TYPE); + createGeoIpPipeline(pipelineName, DATABASE_TYPE, sourceField, targetField); + + assertBusy(() -> { + /* + * We know that the .geoip_databases index has been populated, but we don't know for sure that the database has been pulled + * down and made available on all nodes. So we run this ingest-and-check step in an assertBusy. + */ + logger.info("Ingesting a test document"); + String documentId = ingestDocument(indexName, pipelineName, sourceField); + GetResponse getResponse = client().get(new GetRequest(indexName, documentId)).actionGet(); + Map returnedSource = getResponse.getSource(); + assertNotNull(returnedSource); + Object targetFieldValue = returnedSource.get(targetField); + assertNotNull(targetFieldValue); + assertThat(((Map) targetFieldValue).get("organization_name"), equalTo("Bredband2 AB")); + }); + } + + private void startEnterpriseGeoIpDownloaderTask() { + PersistentTasksService persistentTasksService = internalCluster().getInstance(PersistentTasksService.class); + persistentTasksService.sendStartRequest( + ENTERPRISE_GEOIP_DOWNLOADER, + ENTERPRISE_GEOIP_DOWNLOADER, + new EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams(), + TimeValue.MAX_VALUE, + ActionListener.wrap(r -> logger.debug("Started enterprise geoip downloader task"), e -> { + Throwable t = e instanceof RemoteTransportException ? 
ExceptionsHelper.unwrapCause(e) : e; + if (t instanceof ResourceAlreadyExistsException == false) { + logger.error("failed to create enterprise geoip downloader task", e); + } + }) + ); + } + + private void configureDatabase(String databaseType) throws Exception { + admin().cluster() + .execute( + PutDatabaseConfigurationAction.INSTANCE, + new PutDatabaseConfigurationAction.Request( + TimeValue.MAX_VALUE, + TimeValue.MAX_VALUE, + new DatabaseConfiguration("test", databaseType, new DatabaseConfiguration.Maxmind("test_account")) + ) + ) + .actionGet(); + ensureGreen(GeoIpDownloader.DATABASES_INDEX); + assertBusy(() -> { + SearchResponse searchResponse = client().search(new SearchRequest(GeoIpDownloader.DATABASES_INDEX)).actionGet(); + try { + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + } finally { + searchResponse.decRef(); + } + }); + } + + private void createGeoIpPipeline(String pipelineName, String databaseType, String sourceField, String targetField) throws IOException { + final BytesReference bytes; + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + builder.startObject(); + { + builder.field("description", "test"); + builder.startArray("processors"); + { + builder.startObject(); + { + builder.startObject("geoip"); + { + builder.field("field", sourceField); + builder.field("target_field", targetField); + builder.field("database_file", databaseType + ".mmdb"); + } + builder.endObject(); + } + builder.endObject(); + } + builder.endArray(); + } + builder.endObject(); + bytes = BytesReference.bytes(builder); + } + assertAcked(clusterAdmin().putPipeline(new PutPipelineRequest(pipelineName, bytes, XContentType.JSON)).actionGet()); + } + + private String ingestDocument(String indexName, String pipelineName, String sourceField) { + BulkRequest bulkRequest = new BulkRequest(); + bulkRequest.add( + new IndexRequest(indexName).source("{\"" + sourceField + "\": \"89.160.20.128\"}", XContentType.JSON).setPipeline(pipelineName) + ); + BulkResponse response = client().bulk(bulkRequest).actionGet(); + BulkItemResponse[] bulkItemResponses = response.getItems(); + assertThat(bulkItemResponses.length, equalTo(1)); + assertThat(bulkItemResponses[0].status(), equalTo(RestStatus.CREATED)); + return bulkItemResponses[0].getId(); + } +} diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java index 9dcd8abc7bc57..f7ab384c69bf1 100644 --- a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java @@ -152,9 +152,9 @@ public void testInvalidTimestamp() throws Exception { updateClusterSettings(Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), true)); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }, 2, TimeUnit.MINUTES); @@ -227,9 +227,9 @@ public void testGeoIpDatabasesDownload() throws Exception { updateClusterSettings(Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), 
true)); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); putGeoIpPipeline(); // This is to work around the race condition described in #92888 }, 2, TimeUnit.MINUTES); @@ -238,11 +238,11 @@ public void testGeoIpDatabasesDownload() throws Exception { assertBusy(() -> { try { GeoIpTaskState state = (GeoIpTaskState) getTask().getState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); - GeoIpTaskState.Metadata metadata = state.get(id); + GeoIpTaskState.Metadata metadata = state.getDatabases().get(id); int size = metadata.lastChunk() - metadata.firstChunk() + 1; assertResponse( prepareSearch(GeoIpDownloader.DATABASES_INDEX).setSize(size) @@ -301,9 +301,9 @@ public void testGeoIpDatabasesDownloadNoGeoipProcessors() throws Exception { assertNotNull(getTask().getState()); // removing all geoip processors should not result in the task being stopped assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }); } @@ -337,9 +337,9 @@ public void testDoNotDownloadDatabaseOnPipelineCreation() throws Exception { assertAcked(indicesAdmin().prepareUpdateSettings(indexIdentifier).setSettings(indexSettings).get()); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }, 2, TimeUnit.MINUTES); diff --git a/modules/ingest-geoip/src/main/java/module-info.java b/modules/ingest-geoip/src/main/java/module-info.java index fa0b0266414f0..4d0acefcb6c9f 100644 --- a/modules/ingest-geoip/src/main/java/module-info.java +++ b/modules/ingest-geoip/src/main/java/module-info.java @@ -15,5 +15,6 @@ requires com.maxmind.geoip2; requires com.maxmind.db; + exports org.elasticsearch.ingest.geoip.direct to org.elasticsearch.server; exports org.elasticsearch.ingest.geoip.stats to org.elasticsearch.server; } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java index efae8fa0c50ca..dcb882ede230c 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java @@ -24,6 +24,7 @@ import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.CheckedRunnable; import 
org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Tuple; import org.elasticsearch.env.Environment; import org.elasticsearch.gateway.GatewayService; import org.elasticsearch.index.Index; @@ -52,7 +53,6 @@ import java.util.Collection; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -64,6 +64,7 @@ import java.util.zip.GZIPInputStream; import static org.elasticsearch.core.Strings.format; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpTaskState.getEnterpriseGeoIpTaskState; import static org.elasticsearch.ingest.geoip.GeoIpTaskState.getGeoIpTaskState; /** @@ -183,13 +184,14 @@ public Boolean isValid(String databaseFile) { if (state == null) { return true; } + GeoIpTaskState.Metadata metadata = state.getDatabases().get(databaseFile); // we never remove metadata from cluster state, if metadata is null we deal with built-in database, which is always valid if (metadata == null) { return true; } - boolean valid = metadata.isValid(currentState.metadata().settings()); + boolean valid = metadata.isNewEnough(currentState.metadata().settings()); if (valid && metadata.isCloseToExpiration()) { HeaderWarning.addWarning( "database [{}] was not updated for over 25 days, geoip processor will stop working if there is no update for 30 days", @@ -269,20 +271,52 @@ void checkDatabases(ClusterState state) { } } - GeoIpTaskState taskState = getGeoIpTaskState(state); - if (taskState == null) { - // Note: an empty state will purge stale entries in databases map - taskState = GeoIpTaskState.EMPTY; + // we'll consult each of the geoip downloaders to build up a list of database metadatas to work with + List> validMetadatas = new ArrayList<>(); + + // process the geoip task state for the (ordinary) geoip downloader + { + GeoIpTaskState taskState = getGeoIpTaskState(state); + if (taskState == null) { + // Note: an empty state will purge stale entries in databases map + taskState = GeoIpTaskState.EMPTY; + } + validMetadatas.addAll( + taskState.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(state.getMetadata().settings())) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList() + ); + } + + // process the geoip task state for the enterprise geoip downloader + { + EnterpriseGeoIpTaskState taskState = getEnterpriseGeoIpTaskState(state); + if (taskState == null) { + // Note: an empty state will purge stale entries in databases map + taskState = EnterpriseGeoIpTaskState.EMPTY; + } + validMetadatas.addAll( + taskState.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(state.getMetadata().settings())) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList() + ); } - taskState.getDatabases().entrySet().stream().filter(e -> e.getValue().isValid(state.getMetadata().settings())).forEach(e -> { - String name = e.getKey(); - GeoIpTaskState.Metadata metadata = e.getValue(); + // run through all the valid metadatas, regardless of source, and retrieve them + validMetadatas.forEach(e -> { + String name = e.v1(); + GeoIpTaskState.Metadata metadata = e.v2(); DatabaseReaderLazyLoader reference = databases.get(name); String remoteMd5 = metadata.md5(); String localMd5 = reference != null ? 
reference.getMd5() : null; if (Objects.equals(localMd5, remoteMd5)) { - logger.debug("Current reference of [{}] is up to date [{}] with was recorded in CS [{}]", name, localMd5, remoteMd5); + logger.debug("[{}] is up to date [{}] with cluster state [{}]", name, localMd5, remoteMd5); return; } @@ -293,15 +327,14 @@ void checkDatabases(ClusterState state) { } }); + // TODO perhaps we need to handle the license flap persistent task state better than we do + // i think the ideal end state is that we *do not* drop the files that the enterprise downloader + // handled if they fall out -- which means we need to track that in the databases map itself + + // start with the list of all databases we currently know about in this service, + // then drop the ones that didn't check out as valid from the task states List staleEntries = new ArrayList<>(databases.keySet()); - staleEntries.removeAll( - taskState.getDatabases() - .entrySet() - .stream() - .filter(e -> e.getValue().isValid(state.getMetadata().settings())) - .map(Map.Entry::getKey) - .collect(Collectors.toSet()) - ); + staleEntries.removeAll(validMetadatas.stream().map(Tuple::v1).collect(Collectors.toSet())); removeStaleEntries(staleEntries); } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java new file mode 100644 index 0000000000000..9645e34751642 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java @@ -0,0 +1,474 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.flush.FlushRequest; +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.hash.MessageDigests; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.index.reindex.DeleteByQueryAction; +import org.elasticsearch.index.reindex.DeleteByQueryRequest; +import org.elasticsearch.ingest.geoip.GeoIpTaskState.Metadata; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.persistent.AllocatedPersistentTask; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.threadpool.Scheduler; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xcontent.XContentType; + +import java.io.IOException; +import java.io.InputStream; +import java.net.PasswordAuthentication; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_SETTINGS_PREFIX; + +/** + * Main component responsible for downloading new GeoIP databases. + * New databases are downloaded in chunks and stored in .geoip_databases index + * Downloads are verified against MD5 checksum provided by the server + * Current state of all stored databases is stored in cluster state in persistent task state + */ +public class EnterpriseGeoIpDownloader extends AllocatedPersistentTask { + + private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloader.class); + private static final Pattern CHECKSUM_PATTERN = Pattern.compile("(\\w{64})\\s\\s(.*)"); + + // for overriding in tests + static String DEFAULT_MAXMIND_ENDPOINT = System.getProperty( + MAXMIND_SETTINGS_PREFIX + "endpoint.default", + "https://download.maxmind.com/geoip/databases" + ); + // n.b. 
a future enhancement might be to allow for a MAXMIND_ENDPOINT_SETTING, but + // at the moment this is an unsupported system property for use in tests (only) + + static String downloadUrl(final String name, final String suffix) { + String endpointPattern = DEFAULT_MAXMIND_ENDPOINT; + if (endpointPattern.contains("%")) { + throw new IllegalArgumentException("Invalid endpoint [" + endpointPattern + "]"); + } + if (endpointPattern.endsWith("/") == false) { + endpointPattern += "/"; + } + endpointPattern += "%s/download?suffix=%s"; + + // at this point the pattern looks like this (in the default case): + // https://download.maxmind.com/geoip/databases/%s/download?suffix=%s + + return Strings.format(endpointPattern, name, suffix); + } + + static final String DATABASES_INDEX = ".geoip_databases"; + static final int MAX_CHUNK_SIZE = 1024 * 1024; + + private final Client client; + private final HttpClient httpClient; + private final ClusterService clusterService; + private final ThreadPool threadPool; + + // visible for testing + protected volatile EnterpriseGeoIpTaskState state; + private volatile Scheduler.ScheduledCancellable scheduled; + private final Supplier pollIntervalSupplier; + private final Function credentialsBuilder; + + EnterpriseGeoIpDownloader( + Client client, + HttpClient httpClient, + ClusterService clusterService, + ThreadPool threadPool, + long id, + String type, + String action, + String description, + TaskId parentTask, + Map headers, + Supplier pollIntervalSupplier, + Function credentialsBuilder + ) { + super(id, type, action, description, parentTask, headers); + this.client = client; + this.httpClient = httpClient; + this.clusterService = clusterService; + this.threadPool = threadPool; + this.pollIntervalSupplier = pollIntervalSupplier; + this.credentialsBuilder = credentialsBuilder; + } + + void setState(EnterpriseGeoIpTaskState state) { + // this is for injecting the state in GeoIpDownloaderTaskExecutor#nodeOperation just after the task instance has been created + // by the PersistentTasksNodeService -- since the GeoIpDownloader is newly created, the state will be null, and the passed-in + // state cannot be null + assert this.state == null + : "setState() cannot be called when state is already non-null. This most likely happened because setState() was called twice"; + assert state != null : "Should never call setState with a null state. 
Pass an EnterpriseGeoIpTaskState.EMPTY instead."; + this.state = state; + } + + // visible for testing + void updateDatabases() throws IOException { + var clusterState = clusterService.state(); + var geoipIndex = clusterState.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX); + if (geoipIndex != null) { + logger.trace("the geoip index [{}] exists", EnterpriseGeoIpDownloader.DATABASES_INDEX); + if (clusterState.getRoutingTable().index(geoipIndex.getWriteIndex()).allPrimaryShardsActive() == false) { + logger.debug("not updating databases because not all primary shards of [{}] index are active yet", DATABASES_INDEX); + return; + } + var blockException = clusterState.blocks().indexBlockedException(ClusterBlockLevel.WRITE, geoipIndex.getWriteIndex().getName()); + if (blockException != null) { + throw blockException; + } + } + + logger.trace("Updating geoip databases"); + IngestGeoIpMetadata geoIpMeta = clusterState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + // if there are entries in the cs that aren't in the persistent task state, + // then download those (only) + // --- + // if there are in the persistent task state, that aren't in the cluster state + // then nuke those (only) + // --- + // else, just download everything + boolean addedSomething = false; + { + Set existingDatabaseNames = state.getDatabases().keySet(); + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + final String id = entry.getKey(); + DatabaseConfiguration database = entry.getValue().database(); + if (existingDatabaseNames.contains(database.name() + ".mmdb") == false) { + logger.debug("A new database appeared [{}]", database.name()); + + final String accountId = database.maxmind().accountId(); + try (HttpClient.PasswordAuthenticationHolder holder = credentialsBuilder.apply(accountId)) { + if (holder == null) { + logger.warn("No credentials found to download database [{}], skipping download...", id); + } else { + processDatabase(holder.get(), database); + addedSomething = true; + } + } + } + } + } + + boolean droppedSomething = false; + { + // rip anything out of the task state that doesn't match what's in the cluster state, + // that is, if there's no longer an entry for a database in the repository, + // then drop it from the task state, too + Set databases = geoIpMeta.getDatabases() + .values() + .stream() + .map(c -> c.database().name() + ".mmdb") + .collect(Collectors.toSet()); + EnterpriseGeoIpTaskState _state = state; + Collection> metas = _state.getDatabases() + .entrySet() + .stream() + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList(); + for (Tuple metaTuple : metas) { + String name = metaTuple.v1(); + Metadata meta = metaTuple.v2(); + if (databases.contains(name) == false) { + logger.debug("Dropping [{}], databases was {}", name, databases); + _state = _state.remove(name); + deleteOldChunks(name, meta.lastChunk() + 1); + droppedSomething = true; + } + } + if (droppedSomething) { + state = _state; + updateTaskState(); + } + } + + if (addedSomething == false && droppedSomething == false) { + RuntimeException accumulator = null; + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + final String id = entry.getKey(); + DatabaseConfiguration database = entry.getValue().database(); + + final String accountId = database.maxmind().accountId(); + try (HttpClient.PasswordAuthenticationHolder holder = credentialsBuilder.apply(accountId)) { + if (holder == null) { + logger.warn("No credentials found to download 
database [{}], skipping download...", id); + } else { + processDatabase(holder.get(), database); + } + } catch (Exception e) { + accumulator = ExceptionsHelper.useOrSuppress(accumulator, ExceptionsHelper.convertToRuntime(e)); + } + } + if (accumulator != null) { + throw accumulator; + } + } + } + + /** + * This method fetches the sha256 file and tar.gz file for the given database from the Maxmind endpoint, then indexes that tar.gz + * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database tar.gz from the index if they exist. + * If the computed sha256 does not match the expected sha256, an error will be logged and the database will not be put into the + * Elasticsearch index. + *

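+ * For illustration only (a hypothetical file name, not taken from a real response): the fetched sha256 file is expected to contain
+ * a single line matching {@code CHECKSUM_PATTERN} -- 64 word characters (in practice the hex-encoded sha256), two spaces, and a
+ * file name, e.g. {@code <64-hex-chars>  GeoIP2-City_20240101.tar.gz}; only the first group (the checksum itself) is used.
+ *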
+ * As an implementation detail, this method retrieves the sha256 checksum of the database to download and then invokes + * {@link EnterpriseGeoIpDownloader#processDatabase(PasswordAuthentication, String, String, String)} with that checksum, deferring to + * that method to actually download and process the tar.gz itself. + * + * @param auth The credentials to use to download from the Maxmind endpoint + * @param database The database to be downloaded from Maxmind and indexed into an Elasticsearch index + * @throws IOException If there is an error fetching the sha256 file + */ + void processDatabase(PasswordAuthentication auth, DatabaseConfiguration database) throws IOException { + final String name = database.name(); + logger.debug("Processing database [{}] for configuration [{}]", name, database.id()); + + final String sha256Url = downloadUrl(name, "tar.gz.sha256"); + final String tgzUrl = downloadUrl(name, "tar.gz"); + + String result = new String(httpClient.getBytes(auth, sha256Url), StandardCharsets.UTF_8).trim(); // this throws if the auth is bad + var matcher = CHECKSUM_PATTERN.matcher(result); + boolean match = matcher.matches(); + if (match == false) { + throw new RuntimeException("Unexpected sha256 response from [" + sha256Url + "]"); + } + final String sha256 = matcher.group(1); + // the name that comes from the enterprise downloader cluster state doesn't include the .mmdb extension, + // but the downloading and indexing of database code expects it to be there, so we add it on here before further processing + processDatabase(auth, name + ".mmdb", sha256, tgzUrl); + } + + /** + * This method fetches the tar.gz file for the given database from the Maxmind endpoint, then indexes that tar.gz + * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database tar.gz from the index if they exist. 
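+ * Concretely, the tar.gz bytes are streamed into indexChunks in chunks of at most {@code MAX_CHUNK_SIZE} (1024 * 1024 bytes), each
+ * indexed as a SMILE document with the fields {@code name}, {@code chunk}, and {@code data} and an id of the form
+ * {@code name_chunk_timestamp}; a checksum mismatch causes indexChunks to throw, which is caught and logged here as a download error.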
+ * + * @param auth The credentials to use to download from the Maxmind endpoint + * The name of the database to be downloaded from Maxmind and indexed into an Elasticsearch index + * @param sha256 The sha256 to compare to the computed sha256 of the downloaded tar.gz file + * @param url The URL for the Maxmind endpoint from which the database's tar.gz will be downloaded + */ + private void processDatabase(PasswordAuthentication auth, String name, String sha256, String url) { + Metadata metadata = state.getDatabases().getOrDefault(name, Metadata.EMPTY); + if (Objects.equals(metadata.sha256(), sha256)) { + updateTimestamp(name, metadata); + return; + } + logger.debug("downloading geoip database [{}]", name); + long start = System.currentTimeMillis(); + try (InputStream is = httpClient.get(auth, url)) { + int firstChunk = metadata.lastChunk() + 1; // if there is no metadata, then Metadata.EMPTY + 1 = 0 + Tuple tuple = indexChunks(name, is, firstChunk, MessageDigests.sha256(), sha256, start); + int lastChunk = tuple.v1(); + String md5 = tuple.v2(); + if (lastChunk > firstChunk) { + state = state.put(name, new Metadata(start, firstChunk, lastChunk - 1, md5, start, sha256)); + updateTaskState(); + logger.info("successfully downloaded geoip database [{}]", name); + deleteOldChunks(name, firstChunk); + } + } catch (Exception e) { + logger.error(() -> "error downloading geoip database [" + name + "]", e); + } + } + + // visible for testing + void deleteOldChunks(String name, int firstChunk) { + BoolQueryBuilder queryBuilder = new BoolQueryBuilder().filter(new MatchQueryBuilder("name", name)) + .filter(new RangeQueryBuilder("chunk").to(firstChunk, false)); + DeleteByQueryRequest request = new DeleteByQueryRequest(); + request.indices(DATABASES_INDEX); + request.setQuery(queryBuilder); + client.execute( + DeleteByQueryAction.INSTANCE, + request, + ActionListener.wrap(r -> {}, e -> logger.warn("could not delete old chunks for geoip database [" + name + "]", e)) + ); + } + + // visible for testing + protected void updateTimestamp(String name, Metadata old) { + logger.debug("geoip database [{}] is up to date, updated timestamp", name); + state = state.put( + name, + new Metadata(old.lastUpdate(), old.firstChunk(), old.lastChunk(), old.md5(), System.currentTimeMillis(), old.sha256()) + ); + updateTaskState(); + } + + void updateTaskState() { + PlainActionFuture> future = new PlainActionFuture<>(); + updatePersistentTaskState(state, future); + state = ((EnterpriseGeoIpTaskState) future.actionGet().getState()); + } + + // visible for testing + Tuple indexChunks( + String name, + InputStream is, + int chunk, + @Nullable MessageDigest digest, + String expectedChecksum, + long timestamp + ) throws IOException { + MessageDigest md5 = MessageDigests.md5(); + for (byte[] buf = getChunk(is); buf.length != 0; buf = getChunk(is)) { + md5.update(buf); + if (digest != null) { + digest.update(buf); + } + IndexRequest indexRequest = new IndexRequest(DATABASES_INDEX).id(name + "_" + chunk + "_" + timestamp) + .create(true) + .source(XContentType.SMILE, "name", name, "chunk", chunk, "data", buf); + client.index(indexRequest).actionGet(); + chunk++; + } + + // May take some time before automatic flush kicks in: + // (otherwise the translog will contain large documents for some time without good reason) + FlushRequest flushRequest = new FlushRequest(DATABASES_INDEX); + client.admin().indices().flush(flushRequest).actionGet(); + // Ensure that the chunk documents are visible: + RefreshRequest refreshRequest = new 
RefreshRequest(DATABASES_INDEX); + client.admin().indices().refresh(refreshRequest).actionGet(); + + String actualMd5 = MessageDigests.toHexString(md5.digest()); + String actualChecksum = digest == null ? actualMd5 : MessageDigests.toHexString(digest.digest()); + if (Objects.equals(expectedChecksum, actualChecksum) == false) { + throw new IOException("checksum mismatch, expected [" + expectedChecksum + "], actual [" + actualChecksum + "]"); + } + return Tuple.tuple(chunk, actualMd5); + } + + // visible for testing + static byte[] getChunk(InputStream is) throws IOException { + byte[] buf = new byte[MAX_CHUNK_SIZE]; + int chunkSize = 0; + while (chunkSize < MAX_CHUNK_SIZE) { + int read = is.read(buf, chunkSize, MAX_CHUNK_SIZE - chunkSize); + if (read == -1) { + break; + } + chunkSize += read; + } + if (chunkSize < MAX_CHUNK_SIZE) { + buf = Arrays.copyOf(buf, chunkSize); + } + return buf; + } + + /** + * Downloads the geoip databases now, and schedules them to be downloaded again after pollInterval. + */ + synchronized void runDownloader() { + // by the time we reach here, the state will never be null + assert this.state != null : "this.setState() is null. You need to call setState() before calling runDownloader()"; + + // there's a race condition between here and requestReschedule. originally this scheduleNextRun call was at the end of this + // block, but remember that updateDatabases can take seconds to run (it's downloading bytes from the internet), and so during the + // very first run there would be no future run scheduled to reschedule in requestReschedule. which meant that if you went from zero + // to N(>=2) databases in quick succession, then all but the first database wouldn't necessarily get downloaded, because the + // requestReschedule call in the EnterpriseGeoIpDownloaderTaskExecutor's clusterChanged wouldn't have a scheduled future run to + // reschedule. scheduling the next run at the beginning of this run means that there's a much smaller window (milliseconds?, rather + // than seconds) in which such a race could occur. technically there's a window here, still, but i think it's _greatly_ reduced. + scheduleNextRun(pollIntervalSupplier.get()); + // TODO regardless of the above comment, i like the idea of checking the lowest last-checked time and then running the math to get + // to the next interval from then -- maybe that's a neat future enhancement to add + + if (isCancelled() || isCompleted()) { + return; + } + try { + updateDatabases(); // n.b. this downloads bytes from the internet, it can take a while + } catch (Exception e) { + logger.error("exception during geoip databases update", e); + } + try { + cleanDatabases(); + } catch (Exception e) { + logger.error("exception during geoip databases cleanup", e); + } + } + + /** + * This method requests that the downloader be rescheduled to run immediately (presumably because a dynamic property supplied by + * pollIntervalSupplier or eagerDownloadSupplier has changed, or a pipeline with a geoip processor has been added). This method does + * nothing if this task is cancelled, completed, or has not yet been scheduled to run for the first time. It cancels any existing + * scheduled run. 
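+ * When a pending run is successfully cancelled, the downloader is rescheduled with {@code TimeValue.ZERO}, that is, it runs again
+ * essentially immediately on the generic thread pool.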
+ */ + public void requestReschedule() { + if (isCancelled() || isCompleted()) { + return; + } + if (scheduled != null && scheduled.cancel()) { + scheduleNextRun(TimeValue.ZERO); + } + } + + private void cleanDatabases() { + List> expiredDatabases = state.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(clusterService.state().metadata().settings()) == false) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList(); + expiredDatabases.forEach(e -> { + String name = e.v1(); + Metadata meta = e.v2(); + deleteOldChunks(name, meta.lastChunk() + 1); + state = state.put(name, new Metadata(meta.lastUpdate(), meta.firstChunk(), meta.lastChunk(), meta.md5(), meta.lastCheck() - 1)); + updateTaskState(); + }); + } + + @Override + protected void onCancelled() { + if (scheduled != null) { + scheduled.cancel(); + } + markAsCompleted(); + } + + private void scheduleNextRun(TimeValue time) { + if (threadPool.scheduler().isShutdown() == false) { + scheduled = threadPool.schedule(this::runDownloader, time, threadPool.generic()); + } + } + +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java new file mode 100644 index 0000000000000..8fc46fe157548 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java @@ -0,0 +1,257 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.client.internal.OriginSettingClient; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterStateListener; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.settings.SecureSetting; +import org.elasticsearch.common.settings.SecureSettings; +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.ingest.EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams; +import org.elasticsearch.ingest.IngestService; +import org.elasticsearch.persistent.AllocatedPersistentTask; +import org.elasticsearch.persistent.PersistentTaskState; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.PersistentTasksExecutor; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.io.InputStream; +import java.security.GeneralSecurityException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; +import static org.elasticsearch.ingest.geoip.GeoIpDownloaderTaskExecutor.ENABLED_SETTING; +import static org.elasticsearch.ingest.geoip.GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING; + +public class EnterpriseGeoIpDownloaderTaskExecutor extends PersistentTasksExecutor + implements + ClusterStateListener { + private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloader.class); + + static final String MAXMIND_SETTINGS_PREFIX = "ingest.geoip.downloader.maxmind."; + + public static final Setting MAXMIND_LICENSE_KEY_SETTING = SecureSetting.secureString( + MAXMIND_SETTINGS_PREFIX + "license_key", + null + ); + + private final Client client; + private final HttpClient httpClient; + private final ClusterService clusterService; + private final ThreadPool threadPool; + private final Settings settings; + private volatile TimeValue pollInterval; + private final AtomicReference currentTask = new AtomicReference<>(); + + private volatile SecureSettings cachedSecureSettings; + + EnterpriseGeoIpDownloaderTaskExecutor(Client client, HttpClient httpClient, ClusterService clusterService, ThreadPool threadPool) { + super(ENTERPRISE_GEOIP_DOWNLOADER, threadPool.generic()); + this.client = new OriginSettingClient(client, IngestService.INGEST_ORIGIN); + this.httpClient = httpClient; + this.clusterService = clusterService; + this.threadPool = threadPool; + this.settings = clusterService.getSettings(); + this.pollInterval = POLL_INTERVAL_SETTING.get(settings); + + // do an initial load using the node settings + reload(clusterService.getSettings()); + } + + /** + * This method completes the initialization of the EnterpriseGeoIpDownloaderTaskExecutor by registering several listeners. 
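+ * Specifically, it registers this executor as a cluster state listener and adds a settings update consumer so that changes to
+ * {@code POLL_INTERVAL_SETTING} are propagated (via requestReschedule) to the currently running downloader task, if there is one.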
+ */ + public void init() { + clusterService.addListener(this); + clusterService.getClusterSettings().addSettingsUpdateConsumer(POLL_INTERVAL_SETTING, this::setPollInterval); + } + + private void setPollInterval(TimeValue pollInterval) { + if (Objects.equals(this.pollInterval, pollInterval) == false) { + this.pollInterval = pollInterval; + EnterpriseGeoIpDownloader currentDownloader = getCurrentTask(); + if (currentDownloader != null) { + currentDownloader.requestReschedule(); + } + } + } + + private HttpClient.PasswordAuthenticationHolder buildCredentials(final String username) { + final char[] passwordChars; + if (cachedSecureSettings.getSettingNames().contains(MAXMIND_LICENSE_KEY_SETTING.getKey())) { + passwordChars = cachedSecureSettings.getString(MAXMIND_LICENSE_KEY_SETTING.getKey()).getChars(); + } else { + passwordChars = null; + } + + // if the username is missing, empty, or blank, return null as 'no auth' + if (username == null || username.isEmpty() || username.isBlank()) { + return null; + } + + // likewise if the password chars array is missing or empty, return null as 'no auth' + if (passwordChars == null || passwordChars.length == 0) { + return null; + } + + return new HttpClient.PasswordAuthenticationHolder(username, passwordChars); + } + + @Override + protected EnterpriseGeoIpDownloader createTask( + long id, + String type, + String action, + TaskId parentTaskId, + PersistentTasksCustomMetadata.PersistentTask taskInProgress, + Map headers + ) { + return new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + id, + type, + action, + getDescription(taskInProgress), + parentTaskId, + headers, + () -> pollInterval, + this::buildCredentials + ); + } + + @Override + protected void nodeOperation(AllocatedPersistentTask task, EnterpriseGeoIpTaskParams params, PersistentTaskState state) { + EnterpriseGeoIpDownloader downloader = (EnterpriseGeoIpDownloader) task; + EnterpriseGeoIpTaskState geoIpTaskState = (state == null) ? EnterpriseGeoIpTaskState.EMPTY : (EnterpriseGeoIpTaskState) state; + downloader.setState(geoIpTaskState); + currentTask.set(downloader); + if (ENABLED_SETTING.get(clusterService.state().metadata().settings(), settings)) { + downloader.runDownloader(); + } + } + + public EnterpriseGeoIpDownloader getCurrentTask() { + return currentTask.get(); + } + + @Override + public void clusterChanged(ClusterChangedEvent event) { + EnterpriseGeoIpDownloader currentDownloader = getCurrentTask(); + if (currentDownloader != null) { + boolean hasGeoIpMetadataChanges = event.metadataChanged() + && event.changedCustomMetadataSet().contains(IngestGeoIpMetadata.TYPE); + if (hasGeoIpMetadataChanges) { + currentDownloader.requestReschedule(); // watching the cluster changed events to kick the thing off if it's not running + } + } + } + + public synchronized void reload(Settings settings) { + // `SecureSettings` are available here! cache them as they will be needed + // whenever dynamic cluster settings change and we have to rebuild the accounts + try { + this.cachedSecureSettings = extractSecureSettings(settings, List.of(MAXMIND_LICENSE_KEY_SETTING)); + } catch (GeneralSecurityException e) { + // rethrow as a runtime exception, there's logging higher up the call chain around ReloadablePlugin + throw new ElasticsearchException("Exception while reloading enterprise geoip download task executor", e); + } + } + + /** + * Extracts the {@link SecureSettings}` out of the passed in {@link Settings} object. 
The {@code Setting} argument has to have the + * {@code SecureSettings} open/available. Normally {@code SecureSettings} are available only under specific callstacks (eg. during node + * initialization or during a `reload` call). The returned copy can be reused freely as it will never be closed (this is a bit of + * cheating, but it is necessary in this specific circumstance). Only works for secure settings of type string (not file). + * + * @param source A {@code Settings} object with its {@code SecureSettings} open/available. + * @param securePluginSettings The list of settings to copy. + * @return A copy of the {@code SecureSettings} of the passed in {@code Settings} argument. + */ + private static SecureSettings extractSecureSettings(Settings source, List> securePluginSettings) + throws GeneralSecurityException { + // get the secure settings out + final SecureSettings sourceSecureSettings = Settings.builder().put(source, true).getSecureSettings(); + // filter and cache them... + final Map innerMap = new HashMap<>(); + if (sourceSecureSettings != null && securePluginSettings != null) { + for (final String settingKey : sourceSecureSettings.getSettingNames()) { + for (final Setting secureSetting : securePluginSettings) { + if (secureSetting.match(settingKey)) { + innerMap.put( + settingKey, + new SecureSettingValue( + sourceSecureSettings.getString(settingKey), + sourceSecureSettings.getSHA256Digest(settingKey) + ) + ); + } + } + } + } + return new SecureSettings() { + @Override + public boolean isLoaded() { + return true; + } + + @Override + public SecureString getString(String setting) { + return innerMap.get(setting).value(); + } + + @Override + public Set getSettingNames() { + return innerMap.keySet(); + } + + @Override + public InputStream getFile(String setting) { + throw new UnsupportedOperationException("A cached SecureSetting cannot be a file"); + } + + @Override + public byte[] getSHA256Digest(String setting) { + return innerMap.get(setting).sha256Digest(); + } + + @Override + public void close() throws IOException {} + + @Override + public void writeTo(StreamOutput out) throws IOException { + throw new UnsupportedOperationException("A cached SecureSetting cannot be serialized"); + } + }; + } + + /** + * A single-purpose record for the internal implementation of extractSecureSettings + */ + private record SecureSettingValue(SecureString value, byte[] sha256Digest) {} +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java new file mode 100644 index 0000000000000..1dd6422fd388a --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java @@ -0,0 +1,153 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.VersionedNamedWriteable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.GeoIpTaskState.Metadata; +import org.elasticsearch.persistent.PersistentTaskState; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import static org.elasticsearch.ingest.geoip.GeoIpDownloader.GEOIP_DOWNLOADER; +import static org.elasticsearch.persistent.PersistentTasksCustomMetadata.getTaskWithId; +import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; + +class EnterpriseGeoIpTaskState implements PersistentTaskState, VersionedNamedWriteable { + + private static final ParseField DATABASES = new ParseField("databases"); + + static final EnterpriseGeoIpTaskState EMPTY = new EnterpriseGeoIpTaskState(Map.of()); + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + GEOIP_DOWNLOADER, + true, + args -> { + List> databases = (List>) args[0]; + return new EnterpriseGeoIpTaskState(databases.stream().collect(Collectors.toMap(Tuple::v1, Tuple::v2))); + } + ); + + static { + PARSER.declareNamedObjects(constructorArg(), (p, c, name) -> Tuple.tuple(name, Metadata.fromXContent(p)), DATABASES); + } + + public static EnterpriseGeoIpTaskState fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + private final Map databases; + + EnterpriseGeoIpTaskState(Map databases) { + this.databases = Map.copyOf(databases); + } + + EnterpriseGeoIpTaskState(StreamInput input) throws IOException { + databases = input.readImmutableMap( + in -> new Metadata(in.readLong(), in.readVInt(), in.readVInt(), in.readString(), in.readLong(), in.readOptionalString()) + ); + } + + public EnterpriseGeoIpTaskState put(String name, Metadata metadata) { + HashMap newDatabases = new HashMap<>(databases); + newDatabases.put(name, metadata); + return new EnterpriseGeoIpTaskState(newDatabases); + } + + public EnterpriseGeoIpTaskState remove(String name) { + HashMap newDatabases = new HashMap<>(databases); + newDatabases.remove(name); + return new EnterpriseGeoIpTaskState(newDatabases); + } + + public Map getDatabases() { + return databases; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + EnterpriseGeoIpTaskState that = (EnterpriseGeoIpTaskState) o; + return databases.equals(that.databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + { + builder.startObject("databases"); + for (Map.Entry e : 
databases.entrySet()) { + builder.field(e.getKey(), e.getValue()); + } + builder.endObject(); + } + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return "enterprise-geoip-downloader"; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(databases, (o, v) -> { + o.writeLong(v.lastUpdate()); + o.writeVInt(v.firstChunk()); + o.writeVInt(v.lastChunk()); + o.writeString(v.md5()); + o.writeLong(v.lastCheck()); + o.writeOptionalString(v.sha256()); + }); + } + + /** + * Retrieves the geoip downloader's task state from the cluster state. This may return null in some circumstances, + * for example if the geoip downloader task hasn't been created yet (which it wouldn't be if it's disabled). + * + * @param state the cluster state to read the task state from + * @return the geoip downloader's task state or null if there is not a state to read + */ + @Nullable + static EnterpriseGeoIpTaskState getEnterpriseGeoIpTaskState(ClusterState state) { + PersistentTasksCustomMetadata.PersistentTask task = getTaskWithId(state, EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER); + return (task == null) ? null : (EnterpriseGeoIpTaskState) task.getState(); + } + +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java index 895c9315d2325..ee6f2f16f051b 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.RangeQueryBuilder; @@ -170,23 +171,28 @@ private List fetchDatabasesOverview() throws IOException { } // visible for testing - void processDatabase(Map databaseInfo) { + void processDatabase(final Map databaseInfo) { String name = databaseInfo.get("name").toString().replace(".tgz", "") + ".mmdb"; String md5 = (String) databaseInfo.get("md5_hash"); - if (state.contains(name) && Objects.equals(md5, state.get(name).md5())) { - updateTimestamp(name, state.get(name)); - return; - } - logger.debug("downloading geoip database [{}]", name); String url = databaseInfo.get("url").toString(); if (url.startsWith("http") == false) { // relative url, add it after last slash (i.e. resolve sibling) or at the end if there's no slash after http[s]:// int lastSlash = endpoint.substring(8).lastIndexOf('/'); url = (lastSlash != -1 ? endpoint.substring(0, lastSlash + 8) : endpoint) + "/" + url; } + processDatabase(name, md5, url); + } + + private void processDatabase(final String name, final String md5, final String url) { + Metadata metadata = state.getDatabases().getOrDefault(name, Metadata.EMPTY); + if (Objects.equals(metadata.md5(), md5)) { + updateTimestamp(name, metadata); + return; + } + logger.debug("downloading geoip database [{}]", name); long start = System.currentTimeMillis(); try (InputStream is = httpClient.get(url)) { - int firstChunk = state.contains(name) ? 
state.get(name).lastChunk() + 1 : 0; + int firstChunk = metadata.lastChunk() + 1; // if there is no metadata, then Metadata.EMPTY.lastChunk() + 1 = 0 int lastChunk = indexChunks(name, is, firstChunk, md5, start); if (lastChunk > firstChunk) { state = state.put(name, new Metadata(start, firstChunk, lastChunk - 1, md5, start)); @@ -313,22 +319,20 @@ public void requestReschedule() { } private void cleanDatabases() { - long expiredDatabases = state.getDatabases() + List> expiredDatabases = state.getDatabases() .entrySet() .stream() - .filter(e -> e.getValue().isValid(clusterService.state().metadata().settings()) == false) - .peek(e -> { - String name = e.getKey(); - Metadata meta = e.getValue(); - deleteOldChunks(name, meta.lastChunk() + 1); - state = state.put( - name, - new Metadata(meta.lastUpdate(), meta.firstChunk(), meta.lastChunk(), meta.md5(), meta.lastCheck() - 1) - ); - updateTaskState(); - }) - .count(); - stats = stats.expiredDatabases((int) expiredDatabases); + .filter(e -> e.getValue().isNewEnough(clusterService.state().metadata().settings()) == false) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList(); + expiredDatabases.forEach(e -> { + String name = e.v1(); + Metadata meta = e.v2(); + deleteOldChunks(name, meta.lastChunk() + 1); + state = state.put(name, new Metadata(meta.lastUpdate(), meta.firstChunk(), meta.lastChunk(), meta.md5(), meta.lastCheck() - 1)); + updateTaskState(); + }); + stats = stats.expiredDatabases(expiredDatabases.size()); } @Override diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java index 09ac488f96e2d..3f89bb1dd5c50 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java @@ -217,7 +217,7 @@ public void clusterChanged(ClusterChangedEvent event) { } boolean hasIndicesChanges = event.previousState().metadata().indices().equals(event.state().metadata().indices()) == false; - boolean hasIngestPipelineChanges = event.changedCustomMetadataSet().contains(IngestMetadata.TYPE); + boolean hasIngestPipelineChanges = event.metadataChanged() && event.changedCustomMetadataSet().contains(IngestMetadata.TYPE); if (hasIngestPipelineChanges || hasIndicesChanges) { boolean newAtLeastOneGeoipProcessor = hasAtLeastOneGeoipProcessor(event.state()); diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java index e39705a71f56c..82b9e930280b7 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java @@ -592,7 +592,7 @@ private Map retrieveEnterpriseGeoData(GeoIpDatabase geoIpDatabas } case ISP_ORGANIZATION_NAME -> { if (ispOrganization != null) { - geoData.put("isp_organization", ispOrganization); + geoData.put("isp_organization_name", ispOrganization); } } case MOBILE_COUNTRY_CODE -> { @@ -660,7 +660,7 @@ private Map retrieveIspGeoData(GeoIpDatabase geoIpDatabase, Inet } case ISP_ORGANIZATION_NAME -> { if (ispOrganization != null) { - geoData.put("isp_organization", ispOrganization); + geoData.put("isp_organization_name", ispOrganization); } } case MOBILE_COUNTRY_CODE -> { diff 
--git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java index d55f517b46e24..93dc345a80a2f 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java @@ -42,6 +42,10 @@ class GeoIpTaskState implements PersistentTaskState, VersionedNamedWriteable { + private static boolean includeSha256(TransportVersion version) { + return version.onOrAfter(TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15); + } + private static final ParseField DATABASES = new ParseField("databases"); static final GeoIpTaskState EMPTY = new GeoIpTaskState(Map.of()); @@ -71,7 +75,16 @@ public static GeoIpTaskState fromXContent(XContentParser parser) throws IOExcept } GeoIpTaskState(StreamInput input) throws IOException { - databases = input.readImmutableMap(in -> new Metadata(in.readLong(), in.readVInt(), in.readVInt(), in.readString(), in.readLong())); + databases = input.readImmutableMap( + in -> new Metadata( + in.readLong(), + in.readVInt(), + in.readVInt(), + in.readString(), + in.readLong(), + includeSha256(in.getTransportVersion()) ? input.readOptionalString() : null + ) + ); } public GeoIpTaskState put(String name, Metadata metadata) { @@ -84,14 +97,6 @@ public Map getDatabases() { return databases; } - public boolean contains(String name) { - return databases.containsKey(name); - } - - public Metadata get(String name) { - return databases.get(name); - } - @Override public boolean equals(Object o) { if (this == o) return true; @@ -137,17 +142,29 @@ public void writeTo(StreamOutput out) throws IOException { o.writeVInt(v.lastChunk); o.writeString(v.md5); o.writeLong(v.lastCheck); + if (includeSha256(o.getTransportVersion())) { + o.writeOptionalString(v.sha256); + } }); } - record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck) implements ToXContentObject { + record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck, @Nullable String sha256) + implements + ToXContentObject { - static final String NAME = GEOIP_DOWNLOADER + "-metadata"; + /** + * An empty Metadata object useful for getOrDefault -type calls. Crucially, the 'lastChunk' is -1, so it's safe to use + * with logic that says the new firstChunk is the old lastChunk + 1. + */ + static Metadata EMPTY = new Metadata(-1, -1, -1, "", -1, null); + + private static final String NAME = GEOIP_DOWNLOADER + "-metadata"; private static final ParseField LAST_CHECK = new ParseField("last_check"); private static final ParseField LAST_UPDATE = new ParseField("last_update"); private static final ParseField FIRST_CHUNK = new ParseField("first_chunk"); private static final ParseField LAST_CHUNK = new ParseField("last_chunk"); private static final ParseField MD5 = new ParseField("md5"); + private static final ParseField SHA256 = new ParseField("sha256"); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( NAME, @@ -157,7 +174,8 @@ record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long (int) args[1], (int) args[2], (String) args[3], - (long) (args[4] == null ? args[0] : args[4]) + (long) (args[4] == null ? 
args[0] : args[4]), + (String) args[5] ) ); @@ -167,6 +185,7 @@ record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long PARSER.declareInt(constructorArg(), LAST_CHUNK); PARSER.declareString(constructorArg(), MD5); PARSER.declareLong(optionalConstructorArg(), LAST_CHECK); + PARSER.declareString(optionalConstructorArg(), SHA256); } public static Metadata fromXContent(XContentParser parser) { @@ -181,11 +200,15 @@ public static Metadata fromXContent(XContentParser parser) { Objects.requireNonNull(md5); } + Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck) { + this(lastUpdate, firstChunk, lastChunk, md5, lastCheck, null); + } + public boolean isCloseToExpiration() { return Instant.ofEpochMilli(lastCheck).isBefore(Instant.now().minus(25, ChronoUnit.DAYS)); } - public boolean isValid(Settings settings) { + public boolean isNewEnough(Settings settings) { TimeValue valid = settings.getAsTime("ingest.geoip.database_validity", TimeValue.timeValueDays(30)); return Instant.ofEpochMilli(lastCheck).isAfter(Instant.now().minus(valid.getMillis(), ChronoUnit.MILLIS)); } @@ -199,6 +222,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(FIRST_CHUNK.getPreferredName(), firstChunk); builder.field(LAST_CHUNK.getPreferredName(), lastChunk); builder.field(MD5.getPreferredName(), md5); + if (sha256 != null) { // only serialize if not null, for prettiness reasons + builder.field(SHA256.getPreferredName(), sha256); + } } builder.endObject(); return builder; diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java index 8efc4dc2e74bd..2f6bd6ef20fd0 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java @@ -24,6 +24,7 @@ import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; +import java.util.Arrays; import java.util.Objects; import static java.net.HttpURLConnection.HTTP_MOVED_PERM; @@ -34,6 +35,31 @@ class HttpClient { + /** + * A PasswordAuthenticationHolder is just a wrapper around a PasswordAuthentication to implement AutoCloseable. + * This construction makes it possible to use a PasswordAuthentication in a try-with-resources statement, which + * makes it easier to ensure cleanup of the PasswordAuthentication is performed after it's finished being used. + */ + static final class PasswordAuthenticationHolder implements AutoCloseable { + private PasswordAuthentication auth; + + PasswordAuthenticationHolder(String username, char[] passwordChars) { + this.auth = new PasswordAuthentication(username, passwordChars); // clones the passed-in chars + } + + public PasswordAuthentication get() { + Objects.requireNonNull(auth); + return auth; + } + + @Override + public void close() { + final PasswordAuthentication clear = this.auth; + this.auth = null; // set to null and then clear it + Arrays.fill(clear.getPassword(), '\0'); // zero out the password chars + } + } + // a private sentinel value for representing the idea that there's no auth for some request. // this allows us to have a not-null requirement on the methods that do accept an auth. // if you don't want auth, then don't use those methods. 
;) diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java new file mode 100644 index 0000000000000..b6bfbf94fa8f7 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java @@ -0,0 +1,157 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.cluster.Diff; +import org.elasticsearch.cluster.DiffableUtils; +import org.elasticsearch.cluster.NamedDiff; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Holds the ingest-geoip databases that are available in the cluster state. 
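+ * The databases are kept as a map keyed by the user-supplied database configuration id, with {@code DatabaseConfigurationMetadata}
+ * values; cluster state diffs are supported via the nested {@code GeoIpMetadataDiff}.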
+ */ +public final class IngestGeoIpMetadata implements Metadata.Custom { + + public static final String TYPE = "ingest_geoip"; + private static final ParseField DATABASES_FIELD = new ParseField("databases"); + + public static final IngestGeoIpMetadata EMPTY = new IngestGeoIpMetadata(Map.of()); + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "ingest_geoip_metadata", + a -> new IngestGeoIpMetadata( + ((List) a[0]).stream().collect(Collectors.toMap((m) -> m.database().id(), Function.identity())) + ) + ); + static { + PARSER.declareNamedObjects(ConstructingObjectParser.constructorArg(), (p, c, n) -> DatabaseConfigurationMetadata.parse(p, n), v -> { + throw new IllegalArgumentException("ordered " + DATABASES_FIELD.getPreferredName() + " are not supported"); + }, DATABASES_FIELD); + } + + private final Map databases; + + public IngestGeoIpMetadata(Map databases) { + this.databases = Map.copyOf(databases); + } + + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15; + } + + public Map getDatabases() { + return databases; + } + + public IngestGeoIpMetadata(StreamInput in) throws IOException { + this.databases = in.readMap(StreamInput::readString, DatabaseConfigurationMetadata::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(databases, StreamOutput::writeWriteable); + } + + public static IngestGeoIpMetadata fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + @Override + public Iterator toXContentChunked(ToXContent.Params ignored) { + return Iterators.concat(ChunkedToXContentHelper.xContentValuesMap(DATABASES_FIELD.getPreferredName(), databases)); + } + + @Override + public EnumSet context() { + return Metadata.ALL_CONTEXTS; + } + + @Override + public Diff diff(Metadata.Custom before) { + return new GeoIpMetadataDiff((IngestGeoIpMetadata) before, this); + } + + static class GeoIpMetadataDiff implements NamedDiff { + + final Diff> databases; + + GeoIpMetadataDiff(IngestGeoIpMetadata before, IngestGeoIpMetadata after) { + this.databases = DiffableUtils.diff(before.databases, after.databases, DiffableUtils.getStringKeySerializer()); + } + + GeoIpMetadataDiff(StreamInput in) throws IOException { + databases = DiffableUtils.readJdkMapDiff( + in, + DiffableUtils.getStringKeySerializer(), + DatabaseConfigurationMetadata::new, + DatabaseConfigurationMetadata::readDiffFrom + ); + } + + @Override + public Metadata.Custom apply(Metadata.Custom part) { + return new IngestGeoIpMetadata(databases.apply(((IngestGeoIpMetadata) part).databases)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + databases.writeTo(out); + } + + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + IngestGeoIpMetadata that = (IngestGeoIpMetadata) o; + return Objects.equals(databases, that.databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } +} diff --git 
a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java index 9d0f9848d97b6..e606688ad60a0 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java @@ -12,8 +12,10 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.NamedDiff; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; @@ -25,8 +27,18 @@ import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.indices.SystemIndexDescriptor; +import org.elasticsearch.ingest.EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams; import org.elasticsearch.ingest.IngestService; import org.elasticsearch.ingest.Processor; +import org.elasticsearch.ingest.geoip.direct.DeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.GetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestDeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestGetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestPutDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportDeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportGetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportPutDatabaseConfigurationAction; import org.elasticsearch.ingest.geoip.stats.GeoIpDownloaderStats; import org.elasticsearch.ingest.geoip.stats.GeoIpStatsAction; import org.elasticsearch.ingest.geoip.stats.GeoIpStatsTransportAction; @@ -38,6 +50,7 @@ import org.elasticsearch.plugins.IngestPlugin; import org.elasticsearch.plugins.PersistentTaskPlugin; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.ReloadablePlugin; import org.elasticsearch.plugins.SystemIndexPlugin; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; @@ -57,13 +70,21 @@ import java.util.function.Supplier; import static org.elasticsearch.index.mapper.MapperService.SINGLE_MAPPING_NAME; +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; import static org.elasticsearch.ingest.IngestService.INGEST_ORIGIN; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.DATABASES_INDEX; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.DATABASES_INDEX_PATTERN; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.GEOIP_DOWNLOADER; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; -public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, SystemIndexPlugin, Closeable, PersistentTaskPlugin, ActionPlugin { +public class IngestGeoIpPlugin extends Plugin + implements + IngestPlugin, + SystemIndexPlugin, + Closeable, + PersistentTaskPlugin, + ActionPlugin, + ReloadablePlugin { public 
static final Setting CACHE_SIZE = Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope); private static final int GEOIP_INDEX_MAPPINGS_VERSION = 1; /** @@ -78,6 +99,7 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, SystemInd private final SetOnce ingestService = new SetOnce<>(); private final SetOnce databaseRegistry = new SetOnce<>(); private GeoIpDownloaderTaskExecutor geoIpDownloaderTaskExecutor; + private EnterpriseGeoIpDownloaderTaskExecutor enterpriseGeoIpDownloaderTaskExecutor; @Override public List> getSettings() { @@ -86,7 +108,8 @@ public List> getSettings() { GeoIpDownloaderTaskExecutor.EAGER_DOWNLOAD_SETTING, GeoIpDownloaderTaskExecutor.ENABLED_SETTING, GeoIpDownloader.ENDPOINT_SETTING, - GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING + GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING, + EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_LICENSE_KEY_SETTING ); } @@ -123,7 +146,16 @@ public Collection createComponents(PluginServices services) { services.threadPool() ); geoIpDownloaderTaskExecutor.init(); - return List.of(databaseRegistry.get(), geoIpDownloaderTaskExecutor); + + enterpriseGeoIpDownloaderTaskExecutor = new EnterpriseGeoIpDownloaderTaskExecutor( + services.client(), + new HttpClient(), + services.clusterService(), + services.threadPool() + ); + enterpriseGeoIpDownloaderTaskExecutor.init(); + + return List.of(databaseRegistry.get(), geoIpDownloaderTaskExecutor, enterpriseGeoIpDownloaderTaskExecutor); } @Override @@ -139,12 +171,17 @@ public List> getPersistentTasksExecutor( SettingsModule settingsModule, IndexNameExpressionResolver expressionResolver ) { - return List.of(geoIpDownloaderTaskExecutor); + return List.of(geoIpDownloaderTaskExecutor, enterpriseGeoIpDownloaderTaskExecutor); } @Override public List> getActions() { - return List.of(new ActionHandler<>(GeoIpStatsAction.INSTANCE, GeoIpStatsTransportAction.class)); + return List.of( + new ActionHandler<>(GeoIpStatsAction.INSTANCE, GeoIpStatsTransportAction.class), + new ActionHandler<>(GetDatabaseConfigurationAction.INSTANCE, TransportGetDatabaseConfigurationAction.class), + new ActionHandler<>(DeleteDatabaseConfigurationAction.INSTANCE, TransportDeleteDatabaseConfigurationAction.class), + new ActionHandler<>(PutDatabaseConfigurationAction.INSTANCE, TransportPutDatabaseConfigurationAction.class) + ); } @Override @@ -159,22 +196,41 @@ public List getRestHandlers( Supplier nodesInCluster, Predicate clusterSupportsFeature ) { - return List.of(new RestGeoIpStatsAction()); + return List.of( + new RestGeoIpStatsAction(), + new RestGetDatabaseConfigurationAction(), + new RestDeleteDatabaseConfigurationAction(), + new RestPutDatabaseConfigurationAction() + ); } @Override public List getNamedXContent() { return List.of( new NamedXContentRegistry.Entry(PersistentTaskParams.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskParams::fromXContent), - new NamedXContentRegistry.Entry(PersistentTaskState.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskState::fromXContent) + new NamedXContentRegistry.Entry(PersistentTaskState.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskState::fromXContent), + new NamedXContentRegistry.Entry( + PersistentTaskParams.class, + new ParseField(ENTERPRISE_GEOIP_DOWNLOADER), + EnterpriseGeoIpTaskParams::fromXContent + ), + new NamedXContentRegistry.Entry( + PersistentTaskState.class, + new ParseField(ENTERPRISE_GEOIP_DOWNLOADER), + EnterpriseGeoIpTaskState::fromXContent + ) ); } @Override public List getNamedWriteables() { return List.of( + 
new NamedWriteableRegistry.Entry(Metadata.Custom.class, IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata::new), + new NamedWriteableRegistry.Entry(NamedDiff.class, IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.GeoIpMetadataDiff::new), new NamedWriteableRegistry.Entry(PersistentTaskState.class, GEOIP_DOWNLOADER, GeoIpTaskState::new), new NamedWriteableRegistry.Entry(PersistentTaskParams.class, GEOIP_DOWNLOADER, GeoIpTaskParams::new), + new NamedWriteableRegistry.Entry(PersistentTaskState.class, ENTERPRISE_GEOIP_DOWNLOADER, EnterpriseGeoIpTaskState::new), + new NamedWriteableRegistry.Entry(PersistentTaskParams.class, ENTERPRISE_GEOIP_DOWNLOADER, EnterpriseGeoIpTaskParams::new), new NamedWriteableRegistry.Entry(Task.Status.class, GEOIP_DOWNLOADER, GeoIpDownloaderStats::new) ); } @@ -235,4 +291,9 @@ private static XContentBuilder mappings() { throw new UncheckedIOException("Failed to build mappings for " + DATABASES_INDEX, e); } } + + @Override + public void reload(Settings settings) { + enterpriseGeoIpDownloaderTaskExecutor.reload(settings); + } } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java new file mode 100644 index 0000000000000..0a43d7a2d830b --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java @@ -0,0 +1,209 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.cluster.metadata.MetadataCreateIndexService; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Objects; +import java.util.Set; +import java.util.regex.Pattern; + +/** + * A database configuration is an identified (has an id) configuration of a named geoip location database to download, + * and the identifying information/configuration to download the named database from some database provider. + *

+ * That is, it has an id e.g. "my_db_config_1" and it says "download the file named XXXX from SomeCompany, and here's the + * magic token to use to do that." + */ +public record DatabaseConfiguration(String id, String name, Maxmind maxmind) implements Writeable, ToXContentObject { + + // id is a user selected signifier like 'my_domain_db' + // name is the name of a file that can be downloaded (like 'GeoIP2-Domain') + + // a configuration will have a 'type' like "maxmind", and that might have some more details, + // for now, though the important thing is that the json has to have it even though we don't model it meaningfully in this class + + public DatabaseConfiguration { + // these are invariants, not actual validation + Objects.requireNonNull(id); + Objects.requireNonNull(name); + Objects.requireNonNull(maxmind); + } + + /** + * An alphanumeric, followed by 0-126 alphanumerics, dashes, or underscores. That is, 1-127 alphanumerics, dashes, or underscores, + * but a leading dash or underscore isn't allowed (we're reserving leading dashes and underscores [and other odd characters] for + * Elastic and the future). + */ + private static final Pattern ID_PATTERN = Pattern.compile("\\p{Alnum}[_\\-\\p{Alnum}]{0,126}"); + + public static final Set MAXMIND_NAMES = Set.of( + "GeoIP2-Anonymous-IP", + "GeoIP2-City", + "GeoIP2-Connection-Type", + "GeoIP2-Country", + "GeoIP2-Domain", + "GeoIP2-Enterprise", + "GeoIP2-ISP" + + // in order to prevent a conflict between the (ordinary) geoip downloader and the enterprise geoip downloader, + // the enterprise geoip downloader is limited only to downloading the commercial files that the (ordinary) geoip downloader + // doesn't support out of the box -- in the future if we would like to relax this constraint, then we'll need to resolve that + // conflict at the same time. + + // "GeoLite2-ASN", + // "GeoLite2-City", + // "GeoLite2-Country" + ); + + private static final ParseField NAME = new ParseField("name"); + private static final ParseField MAXMIND = new ParseField("maxmind"); + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "database", + false, + (a, id) -> { + String name = (String) a[0]; + Maxmind maxmind = (Maxmind) a[1]; + return new DatabaseConfiguration(id, name, maxmind); + } + ); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), (parser, id) -> Maxmind.PARSER.apply(parser, null), MAXMIND); + } + + public DatabaseConfiguration(StreamInput in) throws IOException { + this(in.readString(), in.readString(), new Maxmind(in)); + } + + public static DatabaseConfiguration parse(XContentParser parser, String id) { + return PARSER.apply(parser, id); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(id); + out.writeString(name); + maxmind.writeTo(out); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("name", name); + builder.field("maxmind", maxmind); + builder.endObject(); + return builder; + } + + /** + * An id is intended to be alphanumerics, dashes, and underscores (only), but we're reserving leading dashes and underscores for + * ourselves in the future, that is, they're not for the ones that users can PUT. 
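+ * For instance (purely illustrative examples): "my_db_config_1" and "maxmind-city-2" are acceptable, while "_internal", "-reserved", or an id longer than 127 bytes would be rejected.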
+ */ + static void validateId(String id) throws IllegalArgumentException { + if (Strings.isNullOrEmpty(id)) { + throw new IllegalArgumentException("invalid database configuration id [" + id + "]: must not be null or empty"); + } + MetadataCreateIndexService.validateIndexOrAliasName( + id, + (id1, description) -> new IllegalArgumentException("invalid database configuration id [" + id1 + "]: " + description) + ); + int byteCount = id.getBytes(StandardCharsets.UTF_8).length; + if (byteCount > 127) { + throw new IllegalArgumentException( + "invalid database configuration id [" + id + "]: id is too long, (" + byteCount + " > " + 127 + ")" + ); + } + if (ID_PATTERN.matcher(id).matches() == false) { + throw new IllegalArgumentException( + "invalid database configuration id [" + + id + + "]: id doesn't match required rules (alphanumerics, dashes, and underscores, only)" + ); + } + } + + public ActionRequestValidationException validate() { + ActionRequestValidationException err = new ActionRequestValidationException(); + + // how do we cross the id validation divide here? or do we? it seems unfortunate to not invoke it at all. + + // name validation + if (Strings.hasText(name) == false) { + err.addValidationError("invalid name [" + name + "]: cannot be empty"); + } + + if (MAXMIND_NAMES.contains(name) == false) { + err.addValidationError("invalid name [" + name + "]: must be a supported name ([" + MAXMIND_NAMES + "])"); + } + + // important: the name must be unique across all configurations of this same type, + // but we validate that in the cluster state update, not here. + try { + validateId(id); + } catch (IllegalArgumentException e) { + err.addValidationError(e.getMessage()); + } + return err.validationErrors().isEmpty() ? null : err; + } + + public record Maxmind(String accountId) implements Writeable, ToXContentObject { + + public Maxmind { + // this is an invariant, not actual validation + Objects.requireNonNull(accountId); + } + + private static final ParseField ACCOUNT_ID = new ParseField("account_id"); + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("database", false, (a, id) -> { + String accountId = (String) a[0]; + return new Maxmind(accountId); + }); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), ACCOUNT_ID); + } + + public Maxmind(StreamInput in) throws IOException { + this(in.readString()); + } + + public static Maxmind parse(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(accountId); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("account_id", accountId); + builder.endObject(); + return builder; + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java new file mode 100644 index 0000000000000..574f97e4c5e64 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.cluster.Diff; +import org.elasticsearch.cluster.SimpleDiffable; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +/** + * {@code DatabaseConfigurationMetadata} encapsulates a {@link DatabaseConfiguration} as well as + * the additional meta information like version (a monotonically incrementing number) and last modified date. + */ +public record DatabaseConfigurationMetadata(DatabaseConfiguration database, long version, long modifiedDate) + implements + SimpleDiffable<DatabaseConfigurationMetadata>, + ToXContentObject { + + public static final ParseField DATABASE = new ParseField("database"); + public static final ParseField VERSION = new ParseField("version"); + public static final ParseField MODIFIED_DATE_MILLIS = new ParseField("modified_date_millis"); + public static final ParseField MODIFIED_DATE = new ParseField("modified_date"); + // later, things like this: + // static final ParseField LAST_SUCCESS = new ParseField("last_success"); + // static final ParseField LAST_FAILURE = new ParseField("last_failure"); + + public static final ConstructingObjectParser<DatabaseConfigurationMetadata, String> PARSER = new ConstructingObjectParser<>( + "database_metadata", + true, + a -> { + DatabaseConfiguration database = (DatabaseConfiguration) a[0]; + return new DatabaseConfigurationMetadata(database, (long) a[1], (long) a[2]); + } + ); + static { + PARSER.declareObject(ConstructingObjectParser.constructorArg(), DatabaseConfiguration::parse, DATABASE); + PARSER.declareLong(ConstructingObjectParser.constructorArg(), VERSION); + PARSER.declareLong(ConstructingObjectParser.constructorArg(), MODIFIED_DATE_MILLIS); + } + + public static DatabaseConfigurationMetadata parse(XContentParser parser, String name) { + return PARSER.apply(parser, name); + } + + public DatabaseConfigurationMetadata(StreamInput in) throws IOException { + this(new DatabaseConfiguration(in), in.readVLong(), in.readVLong()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + // this is cluster state serialization, the id is implicit and doesn't need to be included here + // (we'll be in a json map where the id is the key) + builder.startObject(); + builder.field(VERSION.getPreferredName(), version); + builder.timeField(MODIFIED_DATE_MILLIS.getPreferredName(), MODIFIED_DATE.getPreferredName(), modifiedDate); + builder.field(DATABASE.getPreferredName(), database); + builder.endObject(); + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + database.writeTo(out); + out.writeVLong(version); + out.writeVLong(modifiedDate); + } + + public static Diff<DatabaseConfigurationMetadata> readDiffFrom(StreamInput in) throws IOException { + return SimpleDiffable.readDiffFrom(DatabaseConfigurationMetadata::new, in); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java 
b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..843cc986c47e7 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; + +import java.io.IOException; +import java.util.Objects; + +public class DeleteDatabaseConfigurationAction extends ActionType { + public static final DeleteDatabaseConfigurationAction INSTANCE = new DeleteDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/delete"; + + protected DeleteDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final String databaseId; + + public Request(StreamInput in) throws IOException { + super(in); + databaseId = in.readString(); + } + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, String databaseId) { + super(masterNodeTimeout, ackTimeout); + this.databaseId = Objects.requireNonNull(databaseId, "id may not be null"); + } + + public String getDatabaseId() { + return this.databaseId; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(databaseId); + } + + @Override + public int hashCode() { + return databaseId.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return Objects.equals(databaseId, other.databaseId); + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..546c0c2df821d --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java @@ -0,0 +1,142 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.DATABASE; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.MODIFIED_DATE; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.MODIFIED_DATE_MILLIS; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.VERSION; + +public class GetDatabaseConfigurationAction extends ActionType { + public static final GetDatabaseConfigurationAction INSTANCE = new GetDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/get"; + + protected GetDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final String[] databaseIds; + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, String... databaseIds) { + super(masterNodeTimeout, ackTimeout); + this.databaseIds = Objects.requireNonNull(databaseIds, "ids may not be null"); + } + + public Request(StreamInput in) throws IOException { + super(in); + databaseIds = in.readStringArray(); + } + + public String[] getDatabaseIds() { + return this.databaseIds; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeStringArray(databaseIds); + } + + @Override + public int hashCode() { + return Arrays.hashCode(databaseIds); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return Arrays.equals(databaseIds, other.databaseIds); + } + } + + public static class Response extends ActionResponse implements ToXContentObject { + + private final List databases; + + public Response(List databases) { + this.databases = List.copyOf(databases); // defensive copy + } + + public Response(StreamInput in) throws IOException { + this(in.readCollectionAsList(DatabaseConfigurationMetadata::new)); + } + + public List getDatabases() { + return this.databases; + } + + @Override + public String toString() { + return Strings.toString(this); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startArray("databases"); + for (DatabaseConfigurationMetadata item : databases) { + DatabaseConfiguration database = item.database(); + builder.startObject(); + builder.field("id", database.id()); // serialize including the id -- this is get response serialization + builder.field(VERSION.getPreferredName(), item.version()); + builder.timeField(MODIFIED_DATE_MILLIS.getPreferredName(), MODIFIED_DATE.getPreferredName(), item.modifiedDate()); + builder.field(DATABASE.getPreferredName(), database); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + return builder; + } + + 
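+ // An illustrative sketch of the JSON this toXContent produces (the id and values below are made up):
+ // {
+ //   "databases": [
+ //     {
+ //       "id": "my_db_config_1",
+ //       "version": 1,
+ //       "modified_date_millis": 1720000000000,
+ //       "database": { "name": "GeoIP2-Domain", "maxmind": { "account_id": "123456" } }
+ //     }
+ //   ]
+ // }
+ // (when human-readable output is requested, timeField additionally renders a formatted "modified_date")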
@Override + public void writeTo(StreamOutput out) throws IOException { + out.writeCollection(databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Response other = (Response) obj; + return databases.equals(other.databases); + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..7bd5e1fa5cc68 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Objects; + +public class PutDatabaseConfigurationAction extends ActionType { + public static final PutDatabaseConfigurationAction INSTANCE = new PutDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/put"; + + protected PutDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final DatabaseConfiguration database; + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, DatabaseConfiguration database) { + super(masterNodeTimeout, ackTimeout); + this.database = database; + } + + public Request(StreamInput in) throws IOException { + super(in); + database = new DatabaseConfiguration(in); + } + + public DatabaseConfiguration getDatabase() { + return this.database; + } + + public static Request parseRequest(TimeValue masterNodeTimeout, TimeValue ackTimeout, String id, XContentParser parser) { + return new Request(masterNodeTimeout, ackTimeout, DatabaseConfiguration.parse(parser, id)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + database.writeTo(out); + } + + @Override + public ActionRequestValidationException validate() { + return database.validate(); + } + + @Override + public int hashCode() { + return Objects.hash(database); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return database.equals(other.database); + } + + @Override + public String toString() { + return Strings.toString((b, p) -> b.field(database.id(), database)); + } + } +} diff --git 
a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..4dc263224ad0a --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.DELETE; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestDeleteDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(DELETE, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_delete_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) { + final var req = new DeleteDatabaseConfigurationAction.Request( + getMasterNodeTimeout(request), + getAckTimeout(request), + request.param("id") + ); + return channel -> client.execute(DeleteDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..b237ceb638918 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.Strings; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.GET; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestGetDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(GET, "/_ingest/geoip/database"), new Route(GET, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_get_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) { + final var req = new GetDatabaseConfigurationAction.Request( + getMasterNodeTimeout(request), + getAckTimeout(request), + Strings.splitStringByCommaToArray(request.param("id")) + ); + return channel -> client.execute(GetDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..62b01b930d5cd --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction.Request; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.PUT; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestPutDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(PUT, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_put_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { + final Request req; + try (var parser = request.contentParser()) { + req = PutDatabaseConfigurationAction.Request.parseRequest( + getMasterNodeTimeout(request), + getAckTimeout(request), + request.param("id"), + parser + ); + } + return channel -> client.execute(PutDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..43aacee956279 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java @@ -0,0 +1,128 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.SimpleBatchedExecutor; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Strings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.ingest.geoip.direct.DeleteDatabaseConfigurationAction.Request; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.HashMap; +import java.util.Map; + +public class TransportDeleteDatabaseConfigurationAction extends TransportMasterNodeAction { + + private static final Logger logger = LogManager.getLogger(TransportDeleteDatabaseConfigurationAction.class); + + private static final SimpleBatchedExecutor DELETE_TASK_EXECUTOR = new SimpleBatchedExecutor<>() { + @Override + public Tuple executeTask(DeleteDatabaseConfigurationTask task, ClusterState clusterState) throws Exception { + return Tuple.tuple(task.execute(clusterState), null); + } + + @Override + public void taskSucceeded(DeleteDatabaseConfigurationTask task, Void unused) { + logger.trace("Updated cluster state for deletion of database configuration [{}]", task.databaseId); + task.listener.onResponse(AcknowledgedResponse.TRUE); + } + }; + + private final MasterServiceTaskQueue deleteDatabaseConfigurationTaskQueue; + + @Inject + public TransportDeleteDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + DeleteDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + Request::new, + indexNameExpressionResolver, + AcknowledgedResponse::readFrom, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + this.deleteDatabaseConfigurationTaskQueue = clusterService.createTaskQueue( + "delete-geoip-database-configuration-state-update", + Priority.NORMAL, + DELETE_TASK_EXECUTOR + ); + } + + @Override + protected void masterOperation(Task task, Request request, ClusterState state, ActionListener listener) + throws Exception { + final String id = request.getDatabaseId(); + final IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + if (geoIpMeta.getDatabases().containsKey(id) == false) { + throw new ResourceNotFoundException("Database configuration not found: {}", id); + } + 
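+ // the removal itself runs as a batched master-service task: DeleteDatabaseConfigurationTask.execute below rebuilds
+ // IngestGeoIpMetadata without this id, and the listener is acknowledged from taskSucceeded once the update completes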
deleteDatabaseConfigurationTaskQueue.submitTask( + Strings.format("delete-geoip-database-configuration-[%s]", id), + new DeleteDatabaseConfigurationTask(listener, id), + null + ); + } + + private record DeleteDatabaseConfigurationTask(ActionListener listener, String databaseId) + implements + ClusterStateTaskListener { + + ClusterState execute(ClusterState currentState) throws Exception { + final IngestGeoIpMetadata geoIpMeta = currentState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + logger.debug("deleting database configuration [{}]", databaseId); + Map databases = new HashMap<>(geoIpMeta.getDatabases()); + databases.remove(databaseId); + + Metadata currentMeta = currentState.metadata(); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentMeta).putCustom(IngestGeoIpMetadata.TYPE, new IngestGeoIpMetadata(databases))) + .build(); + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + } + + @Override + protected ClusterBlockException checkBlock(Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..a14a143e3f404 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java @@ -0,0 +1,109 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class TransportGetDatabaseConfigurationAction extends TransportMasterNodeAction< + GetDatabaseConfigurationAction.Request, + GetDatabaseConfigurationAction.Response> { + + @Inject + public TransportGetDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + GetDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + GetDatabaseConfigurationAction.Request::new, + indexNameExpressionResolver, + GetDatabaseConfigurationAction.Response::new, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + } + + @Override + protected void masterOperation( + final Task task, + final GetDatabaseConfigurationAction.Request request, + final ClusterState state, + final ActionListener listener + ) { + final Set ids; + if (request.getDatabaseIds().length == 0) { + // if we did not ask for a specific name, then return all databases + ids = Set.of("*"); + } else { + ids = new LinkedHashSet<>(Arrays.asList(request.getDatabaseIds())); + } + + if (ids.size() > 1 && ids.stream().anyMatch(Regex::isSimpleMatchPattern)) { + throw new IllegalArgumentException( + "wildcard only supports a single value, please use comma-separated values or a single wildcard value" + ); + } + + final IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + List results = new ArrayList<>(); + + for (String id : ids) { + if (Regex.isSimpleMatchPattern(id)) { + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + if (Regex.simpleMatch(id, entry.getKey())) { + results.add(entry.getValue()); + } + } + } else { + DatabaseConfigurationMetadata meta = geoIpMeta.getDatabases().get(id); + if (meta == null) { + listener.onFailure(new ResourceNotFoundException("database configuration not found: {}", id)); + return; + } else { + results.add(meta); + } + } + } + + listener.onResponse(new GetDatabaseConfigurationAction.Response(results)); + } + + @Override + protected ClusterBlockException checkBlock(GetDatabaseConfigurationAction.Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java 
b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..540be68671d38 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java @@ -0,0 +1,178 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.SimpleBatchedExecutor; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction.Request; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.time.Instant; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +public class TransportPutDatabaseConfigurationAction extends TransportMasterNodeAction { + + private static final Logger logger = LogManager.getLogger(TransportPutDatabaseConfigurationAction.class); + + private static final SimpleBatchedExecutor UPDATE_TASK_EXECUTOR = new SimpleBatchedExecutor<>() { + @Override + public Tuple executeTask(UpdateDatabaseConfigurationTask task, ClusterState clusterState) throws Exception { + return Tuple.tuple(task.execute(clusterState), null); + } + + @Override + public void taskSucceeded(UpdateDatabaseConfigurationTask task, Void unused) { + logger.trace("Updated cluster state for creation-or-update of database configuration [{}]", task.database.id()); + task.listener.onResponse(AcknowledgedResponse.TRUE); + } + }; + + private final MasterServiceTaskQueue updateDatabaseConfigurationTaskQueue; + + @Inject + public TransportPutDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + PutDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + Request::new, + indexNameExpressionResolver, + 
AcknowledgedResponse::readFrom, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + this.updateDatabaseConfigurationTaskQueue = clusterService.createTaskQueue( + "update-geoip-database-configuration-state-update", + Priority.NORMAL, + UPDATE_TASK_EXECUTOR + ); + } + + @Override + protected void masterOperation(Task task, Request request, ClusterState state, ActionListener listener) { + final String id = request.getDatabase().id(); + updateDatabaseConfigurationTaskQueue.submitTask( + Strings.format("update-geoip-database-configuration-[%s]", id), + new UpdateDatabaseConfigurationTask(listener, request.getDatabase()), + null + ); + } + + /** + * Returns 'true' if the database configuration is effectually the same, and thus can be a no-op update. + */ + static boolean isNoopUpdate(@Nullable DatabaseConfigurationMetadata existingDatabase, DatabaseConfiguration newDatabase) { + if (existingDatabase == null) { + return false; + } else { + return newDatabase.equals(existingDatabase.database()); + } + } + + static void validatePrerequisites(DatabaseConfiguration database, ClusterState state) { + // we need to verify that the database represents a unique file (name) among the various databases for this same provider + IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + Optional sameName = geoIpMeta.getDatabases() + .values() + .stream() + .map(DatabaseConfigurationMetadata::database) + // .filter(d -> d.type().equals(database.type())) // of the same type (right now the type is always just 'maxmind') + .filter(d -> d.id().equals(database.id()) == false) // and a different id + .filter(d -> d.name().equals(database.name())) // but has the same name! + .findFirst(); + + sameName.ifPresent(d -> { + throw new IllegalArgumentException( + Strings.format("database [%s] is already being downloaded via configuration [%s]", database.name(), d.id()) + ); + }); + } + + private record UpdateDatabaseConfigurationTask(ActionListener listener, DatabaseConfiguration database) + implements + ClusterStateTaskListener { + + ClusterState execute(ClusterState currentState) throws Exception { + IngestGeoIpMetadata geoIpMeta = currentState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + String id = database.id(); + final DatabaseConfigurationMetadata existingDatabase = geoIpMeta.getDatabases().get(id); + // double-check for no-op in the state update task, in case it was changed/reset in the meantime + if (isNoopUpdate(existingDatabase, database)) { + return currentState; + } + + validatePrerequisites(database, currentState); + + Map databases = new HashMap<>(geoIpMeta.getDatabases()); + databases.put( + id, + new DatabaseConfigurationMetadata( + database, + existingDatabase == null ? 
1 : existingDatabase.version() + 1, + Instant.now().toEpochMilli() + ) + ); + geoIpMeta = new IngestGeoIpMetadata(databases); + + if (existingDatabase == null) { + logger.debug("adding new database configuration [{}]", id); + } else { + logger.debug("updating existing database configuration [{}]", id); + } + + Metadata currentMeta = currentState.metadata(); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentMeta).putCustom(IngestGeoIpMetadata.TYPE, geoIpMeta)) + .build(); + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + } + + @Override + protected ClusterBlockException checkBlock(Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java new file mode 100644 index 0000000000000..58cb566165db2 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java @@ -0,0 +1,538 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.DocWriteRequest.OpType; +import org.elasticsearch.action.DocWriteResponse; +import org.elasticsearch.action.admin.indices.flush.FlushAction; +import org.elasticsearch.action.admin.indices.flush.FlushRequest; +import org.elasticsearch.action.admin.indices.refresh.RefreshAction; +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.action.index.TransportIndexAction; +import org.elasticsearch.action.support.broadcast.BroadcastResponse; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlocks; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.hash.MessageDigests; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.node.Node; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.telemetry.metric.MeterRegistry; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.client.NoOpClient; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xcontent.XContentType; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; + +import 
java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.PasswordAuthentication; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; + +import static org.elasticsearch.ingest.geoip.DatabaseNodeServiceTests.createClusterState; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloader.MAX_CHUNK_SIZE; +import static org.elasticsearch.tasks.TaskId.EMPTY_TASK_ID; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; + +public class EnterpriseGeoIpDownloaderTests extends ESTestCase { + + private HttpClient httpClient; + private ClusterService clusterService; + private ThreadPool threadPool; + private MockClient client; + private EnterpriseGeoIpDownloader geoIpDownloader; + + @Before + public void setup() throws IOException { + httpClient = mock(HttpClient.class); + when(httpClient.getBytes(any(), anyString())).thenReturn( + "e4a3411cdd7b21eaf18675da5a7f9f360d33c6882363b2c19c38715834c9e836 GeoIP2-City_20240709.tar.gz".getBytes(StandardCharsets.UTF_8) + ); + clusterService = mock(ClusterService.class); + threadPool = new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), MeterRegistry.NOOP); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, Set.of(GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING)) + ); + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of())); + when(clusterService.state()).thenReturn(state); + client = new MockClient(threadPool); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + { + EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams geoIpTaskParams = mock(EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams.class); + when(geoIpTaskParams.getWriteableName()).thenReturn(EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER); + init(new PersistentTasksService(clusterService, threadPool, client), null, null, 0); + } + }; + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testGetChunkEndOfStream() throws IOException { + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(new InputStream() { + @Override + public int read() { + return -1; + } + }); + assertArrayEquals(new byte[0], chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(new ByteArrayInputStream(new byte[0])); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkLessThanChunkSize() throws IOException { + ByteArrayInputStream is = new ByteArrayInputStream(new byte[] { 1, 2, 3, 4 }); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[] { 1, 2, 3, 4 }, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + 
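+ // the input is exhausted at this point, so the second getChunk call is expected to return an empty chunk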
assertArrayEquals(new byte[0], chunk); + + } + + public void testGetChunkExactlyChunkSize() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE]; + for (int i = 0; i < MAX_CHUNK_SIZE; i++) { + bigArray[i] = (byte) i; + } + ByteArrayInputStream is = new ByteArrayInputStream(bigArray); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(bigArray, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkMoreThanChunkSize() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE * 2]; + for (int i = 0; i < MAX_CHUNK_SIZE * 2; i++) { + bigArray[i] = (byte) i; + } + byte[] smallArray = new byte[MAX_CHUNK_SIZE]; + System.arraycopy(bigArray, 0, smallArray, 0, MAX_CHUNK_SIZE); + ByteArrayInputStream is = new ByteArrayInputStream(bigArray); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(smallArray, chunk); + System.arraycopy(bigArray, MAX_CHUNK_SIZE, smallArray, 0, MAX_CHUNK_SIZE); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(smallArray, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkRethrowsIOException() { + expectThrows(IOException.class, () -> EnterpriseGeoIpDownloader.getChunk(new InputStream() { + @Override + public int read() throws IOException { + throw new IOException(); + } + })); + } + + public void testIndexChunksNoData() throws IOException { + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + InputStream empty = new ByteArrayInputStream(new byte[0]); + assertEquals( + Tuple.tuple(0, "d41d8cd98f00b204e9800998ecf8427e"), + geoIpDownloader.indexChunks( + "test", + empty, + 0, + MessageDigests.sha256(), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + 0 + ) + ); + } + + public void testIndexChunksMd5Mismatch() { + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + IOException exception = expectThrows( + IOException.class, + () -> geoIpDownloader.indexChunks("test", new ByteArrayInputStream(new byte[0]), 0, MessageDigests.sha256(), "123123", 0) + ); + assertEquals( + "checksum mismatch, expected [123123], actual [e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855]", + exception.getMessage() + ); + } + + public void testIndexChunks() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE + 20]; + for (int i = 0; i 
< MAX_CHUNK_SIZE + 20; i++) { + bigArray[i] = (byte) i; + } + byte[][] chunksData = new byte[2][]; + chunksData[0] = new byte[MAX_CHUNK_SIZE]; + System.arraycopy(bigArray, 0, chunksData[0], 0, MAX_CHUNK_SIZE); + chunksData[1] = new byte[20]; + System.arraycopy(bigArray, MAX_CHUNK_SIZE, chunksData[1], 0, 20); + + AtomicInteger chunkIndex = new AtomicInteger(); + + client.addHandler(TransportIndexAction.TYPE, (IndexRequest request, ActionListener listener) -> { + int chunk = chunkIndex.getAndIncrement(); + assertEquals(OpType.CREATE, request.opType()); + assertThat(request.id(), Matchers.startsWith("test_" + (chunk + 15) + "_")); + assertEquals(XContentType.SMILE, request.getContentType()); + Map source = request.sourceAsMap(); + assertEquals("test", source.get("name")); + assertArrayEquals(chunksData[chunk], (byte[]) source.get("data")); + assertEquals(chunk + 15, source.get("chunk")); + listener.onResponse(mock(IndexResponse.class)); + }); + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + InputStream big = new ByteArrayInputStream(bigArray); + assertEquals( + Tuple.tuple(17, "a67563dfa8f3cba8b8cff61eb989a749"), + geoIpDownloader.indexChunks( + "test", + big, + 15, + MessageDigests.sha256(), + "f2304545f224ff9ffcc585cb0a993723f911e03beb552cc03937dd443e931eab", + 0 + ) + ); + + assertEquals(2, chunkIndex.get()); + } + + public void testProcessDatabaseNew() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + AtomicBoolean indexedChunks = new AtomicBoolean(false); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { + fail(); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedMd5, + long start + ) { + assertSame(bais, is); + assertEquals(0, chunk); + indexedChunks.set(true); + return Tuple.tuple(11, expectedMd5); + } + + @Override + void updateTaskState() { + assertEquals(0, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + assertEquals("test.mmdb", name); + assertEquals(0, firstChunk); + } + }; + + geoIpDownloader.setState(EnterpriseGeoIpTaskState.EMPTY); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, 
databaseConfiguration); + assertThat(indexedChunks.get(), equalTo(true)); + } + + public void testProcessDatabaseUpdate() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + AtomicBoolean indexedChunks = new AtomicBoolean(false); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { + fail(); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedMd5, + long start + ) { + assertSame(bais, is); + assertEquals(9, chunk); + indexedChunks.set(true); + return Tuple.tuple(1, expectedMd5); + } + + @Override + void updateTaskState() { + assertEquals(9, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + assertEquals("test.mmdb", name); + assertEquals(9, firstChunk); + } + }; + + geoIpDownloader.setState(EnterpriseGeoIpTaskState.EMPTY.put("test.mmdb", new GeoIpTaskState.Metadata(0, 5, 8, "0", 0))); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, databaseConfiguration); + assertThat(indexedChunks.get(), equalTo(true)); + } + + public void testProcessDatabaseSame() throws IOException { + GeoIpTaskState.Metadata metadata = new GeoIpTaskState.Metadata( + 0, + 4, + 10, + "1", + 0, + "e4a3411cdd7b21eaf18675da5a7f9f360d33c6882363b2c19c38715834c9e836" + ); + EnterpriseGeoIpTaskState taskState = EnterpriseGeoIpTaskState.EMPTY.put("test.mmdb", metadata); + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata newMetadata) { + assertEquals(metadata, newMetadata); + assertEquals("test.mmdb", name); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedChecksum, + long start + ) { + fail(); + return Tuple.tuple(0, expectedChecksum); + } + + @Override + void updateTaskState() { + fail(); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + fail(); + } + }; + geoIpDownloader.setState(taskState); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, databaseConfiguration); 
+ } + + public void testUpdateDatabasesWriteBlock() { + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of())); + var geoIpIndex = state.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX).getWriteIndex().getName(); + state = ClusterState.builder(state) + .blocks(new ClusterBlocks.Builder().addIndexBlock(geoIpIndex, IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK)) + .build(); + when(clusterService.state()).thenReturn(state); + var e = expectThrows(ClusterBlockException.class, () -> geoIpDownloader.updateDatabases()); + assertThat( + e.getMessage(), + equalTo( + "index [" + + geoIpIndex + + "] blocked by: [TOO_MANY_REQUESTS/12/disk usage exceeded flood-stage watermark, " + + "index has read-only-allow-delete block];" + ) + ); + verifyNoInteractions(httpClient); + } + + public void testUpdateDatabasesIndexNotReady() throws IOException { + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of()), true); + var geoIpIndex = state.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX).getWriteIndex().getName(); + state = ClusterState.builder(state) + .blocks(new ClusterBlocks.Builder().addIndexBlock(geoIpIndex, IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK)) + .build(); + when(clusterService.state()).thenReturn(state); + geoIpDownloader.updateDatabases(); + verifyNoInteractions(httpClient); + } + + private GeoIpTaskState.Metadata newGeoIpTaskStateMetadata(boolean expired) { + Instant lastChecked; + if (expired) { + lastChecked = Instant.now().minus(randomIntBetween(31, 100), ChronoUnit.DAYS); + } else { + lastChecked = Instant.now().minus(randomIntBetween(0, 29), ChronoUnit.DAYS); + } + return new GeoIpTaskState.Metadata(0, 0, 0, randomAlphaOfLength(20), lastChecked.toEpochMilli()); + } + + private static class MockClient extends NoOpClient { + + private final Map, BiConsumer>> handlers = new HashMap<>(); + + private MockClient(ThreadPool threadPool) { + super(threadPool); + } + + public void addHandler( + ActionType action, + BiConsumer> listener + ) { + handlers.put(action, listener); + } + + @SuppressWarnings("unchecked") + @Override + protected void doExecute( + ActionType action, + Request request, + ActionListener listener + ) { + if (handlers.containsKey(action)) { + BiConsumer> biConsumer = (BiConsumer>) handlers.get( + action + ); + biConsumer.accept(request, listener); + } else { + throw new IllegalStateException("unexpected action called [" + action.name() + "]"); + } + } + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java new file mode 100644 index 0000000000000..a136f90780989 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class EnterpriseGeoIpTaskStateSerializationTests extends AbstractXContentSerializingTestCase<GeoIpTaskState> { + @Override + protected GeoIpTaskState doParseInstance(XContentParser parser) throws IOException { + return GeoIpTaskState.fromXContent(parser); + } + + @Override + protected Writeable.Reader<GeoIpTaskState> instanceReader() { + return GeoIpTaskState::new; + } + + @Override + protected GeoIpTaskState createTestInstance() { + GeoIpTaskState state = GeoIpTaskState.EMPTY; + int databaseCount = randomInt(20); + for (int i = 0; i < databaseCount; i++) { + state = state.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return state; + } + + @Override + protected GeoIpTaskState mutateInstance(GeoIpTaskState instance) { + Map<String, GeoIpTaskState.Metadata> databases = new HashMap<>(instance.getDatabases()); + switch (between(0, 2)) { + case 0: + String databaseName = randomValueOtherThanMany(databases::containsKey, () -> randomAlphaOfLengthBetween(5, 10)); + databases.put(databaseName, createRandomMetadata()); + return new GeoIpTaskState(databases); + case 1: + if (databases.size() > 0) { + String randomDatabaseName = databases.keySet().iterator().next(); + databases.put(randomDatabaseName, createRandomMetadata()); + } else { + databases.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return new GeoIpTaskState(databases); + case 2: + if (databases.size() > 0) { + String randomDatabaseName = databases.keySet().iterator().next(); + databases.remove(randomDatabaseName); + } else { + databases.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return new GeoIpTaskState(databases); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + private GeoIpTaskState.Metadata createRandomMetadata() { + return new GeoIpTaskState.Metadata(randomLong(), randomInt(), randomInt(), randomAlphaOfLength(32), randomLong()); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java index 9cc5405c1b617..06b2605bd6d41 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java @@ -30,11 +30,17 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.reindex.BulkByScrollResponse; +import org.elasticsearch.index.reindex.DeleteByQueryAction; +import org.elasticsearch.index.reindex.DeleteByQueryRequest; import org.elasticsearch.ingest.geoip.stats.GeoIpDownloaderStats; import org.elasticsearch.node.Node; +import org.elasticsearch.persistent.PersistentTaskResponse; import org.elasticsearch.persistent.PersistentTaskState; import org.elasticsearch.persistent.PersistentTasksCustomMetadata; import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.persistent.UpdatePersistentTaskStatusAction; import org.elasticsearch.telemetry.metric.MeterRegistry;
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.client.NoOpClient; @@ -49,6 +55,9 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -63,6 +72,8 @@ import static org.elasticsearch.ingest.geoip.GeoIpDownloader.MAX_CHUNK_SIZE; import static org.elasticsearch.tasks.TaskId.EMPTY_TASK_ID; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; @@ -76,8 +87,9 @@ public class GeoIpDownloaderTests extends ESTestCase { private GeoIpDownloader geoIpDownloader; @Before - public void setup() { + public void setup() throws IOException { httpClient = mock(HttpClient.class); + when(httpClient.getBytes(anyString())).thenReturn("[]".getBytes(StandardCharsets.UTF_8)); clusterService = mock(ClusterService.class); threadPool = new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), MeterRegistry.NOOP); when(clusterService.getClusterSettings()).thenReturn( @@ -109,7 +121,13 @@ public void setup() { () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), () -> GeoIpDownloaderTaskExecutor.EAGER_DOWNLOAD_SETTING.getDefault(Settings.EMPTY), () -> true - ); + ) { + { + GeoIpTaskParams geoIpTaskParams = mock(GeoIpTaskParams.class); + when(geoIpTaskParams.getWriteableName()).thenReturn(GeoIpDownloader.GEOIP_DOWNLOADER); + init(new PersistentTasksService(clusterService, threadPool, client), null, null, 0); + } + }; } @After @@ -290,8 +308,8 @@ int indexChunks(String name, InputStream is, int chunk, String expectedMd5, long @Override void updateTaskState() { - assertEquals(0, state.get("test.mmdb").firstChunk()); - assertEquals(10, state.get("test.mmdb").lastChunk()); + assertEquals(0, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); } @Override @@ -341,8 +359,8 @@ int indexChunks(String name, InputStream is, int chunk, String expectedMd5, long @Override void updateTaskState() { - assertEquals(9, state.get("test.mmdb").firstChunk()); - assertEquals(10, state.get("test.mmdb").lastChunk()); + assertEquals(9, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); } @Override @@ -408,6 +426,55 @@ void deleteOldChunks(String name, int firstChunk) { assertEquals(0, stats.getFailedDownloads()); } + public void testCleanDatabases() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get("http://a.b/t1")).thenReturn(bais); + + final AtomicInteger count = new AtomicInteger(0); + + geoIpDownloader = new GeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + Settings.EMPTY, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + () -> GeoIpDownloaderTaskExecutor.EAGER_DOWNLOAD_SETTING.getDefault(Settings.EMPTY), + () -> true + ) { + @Override + void updateDatabases() throws IOException { + // noop + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + 
count.incrementAndGet(); + assertEquals("test.mmdb", name); + assertEquals(21, firstChunk); + } + + @Override + void updateTaskState() { + // noop + } + }; + + geoIpDownloader.setState(GeoIpTaskState.EMPTY.put("test.mmdb", new GeoIpTaskState.Metadata(10, 10, 20, "md5", 20))); + geoIpDownloader.runDownloader(); + geoIpDownloader.runDownloader(); + GeoIpDownloaderStats stats = geoIpDownloader.getStatus(); + assertEquals(1, stats.getExpiredDatabases()); + assertEquals(2, count.get()); // somewhat surprising, not necessarily wrong + assertEquals(18, geoIpDownloader.state.getDatabases().get("test.mmdb").lastCheck()); // highly surprising, seems wrong + } + @SuppressWarnings("unchecked") public void testUpdateTaskState() { geoIpDownloader = new GeoIpDownloader( @@ -541,6 +608,78 @@ public void testUpdateDatabasesIndexNotReady() { verifyNoInteractions(httpClient); } + public void testThatRunDownloaderDeletesExpiredDatabases() { + /* + * This test puts some expired databases and some non-expired ones into the GeoIpTaskState, and then calls runDownloader(), making + * sure that the expired databases have been deleted. + */ + AtomicInteger updatePersistentTaskStateCount = new AtomicInteger(0); + AtomicInteger deleteCount = new AtomicInteger(0); + int expiredDatabasesCount = randomIntBetween(1, 100); + int unexpiredDatabasesCount = randomIntBetween(0, 100); + Map databases = new HashMap<>(); + for (int i = 0; i < expiredDatabasesCount; i++) { + databases.put("expiredDatabase" + i, newGeoIpTaskStateMetadata(true)); + } + for (int i = 0; i < unexpiredDatabasesCount; i++) { + databases.put("unexpiredDatabase" + i, newGeoIpTaskStateMetadata(false)); + } + GeoIpTaskState geoIpTaskState = new GeoIpTaskState(databases); + geoIpDownloader.setState(geoIpTaskState); + client.addHandler( + UpdatePersistentTaskStatusAction.INSTANCE, + (UpdatePersistentTaskStatusAction.Request request, ActionListener taskResponseListener) -> { + PersistentTasksCustomMetadata.Assignment assignment = mock(PersistentTasksCustomMetadata.Assignment.class); + PersistentTasksCustomMetadata.PersistentTask persistentTask = new PersistentTasksCustomMetadata.PersistentTask<>( + GeoIpDownloader.GEOIP_DOWNLOADER, + GeoIpDownloader.GEOIP_DOWNLOADER, + new GeoIpTaskParams(), + request.getAllocationId(), + assignment + ); + updatePersistentTaskStateCount.incrementAndGet(); + taskResponseListener.onResponse(new PersistentTaskResponse(new PersistentTask<>(persistentTask, request.getState()))); + } + ); + client.addHandler( + DeleteByQueryAction.INSTANCE, + (DeleteByQueryRequest request, ActionListener flushResponseActionListener) -> { + deleteCount.incrementAndGet(); + } + ); + geoIpDownloader.runDownloader(); + assertThat(geoIpDownloader.getStatus().getExpiredDatabases(), equalTo(expiredDatabasesCount)); + for (int i = 0; i < expiredDatabasesCount; i++) { + // This currently fails because we subtract one millisecond from the lastChecked time + // assertThat(geoIpDownloader.state.getDatabases().get("expiredDatabase" + i).lastCheck(), equalTo(-1L)); + } + for (int i = 0; i < unexpiredDatabasesCount; i++) { + assertThat( + geoIpDownloader.state.getDatabases().get("unexpiredDatabase" + i).lastCheck(), + greaterThanOrEqualTo(Instant.now().minus(30, ChronoUnit.DAYS).toEpochMilli()) + ); + } + assertThat(deleteCount.get(), equalTo(expiredDatabasesCount)); + assertThat(updatePersistentTaskStateCount.get(), equalTo(expiredDatabasesCount)); + geoIpDownloader.runDownloader(); + /* + * The following two lines assert current behavior that might not be 
desirable -- we continue to delete expired databases every + * time that runDownloader runs. This seems unnecessary. + */ + assertThat(deleteCount.get(), equalTo(expiredDatabasesCount * 2)); + assertThat(updatePersistentTaskStateCount.get(), equalTo(expiredDatabasesCount * 2)); + } + + private GeoIpTaskState.Metadata newGeoIpTaskStateMetadata(boolean expired) { + Instant lastChecked; + if (expired) { + lastChecked = Instant.now().minus(randomIntBetween(31, 100), ChronoUnit.DAYS); + } else { + lastChecked = Instant.now().minus(randomIntBetween(0, 29), ChronoUnit.DAYS); + } + return new GeoIpTaskState.Metadata(0, 0, 0, randomAlphaOfLength(20), lastChecked.toEpochMilli()); + } + private static class MockClient extends NoOpClient { private final Map, BiConsumer>> handlers = new HashMap<>(); diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java index 6276155d9f083..87d1881a9e743 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java @@ -463,7 +463,7 @@ public void testEnterprise() throws Exception { assertThat(geoData.get("residential_proxy"), equalTo(false)); assertThat(geoData.get("domain"), equalTo("frpt.net")); assertThat(geoData.get("isp"), equalTo("Fairpoint Communications")); - assertThat(geoData.get("isp_organization"), equalTo("Fairpoint Communications")); + assertThat(geoData.get("isp_organization_name"), equalTo("Fairpoint Communications")); assertThat(geoData.get("user_type"), equalTo("residential")); assertThat(geoData.get("connection_type"), equalTo("Cable/DSL")); } @@ -497,7 +497,7 @@ public void testIsp() throws Exception { assertThat(geoData.get("organization_name"), equalTo("CELLCO-PART")); assertThat(geoData.get("network"), equalTo("149.101.100.0/28")); assertThat(geoData.get("isp"), equalTo("Verizon Wireless")); - assertThat(geoData.get("isp_organization"), equalTo("Verizon Wireless")); + assertThat(geoData.get("isp_organization_name"), equalTo("Verizon Wireless")); assertThat(geoData.get("mobile_network_code"), equalTo("004")); assertThat(geoData.get("mobile_country_code"), equalTo("310")); } diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java new file mode 100644 index 0000000000000..eca23cb13cd3d --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.test.AbstractChunkedSerializingTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class IngestGeoIpMetadataTests extends AbstractChunkedSerializingTestCase<IngestGeoIpMetadata> { + @Override + protected IngestGeoIpMetadata doParseInstance(XContentParser parser) throws IOException { + return IngestGeoIpMetadata.fromXContent(parser); + } + + @Override + protected Writeable.Reader<IngestGeoIpMetadata> instanceReader() { + return IngestGeoIpMetadata::new; + } + + @Override + protected IngestGeoIpMetadata createTestInstance() { + return randomIngestGeoIpMetadata(); + } + + @Override + protected IngestGeoIpMetadata mutateInstance(IngestGeoIpMetadata instance) throws IOException { + Map<String, DatabaseConfigurationMetadata> databases = new HashMap<>(instance.getDatabases()); + switch (between(0, 2)) { + case 0 -> { + String databaseId = randomValueOtherThanMany(databases::containsKey, ESTestCase::randomIdentifier); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + return new IngestGeoIpMetadata(databases); + } + case 1 -> { + if (databases.size() > 0) { + String randomDatabaseId = databases.keySet().iterator().next(); + databases.put(randomDatabaseId, randomDatabaseConfigurationMetadata(randomDatabaseId)); + } else { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new IngestGeoIpMetadata(databases); + } + case 2 -> { + if (databases.size() > 0) { + String randomDatabaseId = databases.keySet().iterator().next(); + databases.remove(randomDatabaseId); + } else { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new IngestGeoIpMetadata(databases); + } + default -> throw new AssertionError("failure, got illegal switch case"); + } + } + + private IngestGeoIpMetadata randomIngestGeoIpMetadata() { + Map<String, DatabaseConfigurationMetadata> databases = new HashMap<>(); + for (int i = 0; i < randomIntBetween(0, 20); i++) { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new IngestGeoIpMetadata(databases); + } + + private DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id) { + return new DatabaseConfigurationMetadata( + randomDatabaseConfiguration(id), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + private DatabaseConfiguration randomDatabaseConfiguration(String id) { + return new DatabaseConfiguration(id, randomAlphaOfLength(10), new DatabaseConfiguration.Maxmind(randomAlphaOfLength(10))); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java new file mode 100644 index 0000000000000..f035416d48068 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements.
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.MAXMIND_NAMES; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationTests.randomDatabaseConfiguration; + +public class DatabaseConfigurationMetadataTests extends AbstractXContentSerializingTestCase<DatabaseConfigurationMetadata> { + + private String id; + + @Override + protected DatabaseConfigurationMetadata doParseInstance(XContentParser parser) throws IOException { + return DatabaseConfigurationMetadata.parse(parser, id); + } + + @Override + protected DatabaseConfigurationMetadata createTestInstance() { + id = randomAlphaOfLength(5); + return randomDatabaseConfigurationMetadata(id); + } + + public static DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id) { + return new DatabaseConfigurationMetadata( + new DatabaseConfiguration(id, randomFrom(MAXMIND_NAMES), new DatabaseConfiguration.Maxmind(randomAlphaOfLength(5))), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + @Override + protected DatabaseConfigurationMetadata mutateInstance(DatabaseConfigurationMetadata instance) { + switch (between(0, 2)) { + case 0: + return new DatabaseConfigurationMetadata( + randomValueOtherThan(instance.database(), () -> randomDatabaseConfiguration(randomAlphaOfLength(5))), + instance.version(), + instance.modifiedDate() + ); + case 1: + return new DatabaseConfigurationMetadata( + instance.database(), + randomValueOtherThan(instance.version(), ESTestCase::randomNonNegativeLong), + instance.modifiedDate() + ); + case 2: + return new DatabaseConfigurationMetadata( + instance.database(), + instance.version(), + randomValueOtherThan(instance.modifiedDate(), () -> ESTestCase.randomPositiveTimeValue().millis()) + ); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + @Override + protected Writeable.Reader<DatabaseConfigurationMetadata> instanceReader() { + return DatabaseConfigurationMetadata::new; + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java new file mode 100644 index 0000000000000..02c067561b49c --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1.
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.Maxmind; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Set; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.MAXMIND_NAMES; + +public class DatabaseConfigurationTests extends AbstractXContentSerializingTestCase<DatabaseConfiguration> { + + private String id; + + @Override + protected DatabaseConfiguration doParseInstance(XContentParser parser) throws IOException { + return DatabaseConfiguration.parse(parser, id); + } + + @Override + protected DatabaseConfiguration createTestInstance() { + id = randomAlphaOfLength(5); + return randomDatabaseConfiguration(id); + } + + public static DatabaseConfiguration randomDatabaseConfiguration(String id) { + return new DatabaseConfiguration(id, randomFrom(MAXMIND_NAMES), new Maxmind(randomAlphaOfLength(5))); + } + + @Override + protected DatabaseConfiguration mutateInstance(DatabaseConfiguration instance) { + switch (between(0, 2)) { + case 0: + return new DatabaseConfiguration(instance.id() + randomAlphaOfLength(2), instance.name(), instance.maxmind()); + case 1: + return new DatabaseConfiguration( + instance.id(), + randomValueOtherThan(instance.name(), () -> randomFrom(MAXMIND_NAMES)), + instance.maxmind() + ); + case 2: + return new DatabaseConfiguration( + instance.id(), + instance.name(), + new Maxmind(instance.maxmind().accountId() + randomAlphaOfLength(2)) + ); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + @Override + protected Writeable.Reader<DatabaseConfiguration> instanceReader() { + return DatabaseConfiguration::new; + } + + public void testValidateId() { + Set<String> invalidIds = Set.of("-foo", "_foo", "foo,bar", "foo bar", "foo*bar", "foo.bar"); + for (String id : invalidIds) { + expectThrows(IllegalArgumentException.class, "expected exception for " + id, () -> DatabaseConfiguration.validateId(id)); + } + Set<String> validIds = Set.of("f-oo", "f_oo", "foobar"); + for (String id : validIds) { + DatabaseConfiguration.validateId(id); + } + // Note: the code checks for byte length, but randomAlphaOfLength only uses characters in the ASCII subset + String longId = randomAlphaOfLength(128); + expectThrows(IllegalArgumentException.class, "expected exception for " + longId, () -> DatabaseConfiguration.validateId(longId)); + String longestAllowedId = randomAlphaOfLength(127); + DatabaseConfiguration.validateId(longestAllowedId); + String shortId = randomAlphaOfLengthBetween(1, 127); + DatabaseConfiguration.validateId(shortId); + expectThrows(IllegalArgumentException.class, "expected exception for empty string", () -> DatabaseConfiguration.validateId("")); + expectThrows(IllegalArgumentException.class, "expected exception for null string", () -> DatabaseConfiguration.validateId(null)); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java new file mode 100644 index 0000000000000..710c3ee23916d --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V.
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.test.ESTestCase; + +import java.util.HashMap; +import java.util.Map; + +public class TransportPutDatabaseConfigurationActionTests extends ESTestCase { + + public void testValidatePrerequisites() { + // Test that we reject two configurations with the same database name but different ids: + String name = randomAlphaOfLengthBetween(1, 50); + IngestGeoIpMetadata ingestGeoIpMetadata = randomIngestGeoIpMetadata(name); + ClusterState state = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder(Metadata.EMPTY_METADATA).putCustom(IngestGeoIpMetadata.TYPE, ingestGeoIpMetadata)) + .build(); + DatabaseConfiguration databaseConfiguration = randomDatabaseConfiguration(randomIdentifier(), name); + expectThrows( + IllegalArgumentException.class, + () -> TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfiguration, state) + ); + + // Test that we do not reject two configurations with different database names: + String differentName = randomValueOtherThan(name, () -> randomAlphaOfLengthBetween(1, 50)); + DatabaseConfiguration databaseConfigurationForDifferentName = randomDatabaseConfiguration(randomIdentifier(), differentName); + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfigurationForDifferentName, state); + + // Test that we do not reject a configuration if none already exists: + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfiguration, ClusterState.EMPTY_STATE); + + // Test that we do not reject a configuration if one with the same database name AND id already exists: + DatabaseConfiguration databaseConfigurationSameNameSameId = ingestGeoIpMetadata.getDatabases() + .values() + .iterator() + .next() + .database(); + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfigurationSameNameSameId, state); + } + + private IngestGeoIpMetadata randomIngestGeoIpMetadata(String name) { + Map databases = new HashMap<>(); + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId, name)); + return new IngestGeoIpMetadata(databases); + } + + private DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id, String name) { + return new DatabaseConfigurationMetadata( + randomDatabaseConfiguration(id, name), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + private DatabaseConfiguration randomDatabaseConfiguration(String id, String name) { + return new DatabaseConfiguration(id, name, new DatabaseConfiguration.Maxmind(randomAlphaOfLength(10))); + } +} diff --git a/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java b/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java index 58a6e3771b30d..0f0a0c998bd75 100644 --- a/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java +++ 
b/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java @@ -46,7 +46,12 @@ public class IngestGeoIpClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase .module("reindex") .module("ingest-geoip") .systemProperty("ingest.geoip.downloader.enabled.default", "true") + // sets the plain (geoip.elastic.co) downloader endpoint, which is used in these tests .setting("ingest.geoip.downloader.endpoint", () -> fixture.getAddress(), s -> useFixture) + // also sets the enterprise downloader maxmind endpoint, to make sure we do not accidentally hit the real endpoint from tests + // note: it's not important that the downloading actually work at this point -- the rest tests (so far) don't exercise + // the downloading code because of license reasons -- but if they did, then it would be important that we're hitting a fixture + .systemProperty("ingest.geoip.downloader.maxmind.endpoint.default", () -> fixture.getAddress(), s -> useFixture) .build(); @ClassRule diff --git a/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml b/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml new file mode 100644 index 0000000000000..6809443fdfbc3 --- /dev/null +++ b/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml @@ -0,0 +1,72 @@ +setup: + - requires: + cluster_features: ["geoip.downloader.database.configuration"] + reason: "geoip downloader database configuration APIs added in 8.15" + +--- +"Test adding, getting, and removing geoip databases": + - do: + ingest.put_geoip_database: + id: "my_database_1" + body: > + { + "name": "GeoIP2-City", + "maxmind": { + "account_id": "1234" + } + } + - match: { acknowledged: true } + + - do: + ingest.put_geoip_database: + id: "my_database_1" + body: > + { + "name": "GeoIP2-Country", + "maxmind": { + "account_id": "4321" + } + } + - match: { acknowledged: true } + + - do: + ingest.put_geoip_database: + id: "my_database_2" + body: > + { + "name": "GeoIP2-City", + "maxmind": { + "account_id": "1234" + } + } + - match: { acknowledged: true } + + - do: + ingest.get_geoip_database: + id: "my_database_1" + - length: { databases: 1 } + - match: { databases.0.id: "my_database_1" } + - gte: { databases.0.modified_date_millis: 0 } + - match: { databases.0.database.name: "GeoIP2-Country" } + - match: { databases.0.database.maxmind.account_id: "4321" } + + - do: + ingest.get_geoip_database: {} + - length: { databases: 2 } + + - do: + ingest.get_geoip_database: + id: "my_database_1,my_database_2" + - length: { databases: 2 } + + - do: + ingest.delete_geoip_database: + id: "my_database_1" + + - do: + ingest.get_geoip_database: {} + - length: { databases: 1 } + - match: { databases.0.id: "my_database_2" } + - gte: { databases.0.modified_date_millis: 0 } + - match: { databases.0.database.name: "GeoIP2-City" } + - match: { databases.0.database.maxmind.account_id: "1234" } diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/146_dense_vector_bit_basic.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/146_dense_vector_bit_basic.yml index 3eb686bda2174..4c195a0e32623 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/146_dense_vector_bit_basic.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/146_dense_vector_bit_basic.yml @@ -8,6 +8,8 @@ setup: 
indices.create: index: test-index body: + settings: + number_of_shards: 1 mappings: properties: vector: @@ -107,7 +109,6 @@ setup: headers: Content-Type: application/json search: - rest_total_hits_as_int: true body: query: script_score: @@ -138,7 +139,6 @@ setup: headers: Content-Type: application/json search: - rest_total_hits_as_int: true body: query: script_score: @@ -152,7 +152,6 @@ setup: headers: Content-Type: application/json search: - rest_total_hits_as_int: true body: query: script_score: @@ -167,7 +166,6 @@ setup: headers: Content-Type: application/json search: - rest_total_hits_as_int: true body: query: script_score: diff --git a/muted-tests.yml b/muted-tests.yml index d8eba8ad2dba6..f6078b93cfe55 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -44,12 +44,6 @@ tests: - class: "org.elasticsearch.xpack.test.rest.XPackRestIT" issue: "https://github.com/elastic/elasticsearch/issues/109687" method: "test {p0=sql/translate/Translate SQL}" -- class: org.elasticsearch.action.search.SearchProgressActionListenerIT - method: testSearchProgressWithHits - issue: https://github.com/elastic/elasticsearch/issues/109830 -- class: "org.elasticsearch.xpack.security.ScrollHelperIntegTests" - issue: "https://github.com/elastic/elasticsearch/issues/109905" - method: "testFetchAllEntities" - class: "org.elasticsearch.xpack.esql.action.AsyncEsqlQueryActionIT" issue: "https://github.com/elastic/elasticsearch/issues/109944" method: "testBasicAsyncExecution" @@ -67,24 +61,12 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/110211 - class: "org.elasticsearch.rest.RestControllerIT" issue: "https://github.com/elastic/elasticsearch/issues/110225" -- class: "org.elasticsearch.xpack.security.authz.store.NativePrivilegeStoreCacheTests" - issue: "https://github.com/elastic/elasticsearch/issues/110227" - method: "testGetPrivilegesUsesCache" - class: org.elasticsearch.upgrades.SecurityIndexRolesMetadataMigrationIT method: testMetadataMigratedAfterUpgrade issue: https://github.com/elastic/elasticsearch/issues/110232 - class: org.elasticsearch.compute.lucene.ValueSourceReaderTypeConversionTests method: testLoadAll issue: https://github.com/elastic/elasticsearch/issues/110244 -- class: org.elasticsearch.painless.LangPainlessClientYamlTestSuiteIT - method: test {yaml=painless/146_dense_vector_bit_basic/Cosine Similarity is not supported} - issue: https://github.com/elastic/elasticsearch/issues/110290 -- class: org.elasticsearch.painless.LangPainlessClientYamlTestSuiteIT - method: test {yaml=painless/146_dense_vector_bit_basic/Dot Product is not supported} - issue: https://github.com/elastic/elasticsearch/issues/110291 -- class: org.elasticsearch.action.search.SearchProgressActionListenerIT - method: testSearchProgressWithQuery - issue: https://github.com/elastic/elasticsearch/issues/109867 - class: org.elasticsearch.backwards.SearchWithMinCompatibleSearchNodeIT method: testMinVersionAsNewVersion issue: https://github.com/elastic/elasticsearch/issues/95384 @@ -94,30 +76,22 @@ tests: - class: org.elasticsearch.backwards.SearchWithMinCompatibleSearchNodeIT method: testMinVersionAsOldVersion issue: https://github.com/elastic/elasticsearch/issues/109454 -- class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests - method: testReplaceChildren {class org.elasticsearch.xpack.esql.expression.function.aggregate.ToPartial} - issue: https://github.com/elastic/elasticsearch/issues/110310 -- class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests - method: testInfoParameters {class 
org.elasticsearch.xpack.esql.expression.function.aggregate.ToPartial} - issue: https://github.com/elastic/elasticsearch/issues/110310 -- class: org.elasticsearch.search.vectors.ExactKnnQueryBuilderTests - method: testToQuery - issue: https://github.com/elastic/elasticsearch/issues/110357 -- class: org.elasticsearch.search.aggregations.bucket.terms.RareTermsIT - method: testSingleValuedString - issue: https://github.com/elastic/elasticsearch/issues/110388 - class: "org.elasticsearch.xpack.searchablesnapshots.FrozenSearchableSnapshotsIntegTests" issue: "https://github.com/elastic/elasticsearch/issues/110408" method: "testCreateAndRestorePartialSearchableSnapshot" -- class: "org.elasticsearch.xpack.security.role.RoleWithDescriptionRestIT" - issue: "https://github.com/elastic/elasticsearch/issues/110416" - method: "testCreateOrUpdateRoleWithDescription" -- class: "org.elasticsearch.xpack.security.role.RoleWithDescriptionRestIT" - issue: "https://github.com/elastic/elasticsearch/issues/110417" - method: "testCreateOrUpdateRoleWithDescription" -- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT - method: test {p0=search.vectors/41_knn_search_half_byte_quantized/Test create, merge, and search cosine} - issue: https://github.com/elastic/elasticsearch/issues/109978 +- class: "org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT" + issue: "https://github.com/elastic/elasticsearch/issues/110591" +- class: org.elasticsearch.packaging.test.DockerTests + method: test021InstallPlugin + issue: https://github.com/elastic/elasticsearch/issues/110343 +- class: org.elasticsearch.multi_node.GlobalCheckpointSyncActionIT + issue: https://github.com/elastic/elasticsearch/issues/111124 +- class: org.elasticsearch.packaging.test.DockerTests + method: test600Interrupt + issue: https://github.com/elastic/elasticsearch/issues/111324 +- class: org.elasticsearch.xpack.transform.integration.TransformIT + method: testStopWaitForCheckpoint + issue: https://github.com/elastic/elasticsearch/issues/106113 # Examples: # diff --git a/plugins/examples/gradle/wrapper/gradle-wrapper.properties b/plugins/examples/gradle/wrapper/gradle-wrapper.properties index 515ab9d5f1822..efe2ff3449216 100644 --- a/plugins/examples/gradle/wrapper/gradle-wrapper.properties +++ b/plugins/examples/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=f8b4f4772d302c8ff580bc40d0f56e715de69b163546944f787c87abf209c961 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-all.zip +distributionSha256Sum=258e722ec21e955201e31447b0aed14201765a3bfbae296a46cf60b70e66db70 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ConfigurationTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ConfigurationTests.java index 1925b1e8f36ab..2ce9eef29d903 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ConfigurationTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ConfigurationTests.java @@ -20,7 +20,6 @@ import static java.nio.file.attribute.PosixFilePermissions.fromString; import static org.elasticsearch.packaging.util.FileUtils.append; -import static org.hamcrest.Matchers.equalTo; import static org.junit.Assume.assumeFalse; public class ConfigurationTests extends PackagingTestCase { @@ 
-50,13 +49,15 @@ public void test20HostnameSubstitution() throws Exception { // security auto-config requires that the archive owner and the node process user be the same Platforms.onWindows(() -> sh.chown(confPath, installation.getOwner())); assertWhileRunning(() -> { - final String nameResponse = ServerUtils.makeRequest( - Request.Get("https://localhost:9200/_cat/nodes?h=name"), - "test_superuser", - "test_superuser_password", - ServerUtils.getCaCert(confPath) - ).strip(); - assertThat(nameResponse, equalTo("mytesthost")); + assertBusy(() -> { + final String nameResponse = ServerUtils.makeRequest( + Request.Get("https://localhost:9200/_cat/nodes?h=name"), + "test_superuser", + "test_superuser_password", + ServerUtils.getCaCert(confPath) + ).strip(); + assertEquals("mytesthost", nameResponse); + }); }); Platforms.onWindows(() -> sh.chown(confPath)); }); diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java index f9723f30cc371..18668b842b2d3 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java @@ -1231,8 +1231,7 @@ public void test500Readiness() throws Exception { assertBusy(() -> assertTrue(readinessProbe(9399))); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/99508") - public void test600Interrupt() { + public void test600Interrupt() throws Exception { waitForElasticsearch(installation, "elastic", PASSWORD); final Result containerLogs = getContainerLogs(); @@ -1242,10 +1241,12 @@ public void test600Interrupt() { final int maxPid = infos.stream().map(i -> i.pid()).max(Integer::compareTo).get(); sh.run("bash -c 'kill -int " + maxPid + "'"); // send ctrl+c to all java processes - final Result containerLogsAfter = getContainerLogs(); - assertThat("Container logs should contain stopping ...", containerLogsAfter.stdout(), containsString("stopping ...")); - assertThat("No errors stdout", containerLogsAfter.stdout(), not(containsString("java.security.AccessControlException:"))); - assertThat("No errors stderr", containerLogsAfter.stderr(), not(containsString("java.security.AccessControlException:"))); + assertBusy(() -> { + final Result containerLogsAfter = getContainerLogs(); + assertThat("Container logs should contain stopping ...", containerLogsAfter.stdout(), containsString("stopping ...")); + assertThat("No errors stdout", containerLogsAfter.stdout(), not(containsString("java.security.AccessControlException:"))); + assertThat("No errors stderr", containerLogsAfter.stderr(), not(containsString("java.security.AccessControlException:"))); + }); } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json new file mode 100644 index 0000000000000..ef6dc94dd27a6 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json @@ -0,0 +1,31 @@ +{ + "ingest.delete_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/TODO.html", + "description":"Deletes a geoip database configuration" + }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "DELETE" + ], + "parts":{ + "id":{ + "type":"list", 
+ "description":"A comma-separated list of geoip database configurations to delete" + } + } + } + ] + }, + "params":{ + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json new file mode 100644 index 0000000000000..96f028e2e5251 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json @@ -0,0 +1,37 @@ +{ + "ingest.get_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/TODO.html", + "description":"Returns geoip database configuration." + }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database", + "methods":[ + "GET" + ] + }, + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "GET" + ], + "parts":{ + "id":{ + "type":"list", + "description":"A comma-separated list of geoip database configurations to get; use `*` to get all geoip database configurations" + } + } + } + ] + }, + "params":{ + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json new file mode 100644 index 0000000000000..07f9e37740279 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json @@ -0,0 +1,35 @@ +{ + "ingest.put_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/TODO.html", + "description":"Puts the configuration for a geoip database to be downloaded" + }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "PUT" + ], + "parts":{ + "id":{ + "type":"string", + "description":"The id of the database configuration" + } + } + } + ] + }, + "params":{ + }, + "body":{ + "description":"The database configuration definition", + "required":true + } + } +} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml index 4976e5e15adbe..07cb154449a70 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml @@ -5,8 +5,8 @@ setup: capabilities: - method: PUT path: /{index} - capabilities: [logs_index_mode] - reason: "Support for 'logs' index mode capability required" + capabilities: [logsdb_index_mode] + reason: "Support for 'logsdb' index mode capability required" --- create logs index: @@ -15,8 +15,8 @@ create logs index: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -24,7 +24,7 @@ create logs index: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 mappings: @@ -75,7 +75,7 @@ create logs index: index: test - is_true: test - - match: { test.settings.index.mode: "logs" } + - match: { test.settings.index.mode: "logsdb" } - do: indices.get_mapping: @@ -89,8 +89,8 @@ using default timestamp field mapping: 
capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -98,7 +98,7 @@ using default timestamp field mapping: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 mappings: @@ -121,17 +121,16 @@ missing hostname field: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: - catch: bad_request indices.create: index: test-hostname-missing body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 mappings: @@ -147,9 +146,12 @@ missing hostname field: message: type: text - - match: { error.root_cause.0.type: "illegal_argument_exception" } - - match: { error.type: "illegal_argument_exception" } - - match: { error.reason: "unknown index sort field:[host.name]" } + - do: + indices.get_settings: + index: test-hostname-missing + + - is_true: test-hostname-missing + - match: { test-hostname-missing.settings.index.mode: "logsdb" } --- missing sort field: @@ -158,8 +160,8 @@ missing sort field: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -168,7 +170,7 @@ missing sort field: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 sort: @@ -199,8 +201,8 @@ non-default sort settings: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -209,7 +211,7 @@ non-default sort settings: settings: index: - mode: logs + mode: logsdb number_of_shards: 2 number_of_replicas: 0 sort: @@ -235,7 +237,7 @@ non-default sort settings: index: test-sort - is_true: test-sort - - match: { test-sort.settings.index.mode: "logs" } + - match: { test-sort.settings.index.mode: "logsdb" } - match: { test-sort.settings.index.sort.field.0: "agent_id" } - match: { test-sort.settings.index.sort.field.1: "@timestamp" } - match: { test-sort.settings.index.sort.order.0: "asc" } @@ -252,8 +254,8 @@ override sort order settings: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -262,7 +264,7 @@ override sort order settings: settings: index: - mode: logs + mode: logsdb number_of_shards: 2 number_of_replicas: 0 sort: @@ -287,7 +289,7 @@ override sort order settings: index: test-sort-order - is_true: test-sort-order - - match: { test-sort-order.settings.index.mode: "logs" } + - match: { test-sort-order.settings.index.mode: "logsdb" } - match: { test-sort-order.settings.index.sort.field.0: null } - match: { test-sort-order.settings.index.sort.field.1: null } - match: { test-sort-order.settings.index.sort.order.0: "asc" } @@ -300,8 +302,8 @@ override sort missing settings: capabilities: - 
method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -310,7 +312,7 @@ override sort missing settings: settings: index: - mode: logs + mode: logsdb number_of_shards: 2 number_of_replicas: 0 sort: @@ -335,7 +337,7 @@ override sort missing settings: index: test-sort-missing - is_true: test-sort-missing - - match: { test-sort-missing.settings.index.mode: "logs" } + - match: { test-sort-missing.settings.index.mode: "logsdb" } - match: { test-sort-missing.settings.index.sort.field.0: null } - match: { test-sort-missing.settings.index.sort.field.1: null } - match: { test-sort-missing.settings.index.sort.missing.0: "_last" } @@ -348,8 +350,8 @@ override sort mode settings: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -358,7 +360,7 @@ override sort mode settings: settings: index: - mode: logs + mode: logsdb number_of_shards: 2 number_of_replicas: 0 sort: @@ -383,7 +385,7 @@ override sort mode settings: index: test-sort-mode - is_true: test-sort-mode - - match: { test-sort-mode.settings.index.mode: "logs" } + - match: { test-sort-mode.settings.index.mode: "logsdb" } - match: { test-sort-mode.settings.index.sort.field.0: null } - match: { test-sort-mode.settings.index.sort.field.1: null } - match: { test-sort-mode.settings.index.sort.mode.0: "max" } @@ -397,8 +399,8 @@ override sort field using nested field type in sorting: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -407,7 +409,7 @@ override sort field using nested field type in sorting: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 sort: @@ -444,8 +446,8 @@ override sort field using nested field type: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -453,7 +455,7 @@ override sort field using nested field type: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 mappings: @@ -484,8 +486,8 @@ routing path not allowed in logs mode: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -494,7 +496,7 @@ routing path not allowed in logs mode: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 routing_path: [ "host.name", "agent_id" ] @@ -524,8 +526,8 @@ start time not allowed in logs mode: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -534,7 
+536,7 @@ start time not allowed in logs mode: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 time_series: @@ -565,8 +567,8 @@ end time not allowed in logs mode: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -575,7 +577,7 @@ end time not allowed in logs mode: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 time_series: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml index 72c6abab22600..d255a644183dc 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml @@ -411,3 +411,53 @@ setup: - match: {hits.total.value: 1} - match: {hits.hits.0._id: "2"} +--- +"nested Knn search with required similarity appropriately filters inner_hits": + - requires: + cluster_features: "gte_v8.15.0" + reason: 'bugfix for 8.15' + + - do: + search: + index: test + body: + query: + nested: + path: nested + inner_hits: + size: 3 + _source: false + fields: + - nested.paragraph_id + query: + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + num_candidates: 3 + similarity: 10.5 + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "2"} + - length: {hits.hits.0.inner_hits.nested.hits.hits: 1} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} + + - do: + search: + index: test + body: + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + num_candidates: 3 + k: 3 + similarity: 10.5 + inner_hits: + size: 3 + _source: false + fields: + - nested.paragraph_id + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "2"} + - length: {hits.hits.0.inner_hits.nested.hits.hits: 1} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml index 7f0c24e217d14..42472088cf76d 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml @@ -29,14 +29,24 @@ setup: m: 16 ef_construction: 200 + - do: + indices.create: + index: test_empty + body: + mappings: + properties: + vector: + type: dense_vector + + - do: index: index: test id: "1" body: name: cow.jpg - vector: [230.0, 300.33, -34.8988, 15.555, -200.0] - another_vector: [130.0, 115.0, -1.02, 15.555, -100.0] + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] + another_vector: [ 130.0, 115.0, -1.02, 15.555, -100.0 ] - do: index: @@ -44,8 +54,8 @@ setup: id: "2" body: name: moose.jpg - vector: [-0.5, 100.0, -13, 14.8, -156.0] - another_vector: [-0.5, 50.0, -1, 1, 120] + vector: [ -0.5, 100.0, -13, 14.8, -156.0 ] + another_vector: [ -0.5, 50.0, -1, 1, 120 ] - do: index: @@ -53,11 +63,11 @@ setup: id: "3" body: name: rabbit.jpg - vector: [0.5, 111.3, -13.0, 14.8, 
-156.0] - another_vector: [-0.5, 11.0, 0, 12, 111.0] + vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ] + another_vector: [ -0.5, 11.0, 0, 12, 111.0 ] - do: - indices.refresh: {} + indices.refresh: { } --- "kNN search only": @@ -71,15 +81,15 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - - match: {hits.hits.1._id: "3"} - - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } --- "kNN multi-field search only": - requires: @@ -91,14 +101,14 @@ setup: body: fields: [ "name" ] knn: - - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} - - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + - { field: vector, query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ], k: 2, num_candidates: 3 } + - { field: another_vector, query_vector: [ -0.5, 11.0, 0, 12, 111.0 ], k: 2, num_candidates: 3 } - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } --- "kNN search plus query": - requires: @@ -111,21 +121,21 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 query: term: name: cow.jpg - - match: {hits.hits.0._id: "1"} - - match: {hits.hits.0.fields.name.0: "cow.jpg"} + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0.fields.name.0: "cow.jpg" } - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } - - match: {hits.hits.2._id: "3"} - - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2.fields.name.0: "rabbit.jpg" } --- "kNN multi-field search with query": - requires: @@ -137,20 +147,20 @@ setup: body: fields: [ "name" ] knn: - - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} - - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + - { field: vector, query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ], k: 2, num_candidates: 3 } + - { field: another_vector, query_vector: [ -0.5, 11.0, 0, 12, 111.0 ], k: 2, num_candidates: 3 } query: term: name: cow.jpg - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - - match: {hits.hits.1._id: "1"} - - match: {hits.hits.1.fields.name.0: "cow.jpg"} + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1.fields.name.0: "cow.jpg" } - - match: {hits.hits.2._id: "2"} - - match: {hits.hits.2.fields.name.0: "moose.jpg"} + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2.fields.name.0: "moose.jpg" } --- "kNN search with filter": - requires: @@ -163,16 +173,16 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: 
[-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: term: name: "rabbit.jpg" - - match: {hits.total.value: 1} - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - do: search: @@ -181,7 +191,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: @@ -190,7 +200,7 @@ setup: - term: _id: 2 - - match: {hits.total.value: 0} + - match: { hits.total.value: 0 } --- "kNN search with explicit search_type": @@ -206,7 +216,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 @@ -216,7 +226,7 @@ setup: --- "kNN search in _knn_search endpoint": - skip: - features: ["allowed_warnings"] + features: [ "allowed_warnings" ] - do: allowed_warnings: - "The kNN search API has been replaced by the `knn` option in the search API." @@ -226,22 +236,22 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - - match: {hits.hits.1._id: "3"} - - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } --- "kNN search with filter in _knn_search endpoint": - requires: cluster_features: "gte_v8.2.0" reason: 'kNN with filtering added in 8.2' - test_runner_features: ["allowed_warnings"] + test_runner_features: [ "allowed_warnings" ] - do: allowed_warnings: - "The kNN search API has been replaced by the `knn` option in the search API." 
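The `close_to` score assertions in the "Knn search with mip" hunks that follow can be checked by hand. Below is a minimal arithmetic sketch, assuming the convention that a non-negative max inner product is reported as `dotProduct + 1` (the asserted values in the test are consistent with this); the query and document vectors are copied from the test setup.

```java
// Minimal check of the max_inner_product score arithmetic used in the assertions below.
// Assumption: a non-negative dot product is reported as dotProduct + 1.
public final class MipScoreCheck {
    static double dot(double[] a, double[] b) {
        double sum = 0;
        for (int i = 0; i < a.length; i++) {
            sum += a[i] * b[i];
        }
        return sum;
    }

    public static void main(String[] args) {
        double[] query = { -0.5, 90.0, -10, 14.8, -156.0 };
        double[] doc1 = { 230.0, 300.33, -34.8988, 15.555, -200.0 }; // cow.jpg
        double[] doc2 = { -0.5, 100.0, -13, 14.8, -156.0 };          // moose.jpg
        double[] doc3 = { 0.5, 111.3, -13.0, 14.8, -156.0 };         // rabbit.jpg

        System.out.println(dot(query, doc1) + 1); // ~58694.902, matches the close_to assertion for _id 1
        System.out.println(dot(query, doc2) + 1); // ~33686.29,  matches the close_to assertion for _id 2
        System.out.println(dot(query, doc3) + 1); // ~34702.79,  matches the close_to assertion for _id 3
    }
}
```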
@@ -251,16 +261,16 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: term: name: "rabbit.jpg" - - match: {hits.total.value: 1} - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - do: allowed_warnings: @@ -271,7 +281,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: @@ -280,7 +290,7 @@ setup: - term: _id: 2 - - match: {hits.total.value: 0} + - match: { hits.total.value: 0 } --- "Test nonexistent field": @@ -316,12 +326,12 @@ setup: k: 3 field: vector similarity: 11 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] - - length: {hits.hits: 1} + - length: { hits.hits: 1 } - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } --- "Vector similarity with filter only": - requires: @@ -337,13 +347,13 @@ setup: k: 3 field: vector similarity: 11 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] - filter: {"term": {"name": "moose.jpg"}} + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + filter: { "term": { "name": "moose.jpg" } } - - length: {hits.hits: 1} + - length: { hits.hits: 1 } - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - do: search: @@ -355,10 +365,10 @@ setup: k: 3 field: vector similarity: 110 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] - filter: {"term": {"name": "cow.jpg"}} + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + filter: { "term": { "name": "cow.jpg" } } - - length: {hits.hits: 0} + - length: { hits.hits: 0 } --- "Knn search with mip": - requires: @@ -390,7 +400,7 @@ setup: id: "1" body: name: cow.jpg - vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] - do: index: @@ -398,7 +408,7 @@ setup: id: "2" body: name: moose.jpg - vector: [-0.5, 100.0, -13, 14.8, -156.0] + vector: [ -0.5, 100.0, -13, 14.8, -156.0 ] - do: index: @@ -406,10 +416,10 @@ setup: id: "3" body: name: rabbit.jpg - vector: [0.5, 111.3, -13.0, 14.8, -156.0] + vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ] - do: - indices.refresh: {} + indices.refresh: { } - do: search: @@ -420,16 +430,16 @@ setup: num_candidates: 3 k: 3 field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] - - length: {hits.hits: 3} - - match: {hits.hits.0._id: "1"} - - close_to: {hits.hits.0._score: {value: 58694.902, error: 0.01}} - - match: {hits.hits.1._id: "3"} - - close_to: {hits.hits.1._score: {value: 34702.79, error: 0.01}} - - match: {hits.hits.2._id: "2"} - - close_to: {hits.hits.2._score: {value: 33686.29, error: 0.01}} + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 58694.902, error: 0.01 } } + - match: { hits.hits.1._id: "3" } + - close_to: { hits.hits.1._score: { value: 34702.79, error: 0.01 } } + - match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 33686.29, error: 0.01 } } - do: search: @@ -440,14 +450,14 @@ setup: 
num_candidates: 3 k: 3 field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] filter: { "term": { "name": "moose.jpg" } } - - length: {hits.hits: 1} - - match: {hits.hits.0._id: "2"} - - close_to: {hits.hits.0._score: {value: 33686.29, error: 0.01}} + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "2" } + - close_to: { hits.hits.0._score: { value: 33686.29, error: 0.01 } } --- "Knn search with _name": - requires: @@ -462,7 +472,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 3 num_candidates: 3 _name: "my_knn_query" @@ -473,15 +483,34 @@ setup: _name: "my_query" - - match: {hits.hits.0._id: "1"} - - match: {hits.hits.0.fields.name.0: "cow.jpg"} - - match: {hits.hits.0.matched_queries.0: "my_knn_query"} - - match: {hits.hits.0.matched_queries.1: "my_query"} + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0.fields.name.0: "cow.jpg" } + - match: { hits.hits.0.matched_queries.0: "my_knn_query" } + - match: { hits.hits.0.matched_queries.1: "my_query" } + + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } + - match: { hits.hits.1.matched_queries.0: "my_knn_query" } + + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2.fields.name.0: "rabbit.jpg" } + - match: { hits.hits.2.matched_queries.0: "my_knn_query" } + +--- +"kNN search on empty index should return 0 results and not an error": + - requires: + cluster_features: "gte_v8.15.1" + reason: 'Error fixed in 8.15.1' + - do: + search: + index: test_empty + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + k: 2 + num_candidates: 3 - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} - - match: {hits.hits.1.matched_queries.0: "my_knn_query"} + - match: { hits.total.value: 0 } - - match: {hits.hits.2._id: "3"} - - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} - - match: {hits.hits.2.matched_queries.0: "my_knn_query"} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml index cb5aae482507a..5f1af2ca5c52f 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml @@ -428,7 +428,7 @@ setup: index: hnsw_byte_quantized_merge_cosine id: "1" body: - embedding: [1.0, 1.0, 1.0, 1.0] + embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 1.0] # Flush in order to provoke a merge later - do: @@ -439,7 +439,7 @@ setup: index: hnsw_byte_quantized_merge_cosine id: "2" body: - embedding: [1.0, 1.0, 1.0, 2.0] + embedding: [0.0, 0.0, 0.0, 1.0, 1.0, 0.5] # Flush in order to provoke a merge later - do: @@ -450,7 +450,7 @@ setup: index: hnsw_byte_quantized_merge_cosine id: "3" body: - embedding: [1.0, 1.0, 1.0, 3.0] + embedding: [0.0, 0.0, 0.0, 0.0, 0.0, 10.5] - do: indices.forcemerge: @@ -468,7 +468,7 @@ setup: query: knn: field: embedding - query_vector: [1.0, 1.0, 1.0, 1.0] + query_vector: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] num_candidates: 10 - length: { hits.hits: 3 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml index 82fb18a879346..99bd001bd95e2 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml @@ -21,6 +21,10 @@ setup: - '{"text" : "Baby its cold there outside"}' - '{"index": {"_index": "test", "_id": "4"}}' - '{"text" : "Outside it is cold and wet"}' + - '{"index": {"_index": "test", "_id": "5"}}' + - '{"text" : "the big bad wolf"}' + - '{"index": {"_index": "test", "_id": "6"}}' + - '{"text" : "the big wolf"}' --- "Test ordered matching": @@ -444,4 +448,31 @@ setup: prefix: out - match: { hits.total.value: 3 } +--- +"Test rewrite disjunctions": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - "match": + "query": "the" + - "any_of": + "intervals": + - "match": + "query": "big" + - "match": + "query": "big bad" + - "match": + "query": "wolf" + max_gaps: 0 + ordered: true + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "6" } + - match: { hits.hits.1._id: "5" } diff --git a/server/build.gradle b/server/build.gradle index e62abed2bc75a..2628158c7d14a 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -40,8 +40,7 @@ dependencies { implementation project(":libs:elasticsearch-simdvec") implementation project(':libs:elasticsearch-plugin-classloader') - // no compile dependency by server, but server defines security policy for this codebase so it i> - runtimeOnly project(":libs:elasticsearch-preallocate") + implementation project(":libs:elasticsearch-preallocate") // lucene api "org.apache.lucene:lucene-core:${versions.lucene}" diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkAfterWriteFsyncFailureIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkAfterWriteFsyncFailureIT.java index 5adc0b090ed37..d531686bb5207 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkAfterWriteFsyncFailureIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkAfterWriteFsyncFailureIT.java @@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import static org.elasticsearch.index.IndexSettings.INDEX_REFRESH_INTERVAL_SETTING; +import static org.elasticsearch.indices.IndicesService.WRITE_DANGLING_INDICES_INFO_SETTING; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; @@ -48,6 +49,11 @@ public static void removeDisruptFSyncFS() { PathUtilsForTesting.teardown(); } + @Override + protected Settings nodeSettings() { + return Settings.builder().put(WRITE_DANGLING_INDICES_INFO_SETTING.getKey(), false).build(); + } + public void testFsyncFailureDoesNotAdvanceLocalCheckpoints() { String indexName = randomIdentifier(); client().admin() diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java index f5fdd752a6f57..aa721122c2160 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java @@ -61,4 +61,27 @@ public void testCollapseWithDocValueFields() { } ); } + + public void testCollapseWithFields() { + final String indexName = 
"test_collapse"; + createIndex(indexName); + final String collapseField = "collapse_field"; + final String otherField = "other_field"; + assertAcked(indicesAdmin().preparePutMapping(indexName).setSource(collapseField, "type=keyword", otherField, "type=keyword")); + index(indexName, "id_1_0", Map.of(collapseField, "value1", otherField, "other_value1")); + index(indexName, "id_1_1", Map.of(collapseField, "value1", otherField, "other_value2")); + index(indexName, "id_2_0", Map.of(collapseField, "value2", otherField, "other_value3")); + refresh(indexName); + + assertNoFailuresAndResponse( + prepareSearch(indexName).setQuery(new MatchAllQueryBuilder()) + .setFetchSource(false) + .addFetchField(otherField) + .setCollapse(new CollapseBuilder(collapseField).setInnerHits(new InnerHitBuilder("ih").setSize(2))), + searchResponse -> { + assertEquals(collapseField, searchResponse.getHits().getCollapseField()); + assertEquals(Set.of(new BytesRef("value1"), new BytesRef("value2")), Set.of(searchResponse.getHits().getCollapseValues())); + } + ); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 0a6fceea9a3f1..d9d6979ffd710 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -2177,6 +2177,15 @@ public void testHighlightNoMatchSize() throws IOException { field.highlighterType("unified"); assertNotHighlighted(prepareSearch("test").highlighter(new HighlightBuilder().field(field)), 0, "text"); + + // Check when the requested fragment size equals the size of the string + var anotherText = "I am unusual and don't end with your regular )token)"; + indexDoc("test", "1", "text", anotherText); + refresh(); + for (String type : new String[] { "plain", "unified", "fvh" }) { + field.highlighterType(type).noMatchSize(anotherText.length()).numOfFragments(0); + assertHighlight(prepareSearch("test").highlighter(new HighlightBuilder().field(field)), 0, "text", 0, 1, equalTo(anotherText)); + } } public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index aaf8b3d0c8d84..bf5f88d264612 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -33,6 +33,7 @@ requires org.elasticsearch.grok; requires org.elasticsearch.tdigest; requires org.elasticsearch.simdvec; + requires org.elasticsearch.preallocate; requires com.sun.jna; requires hppc; @@ -431,6 +432,7 @@ org.elasticsearch.indices.IndicesFeatures, org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures, org.elasticsearch.index.mapper.MapperFeatures, + org.elasticsearch.ingest.IngestGeoIpFeatures, org.elasticsearch.search.SearchFeatures, org.elasticsearch.script.ScriptFeatures, org.elasticsearch.search.retriever.RetrieversFeatures, @@ -464,4 +466,5 @@ org.elasticsearch.serverless.shardhealth, org.elasticsearch.serverless.apifiltering; exports org.elasticsearch.lucene.spatial; + } diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 2004c6fda8ce5..9d57c50b17aaf 100644 --- 
a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -208,6 +208,9 @@ static TransportVersion def(int id) { public static final TransportVersion TEXT_SIMILARITY_RERANKER_RETRIEVER = def(8_699_00_0); public static final TransportVersion ML_INFERENCE_GOOGLE_VERTEX_AI_RERANKING_ADDED = def(8_700_00_0); public static final TransportVersion VERSIONED_MASTER_NODE_REQUESTS = def(8_701_00_0); + public static final TransportVersion ML_INFERENCE_AMAZON_BEDROCK_ADDED = def(8_702_00_0); + public static final TransportVersion ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15 = def(8_702_00_1); + public static final TransportVersion FIX_VECTOR_SIMILARITY_INNER_HITS_BACKPORT_8_15 = def(8_702_00_2); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/Version.java b/server/src/main/java/org/elasticsearch/Version.java index b2c78453d9c75..54b9629365c90 100644 --- a/server/src/main/java/org/elasticsearch/Version.java +++ b/server/src/main/java/org/elasticsearch/Version.java @@ -123,6 +123,7 @@ public class Version implements VersionId, ToXContentFragment { public static final Version V_7_17_21 = new Version(7_17_21_99); public static final Version V_7_17_22 = new Version(7_17_22_99); public static final Version V_7_17_23 = new Version(7_17_23_99); + public static final Version V_7_17_24 = new Version(7_17_24_99); public static final Version V_8_0_0 = new Version(8_00_00_99); public static final Version V_8_0_1 = new Version(8_00_01_99); @@ -178,8 +179,10 @@ public class Version implements VersionId, ToXContentFragment { public static final Version V_8_14_0 = new Version(8_14_00_99); public static final Version V_8_14_1 = new Version(8_14_01_99); public static final Version V_8_14_2 = new Version(8_14_02_99); + public static final Version V_8_14_3 = new Version(8_14_03_99); public static final Version V_8_15_0 = new Version(8_15_00_99); - public static final Version CURRENT = V_8_15_0; + public static final Version V_8_15_1 = new Version(8_15_01_99); + public static final Version CURRENT = V_8_15_1; private static final NavigableMap VERSION_IDS; private static final Map VERSION_STRINGS; diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index b550755ce7bdd..a9c6894355cb6 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -30,6 +30,7 @@ import org.elasticsearch.action.admin.cluster.migration.TransportGetFeatureUpgradeStatusAction; import org.elasticsearch.action.admin.cluster.migration.TransportPostFeatureUpgradeAction; import org.elasticsearch.action.admin.cluster.node.capabilities.TransportNodesCapabilitiesAction; +import org.elasticsearch.action.admin.cluster.node.features.TransportNodesFeaturesAction; import org.elasticsearch.action.admin.cluster.node.hotthreads.TransportNodesHotThreadsAction; import org.elasticsearch.action.admin.cluster.node.info.TransportNodesInfoAction; import org.elasticsearch.action.admin.cluster.node.reload.TransportNodesReloadSecureSettingsAction; @@ -621,6 +622,7 @@ public void reg actions.register(TransportNodesInfoAction.TYPE, TransportNodesInfoAction.class); actions.register(TransportRemoteInfoAction.TYPE, TransportRemoteInfoAction.class); actions.register(TransportNodesCapabilitiesAction.TYPE, TransportNodesCapabilitiesAction.class); + 
actions.register(TransportNodesFeaturesAction.TYPE, TransportNodesFeaturesAction.class); actions.register(RemoteClusterNodesAction.TYPE, RemoteClusterNodesAction.TransportAction.class); actions.register(TransportNodesStatsAction.TYPE, TransportNodesStatsAction.class); actions.register(TransportNodesUsageAction.TYPE, TransportNodesUsageAction.class); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodeFeatures.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodeFeatures.java new file mode 100644 index 0000000000000..b33520624d114 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodeFeatures.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.node.features; + +import org.elasticsearch.action.support.nodes.BaseNodeResponse; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; +import java.util.Set; + +public class NodeFeatures extends BaseNodeResponse { + + private final Set features; + + public NodeFeatures(StreamInput in) throws IOException { + super(in); + features = in.readCollectionAsImmutableSet(StreamInput::readString); + } + + public NodeFeatures(Set features, DiscoveryNode node) { + super(node); + this.features = Set.copyOf(features); + } + + public Set nodeFeatures() { + return features; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeCollection(features, StreamOutput::writeString); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesRequest.java new file mode 100644 index 0000000000000..83b6fff7cf2b2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesRequest.java @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.node.features; + +import org.elasticsearch.action.support.nodes.BaseNodesRequest; + +public class NodesFeaturesRequest extends BaseNodesRequest { + public NodesFeaturesRequest(String... nodes) { + super(nodes); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesResponse.java new file mode 100644 index 0000000000000..0fca588216b15 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesResponse.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.node.features; + +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.support.TransportAction; +import org.elasticsearch.action.support.nodes.BaseNodesResponse; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; +import java.util.List; + +public class NodesFeaturesResponse extends BaseNodesResponse { + public NodesFeaturesResponse(ClusterName clusterName, List nodes, List failures) { + super(clusterName, nodes, failures); + } + + @Override + protected List readNodesFrom(StreamInput in) throws IOException { + return TransportAction.localOnly(); + } + + @Override + protected void writeNodesTo(StreamOutput out, List nodes) throws IOException { + TransportAction.localOnly(); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java new file mode 100644 index 0000000000000..d1b7a4f1b7e95 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.node.features; + +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.nodes.TransportNodesAction; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.features.FeatureService; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportRequest; +import org.elasticsearch.transport.TransportService; + +import java.io.IOException; +import java.util.List; + +@UpdateForV9 +// @UpdateForV10 // this can be removed in v10. It may be called by v8 nodes to v9 nodes. 
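The transport action declared just below registers the internal `cluster:monitor/nodes/features` endpoint that reads each node's feature set. A minimal caller-side sketch follows, mirroring how the `NodeFeaturesFixupListener` added later in this patch invokes it; the helper class, its output, and the node ids are illustrative only and not part of the change.

```java
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.admin.cluster.node.features.NodesFeaturesRequest;
import org.elasticsearch.action.admin.cluster.node.features.NodesFeaturesResponse;
import org.elasticsearch.action.admin.cluster.node.features.TransportNodesFeaturesAction;
import org.elasticsearch.client.internal.ClusterAdminClient;

// Illustrative helper, not part of the patch: fetches the feature sets of the given nodes
// via the new action, the same way NodeFeaturesFixupListener does further down.
final class NodeFeaturesFetcher {
    static void fetch(ClusterAdminClient client, String... nodeIds) {
        client.execute(
            TransportNodesFeaturesAction.TYPE,
            new NodesFeaturesRequest(nodeIds),
            ActionListener.wrap(
                response -> response.getNodes()
                    .forEach(n -> System.out.println(n.getNode().getId() + " -> " + n.nodeFeatures())),
                e -> System.err.println("could not read node features: " + e)
            )
        );
    }
}
```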
+public class TransportNodesFeaturesAction extends TransportNodesAction< + NodesFeaturesRequest, + NodesFeaturesResponse, + TransportNodesFeaturesAction.NodeFeaturesRequest, + NodeFeatures> { + + public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/features"); + + private final FeatureService featureService; + + @Inject + public TransportNodesFeaturesAction( + ThreadPool threadPool, + ClusterService clusterService, + TransportService transportService, + ActionFilters actionFilters, + FeatureService featureService + ) { + super( + TYPE.name(), + clusterService, + transportService, + actionFilters, + NodeFeaturesRequest::new, + threadPool.executor(ThreadPool.Names.MANAGEMENT) + ); + this.featureService = featureService; + } + + @Override + protected NodesFeaturesResponse newResponse( + NodesFeaturesRequest request, + List responses, + List failures + ) { + return new NodesFeaturesResponse(clusterService.getClusterName(), responses, failures); + } + + @Override + protected NodeFeaturesRequest newNodeRequest(NodesFeaturesRequest request) { + return new NodeFeaturesRequest(); + } + + @Override + protected NodeFeatures newNodeResponse(StreamInput in, DiscoveryNode node) throws IOException { + return new NodeFeatures(in); + } + + @Override + protected NodeFeatures nodeOperation(NodeFeaturesRequest request, Task task) { + return new NodeFeatures(featureService.getNodeFeatures().keySet(), transportService.getLocalNode()); + } + + public static class NodeFeaturesRequest extends TransportRequest { + public NodeFeaturesRequest(StreamInput in) throws IOException { + super(in); + } + + public NodeFeaturesRequest() {} + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java index 28f970eb8c9fe..7cf1d55622b7c 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java @@ -331,8 +331,7 @@ private void loadRepositoryData( final SnapshotsInProgress.State state = switch (snapshotInfo.state()) { case FAILED -> SnapshotsInProgress.State.FAILED; case SUCCESS, PARTIAL -> - // Translating both PARTIAL and SUCCESS to SUCCESS for now - // TODO: add the differentiation on the metadata level in the next major release + // Both of these means the snapshot has completed. 
SnapshotsInProgress.State.SUCCESS; default -> throw new IllegalArgumentException("Unexpected snapshot state " + snapshotInfo.state()); }; diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java b/server/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java index bd12cfdbc7962..429ebe365bbe1 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java @@ -36,7 +36,7 @@ public class IndicesSegmentResponse extends ChunkedBroadcastResponse { private volatile Map indicesSegments; - IndicesSegmentResponse( + public IndicesSegmentResponse( ShardSegments[] shards, int totalShards, int successfulShards, diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index 9ddac7f13eb51..7a33eaa59eb03 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -155,7 +155,8 @@ ShardSearchRequest rewriteShardSearchRequest(ShardSearchRequest request) { QueryBuilder query = new KnnScoreDocQueryBuilder( scoreDocs.toArray(Lucene.EMPTY_SCORE_DOCS), source.knnSearch().get(i).getField(), - source.knnSearch().get(i).getQueryVector() + source.knnSearch().get(i).getQueryVector(), + source.knnSearch().get(i).getSimilarity() ).boost(source.knnSearch().get(i).boost()).queryName(source.knnSearch().get(i).queryName()); if (nestedPath != null) { query = new NestedQueryBuilder(nestedPath, query, ScoreMode.Max).innerHit(source.knnSearch().get(i).innerHit()); diff --git a/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java index e8470ba77632f..e2385745149c1 100644 --- a/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java @@ -100,6 +100,10 @@ private void doRun() { if (hit.getInnerHits() == null) { hit.setInnerHits(Maps.newMapWithExpectedSize(innerHitBuilders.size())); } + if (hit.isPooled() == false) { + // TODO: make this work pooled by forcing the hit itself to become pooled as needed here + innerHits = innerHits.asUnpooled(); + } hit.getInnerHits().put(innerHitBuilder.getName(), innerHits); assert innerHits.isPooled() == false || hit.isPooled() : "pooled inner hits can only be added to a pooled hit"; innerHits.mustIncRef(); diff --git a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java index 082e1dd9257e0..072e2eed42c20 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java @@ -40,6 +40,7 @@ import org.elasticsearch.nativeaccess.NativeAccess; import org.elasticsearch.node.Node; import org.elasticsearch.node.NodeValidationException; +import org.elasticsearch.preallocate.Preallocate; import java.io.IOException; import java.io.InputStream; @@ -195,7 +196,8 @@ private static void initPhase2(Bootstrap bootstrap) throws IOException { SubscribableListener.class, RunOnce.class, // We eagerly initialize to work around log4j permissions & JDK-8309727 - VectorUtil.class + VectorUtil.class, + 
Preallocate.class ); // install SM after natives, shutdown hooks, etc. @@ -209,7 +211,7 @@ private static void initPhase2(Bootstrap bootstrap) throws IOException { private static void ensureInitialized(Class... classes) { for (final var clazz : classes) { try { - MethodHandles.publicLookup().ensureInitialized(clazz); + MethodHandles.lookup().ensureInitialized(clazz); } catch (IllegalAccessException unexpected) { throw new AssertionError(unexpected); } diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterState.java b/server/src/main/java/org/elasticsearch/cluster/ClusterState.java index f9294210e0a6a..c54269da68507 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterState.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterState.java @@ -884,6 +884,11 @@ public Map> nodeFeatures() { return Collections.unmodifiableMap(this.nodeFeatures); } + public Builder putNodeFeatures(String node, Set features) { + this.nodeFeatures.put(node, features); + return this; + } + public Builder routingTable(RoutingTable.Builder routingTableBuilder) { return routingTable(routingTableBuilder.build()); } diff --git a/server/src/main/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListener.java b/server/src/main/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListener.java new file mode 100644 index 0000000000000..c8b2555c0f15d --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListener.java @@ -0,0 +1,217 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.cluster.features; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.admin.cluster.node.features.NodeFeatures; +import org.elasticsearch.action.admin.cluster.node.features.NodesFeaturesRequest; +import org.elasticsearch.action.admin.cluster.node.features.NodesFeaturesResponse; +import org.elasticsearch.action.admin.cluster.node.features.TransportNodesFeaturesAction; +import org.elasticsearch.client.internal.ClusterAdminClient; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterFeatures; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateListener; +import org.elasticsearch.cluster.ClusterStateTaskExecutor; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.threadpool.Scheduler; +import org.elasticsearch.threadpool.ThreadPool; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Executor; +import java.util.stream.Collectors; + +@UpdateForV9 // this can be removed in v9 +public class NodeFeaturesFixupListener implements ClusterStateListener { + + private static final Logger logger = LogManager.getLogger(NodeFeaturesFixupListener.class); + + private static final TimeValue RETRY_TIME = TimeValue.timeValueSeconds(30); + + private final MasterServiceTaskQueue taskQueue; + private final ClusterAdminClient client; + private final Scheduler scheduler; + private final Executor executor; + private final Set pendingNodes = Collections.synchronizedSet(new HashSet<>()); + + public NodeFeaturesFixupListener(ClusterService service, ClusterAdminClient client, ThreadPool threadPool) { + // there tends to be a lot of state operations on an upgrade - this one is not time-critical, + // so use LOW priority. It just needs to be run at some point after upgrade. 
+ this( + service.createTaskQueue("fix-node-features", Priority.LOW, new NodesFeaturesUpdater()), + client, + threadPool, + threadPool.executor(ThreadPool.Names.CLUSTER_COORDINATION) + ); + } + + NodeFeaturesFixupListener( + MasterServiceTaskQueue taskQueue, + ClusterAdminClient client, + Scheduler scheduler, + Executor executor + ) { + this.taskQueue = taskQueue; + this.client = client; + this.scheduler = scheduler; + this.executor = executor; + } + + class NodesFeaturesTask implements ClusterStateTaskListener { + private final Map> results; + private final int retryNum; + + NodesFeaturesTask(Map> results, int retryNum) { + this.results = results; + this.retryNum = retryNum; + } + + @Override + public void onFailure(Exception e) { + logger.error("Could not apply features for nodes {} to cluster state", results.keySet(), e); + scheduleRetry(results.keySet(), retryNum); + } + + public Map> results() { + return results; + } + } + + static class NodesFeaturesUpdater implements ClusterStateTaskExecutor { + @Override + public ClusterState execute(BatchExecutionContext context) { + ClusterState.Builder builder = ClusterState.builder(context.initialState()); + var existingFeatures = builder.nodeFeatures(); + + boolean modified = false; + for (var c : context.taskContexts()) { + for (var e : c.getTask().results().entrySet()) { + // double check there are still no features for the node + if (existingFeatures.getOrDefault(e.getKey(), Set.of()).isEmpty()) { + builder.putNodeFeatures(e.getKey(), e.getValue()); + modified = true; + } + } + c.success(() -> {}); + } + return modified ? builder.build() : context.initialState(); + } + } + + @Override + public void clusterChanged(ClusterChangedEvent event) { + if (event.nodesDelta().masterNodeChanged() && event.localNodeMaster()) { + /* + * Execute this if we have just become master. + * Check if there are any nodes that should have features in cluster state, but don't. + * This can happen if the master was upgraded from before 8.13, and one or more non-master nodes + * were already upgraded. They don't re-join the cluster with the new master, so never get their features + * (which the master now understands) added to cluster state. + * So we need to do a separate transport call to get the node features and add them to cluster state. + * We can't use features to determine when this should happen, as the features are incorrect. + * We also can't use transport version, as that is unreliable for upgrades + * from versions before 8.8 (see TransportVersionFixupListener). + * So the only thing we can use is release version. + * This is ok here, as Serverless will never hit this case, so the node feature fetch action will never be called on Serverless. + * This whole class will be removed in ES v9. 
+ */ + ClusterFeatures nodeFeatures = event.state().clusterFeatures(); + Set queryNodes = event.state() + .nodes() + .stream() + .filter(n -> n.getVersion().onOrAfter(Version.V_8_15_0)) + .map(DiscoveryNode::getId) + .filter(n -> getNodeFeatures(nodeFeatures, n).isEmpty()) + .collect(Collectors.toSet()); + + if (queryNodes.isEmpty() == false) { + logger.debug("Fetching actual node features for nodes {}", queryNodes); + queryNodesFeatures(queryNodes, 0); + } + } + } + + @SuppressForbidden(reason = "Need to access a specific node's features") + private static Set getNodeFeatures(ClusterFeatures features, String nodeId) { + return features.nodeFeatures().getOrDefault(nodeId, Set.of()); + } + + private void scheduleRetry(Set nodes, int thisRetryNum) { + // just keep retrying until this succeeds + logger.debug("Scheduling retry {} for nodes {}", thisRetryNum + 1, nodes); + scheduler.schedule(() -> queryNodesFeatures(nodes, thisRetryNum + 1), RETRY_TIME, executor); + } + + private void queryNodesFeatures(Set nodes, int retryNum) { + // some might already be in-progress + Set outstandingNodes = Sets.newHashSetWithExpectedSize(nodes.size()); + synchronized (pendingNodes) { + for (String n : nodes) { + if (pendingNodes.add(n)) { + outstandingNodes.add(n); + } + } + } + if (outstandingNodes.isEmpty()) { + // all nodes already have in-progress requests + return; + } + + NodesFeaturesRequest request = new NodesFeaturesRequest(outstandingNodes.toArray(String[]::new)); + client.execute(TransportNodesFeaturesAction.TYPE, request, new ActionListener<>() { + @Override + public void onResponse(NodesFeaturesResponse response) { + pendingNodes.removeAll(outstandingNodes); + handleResponse(response, retryNum); + } + + @Override + public void onFailure(Exception e) { + pendingNodes.removeAll(outstandingNodes); + logger.warn("Could not read features for nodes {}", outstandingNodes, e); + scheduleRetry(outstandingNodes, retryNum); + } + }); + } + + private void handleResponse(NodesFeaturesResponse response, int retryNum) { + if (response.hasFailures()) { + Set failedNodes = new HashSet<>(); + for (FailedNodeException fne : response.failures()) { + logger.warn("Failed to read features from node {}", fne.nodeId(), fne); + failedNodes.add(fne.nodeId()); + } + scheduleRetry(failedNodes, retryNum); + } + // carry on and read what we can + + Map> results = response.getNodes() + .stream() + .collect(Collectors.toUnmodifiableMap(n -> n.getNode().getId(), NodeFeatures::nodeFeatures)); + + if (results.isEmpty() == false) { + taskQueue.submitTask("fix-node-features", new NodesFeaturesTask(results, retryNum), null); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/common/filesystem/FileSystemNatives.java b/server/src/main/java/org/elasticsearch/common/filesystem/FileSystemNatives.java index 00502d64b3896..3bcd76778e032 100644 --- a/server/src/main/java/org/elasticsearch/common/filesystem/FileSystemNatives.java +++ b/server/src/main/java/org/elasticsearch/common/filesystem/FileSystemNatives.java @@ -37,8 +37,8 @@ private static Provider loadJnaProvider() { Class.forName("com.sun.jna.Native"); if (Constants.WINDOWS) { return WindowsFileSystemNatives.getInstance(); - } else if (Constants.LINUX && Constants.JRE_IS_64BIT) { - return LinuxFileSystemNatives.getInstance(); + } else if (Constants.JRE_IS_64BIT) { + return PosixFileSystemNatives.getInstance(); } } catch (ClassNotFoundException e) { logger.warn("JNA not found. 
FileSystemNatives methods will be disabled.", e); diff --git a/server/src/main/java/org/elasticsearch/common/filesystem/LinuxFileSystemNatives.java b/server/src/main/java/org/elasticsearch/common/filesystem/LinuxFileSystemNatives.java deleted file mode 100644 index b40fb5c2e145b..0000000000000 --- a/server/src/main/java/org/elasticsearch/common/filesystem/LinuxFileSystemNatives.java +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ - -package org.elasticsearch.common.filesystem; - -import com.sun.jna.LastErrorException; -import com.sun.jna.Native; -import com.sun.jna.Platform; -import com.sun.jna.Structure; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.lucene.util.Constants; - -import java.nio.file.Files; -import java.nio.file.Path; -import java.time.Instant; -import java.util.OptionalLong; - -import static org.elasticsearch.core.Strings.format; - -/** - * {@link FileSystemNatives.Provider} implementation for Linux x86-64bits - */ -final class LinuxFileSystemNatives implements FileSystemNatives.Provider { - - private static final Logger logger = LogManager.getLogger(LinuxFileSystemNatives.class); - - private static final LinuxFileSystemNatives INSTANCE = new LinuxFileSystemNatives(); - - /** st_blocks field indicates the number of blocks allocated to the file, 512-byte units **/ - private static final long ST_BLOCKS_UNIT = 512L; - - /** - * Version of the `struct stat' data structure. - * - * To allow the `struct stat' structure bits to vary without changing shared library major version number, the `stat' function is often - * an inline wrapper around `xstat' which takes a leading version-number argument designating the data structure and bits used. - * - * In glibc this version is defined in bits/stat.h (or bits/struct_stat.h in glibc 2.33, or bits/xstatver.h in more recent versions). - * - * For x86-64 the _STAT_VER used is: - * # define _STAT_VER_LINUX 1 - * # define _STAT_VER _STAT_VER_LINUX - * - * For other architectures the _STAT_VER used is: - * # define _STAT_VER_LINUX 0 - * # define _STAT_VER _STAT_VER_LINUX - **/ - private static int loadStatVersion() { - return "aarch64".equalsIgnoreCase(Constants.OS_ARCH) ? 0 : 1; - } - - private static final int STAT_VER = loadStatVersion(); - - private LinuxFileSystemNatives() { - assert Constants.LINUX : Constants.OS_NAME; - assert Constants.JRE_IS_64BIT : Constants.OS_ARCH; - try { - Native.register(XStatLibrary.class, Platform.C_LIBRARY_NAME); - logger.debug("C library loaded"); - } catch (LinkageError e) { - logger.warn("unable to link C library. native methods and handlers will be disabled.", e); - throw e; - } - } - - static LinuxFileSystemNatives getInstance() { - return INSTANCE; - } - - public static class XStatLibrary { - public static native int __xstat(int version, String path, Stat stats) throws LastErrorException; - } - - /** - * Retrieves the actual number of bytes of disk storage used to store a specified file. 
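Both the removed `LinuxFileSystemNatives` and its `PosixFileSystemNatives` replacement further down derive the on-disk allocated size from `st_blocks`, which stat reports in 512-byte units. A minimal sketch of that arithmetic, with the block count hard-coded purely for illustration (the real providers read it from `stat`/`stat64`/`__xstat`):

```java
import java.util.OptionalLong;

// Sketch of the st_blocks -> allocated-bytes arithmetic described in the javadoc above.
final class AllocatedSizeSketch {
    private static final long ST_BLOCKS_UNIT = 512L; // st_blocks counts 512-byte blocks

    static OptionalLong allocatedSizeInBytes(long stBlocks) {
        return stBlocks < 0 ? OptionalLong.empty() : OptionalLong.of(stBlocks * ST_BLOCKS_UNIT);
    }

    public static void main(String[] args) {
        // A file occupying 8 blocks is reported as 4096 allocated bytes,
        // regardless of its logical length (e.g. for sparse files).
        System.out.println(allocatedSizeInBytes(8)); // OptionalLong[4096]
    }
}
```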
- * - * @param path the path to the file - * @return an {@link OptionalLong} that contains the number of allocated bytes on disk for the file, or empty if the size is invalid - */ - @Override - public OptionalLong allocatedSizeInBytes(Path path) { - assert Files.isRegularFile(path) : path; - try { - final Stat stats = new Stat(); - final int rc = XStatLibrary.__xstat(STAT_VER, path.toString(), stats); - if (logger.isTraceEnabled()) { - logger.trace("executing native method __xstat() returned {} with error code [{}] for file [{}]", stats, rc, path); - } - return OptionalLong.of(stats.st_blocks * ST_BLOCKS_UNIT); - } catch (LastErrorException e) { - logger.warn( - () -> format( - "error when executing native method __xstat(int vers, const char *name, struct stat *buf) for file [%s]", - path - ), - e - ); - } - return OptionalLong.empty(); - } - - @Structure.FieldOrder( - { - "st_dev", - "st_ino", - "st_nlink", - "st_mode", - "st_uid", - "st_gid", - "__pad0", - "st_rdev", - "st_size", - "st_blksize", - "st_blocks", - "st_atim", - "st_mtim", - "st_ctim", - "__glibc_reserved0", - "__glibc_reserved1", - "__glibc_reserved2" } - ) - public static class Stat extends Structure { - - /** - * The stat structure varies across architectures in the glibc and kernel source codes. For example some fields might be ordered - * differently and/or some padding bytes might be present between some fields. - * - * The struct implemented here refers to the Linux x86 architecture in the glibc source files: - * - glibc version 2.23: sysdeps/unix/sysv/linux/x86/bits/stat.h - * - glibc version 2.33: sysdeps/unix/sysv/linux/x86/bits/struct_stat.h - * - * The following command is useful to compile the stat struct on a given system: - * echo "#include <sys/stat.h>" | gcc -xc - -E -dD | grep -ve '^$' | grep -A23 '^struct stat' - */ - public long st_dev; // __dev_t st_dev; /* Device. */ - public long st_ino; // __ino_t st_ino; /* File serial number. */ - public long st_nlink; // __nlink_t st_nlink; /* Link count. */ - public int st_mode; // __mode_t st_mode; /* File mode. */ - public int st_uid; // __uid_t st_uid; /* User ID of the file's owner. */ - public int st_gid; // __gid_t st_gid; /* Group ID of the file's group. */ - public int __pad0; - public long st_rdev; // __dev_t st_rdev; /* Device number, if device. */ - public long st_size; // __off_t st_size; /* Size of file, in bytes. */ - public long st_blksize; // __blksize_t st_blksize; /* Optimal block size for I/O. */ - public long st_blocks; // __blkcnt_t st_blocks; /* Number 512-byte blocks allocated. */ - public Time st_atim; // struct timespec st_atim; /* Time of last access. */ - public Time st_mtim; // struct timespec st_mtim; /* Time of last modification. */ - public Time st_ctim; // struct timespec st_ctim; /* Time of last status change. 
*/ - public long __glibc_reserved0; // __syscall_slong_t - public long __glibc_reserved1; // __syscall_slong_t - public long __glibc_reserved2; // __syscall_slong_t - - @Override - public String toString() { - return "[st_dev=" - + st_dev - + ", st_ino=" - + st_ino - + ", st_nlink=" - + st_nlink - + ", st_mode=" - + st_mode - + ", st_uid=" - + st_uid - + ", st_gid=" - + st_gid - + ", st_rdev=" - + st_rdev - + ", st_size=" - + st_size - + ", st_blksize=" - + st_blksize - + ", st_blocks=" - + st_blocks - + ", st_atim=" - + Instant.ofEpochSecond(st_atim.tv_sec, st_atim.tv_nsec) - + ", st_mtim=" - + Instant.ofEpochSecond(st_mtim.tv_sec, st_mtim.tv_nsec) - + ", st_ctim=" - + Instant.ofEpochSecond(st_ctim.tv_sec, st_ctim.tv_nsec) - + ']'; - } - } - - @Structure.FieldOrder({ "tv_sec", "tv_nsec" }) - public static class Time extends Structure { - public long tv_sec; - public long tv_nsec; - } -} diff --git a/server/src/main/java/org/elasticsearch/common/filesystem/PosixFileSystemNatives.java b/server/src/main/java/org/elasticsearch/common/filesystem/PosixFileSystemNatives.java new file mode 100644 index 0000000000000..0ebe31ed32c83 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/filesystem/PosixFileSystemNatives.java @@ -0,0 +1,137 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.common.filesystem; + +import com.sun.jna.LastErrorException; +import com.sun.jna.Library; +import com.sun.jna.Memory; +import com.sun.jna.Native; +import com.sun.jna.NativeLibrary; +import com.sun.jna.Platform; +import com.sun.jna.Pointer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.util.Constants; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.OptionalLong; + +import static org.elasticsearch.core.Strings.format; + +/** + * {@link FileSystemNatives.Provider} implementation for Linux x86-64bits + */ +final class PosixFileSystemNatives implements FileSystemNatives.Provider { + + private static final Logger logger = LogManager.getLogger(PosixFileSystemNatives.class); + + private static final PosixFileSystemNatives INSTANCE = new PosixFileSystemNatives(); + + /** st_blocks field indicates the number of blocks allocated to the file, 512-byte units **/ + private static final long ST_BLOCKS_UNIT = 512L; + + /** + * Version of the `struct stat' data structure. + * + * To allow the `struct stat' structure bits to vary without changing shared library major version number, the `stat' function is often + * an inline wrapper around `xstat' which takes a leading version-number argument designating the data structure and bits used. + * + * In glibc this version is defined in bits/stat.h (or bits/struct_stat.h in glibc 2.33, or bits/xstatver.h in more recent versions). + * + * For x86-64 the _STAT_VER used is: + * # define _STAT_VER_LINUX 1 + * # define _STAT_VER _STAT_VER_LINUX + * + * For other architectures the _STAT_VER used is: + * # define _STAT_VER_LINUX 0 + * # define _STAT_VER _STAT_VER_LINUX + **/ + private static int loadStatVersion() { + return "aarch64".equalsIgnoreCase(Constants.OS_ARCH) ? 
0 : 1; + } + + private final Stat64Library lib; + + private PosixFileSystemNatives() { + assert Constants.JRE_IS_64BIT : Constants.OS_ARCH; + Stat64Library statFunction; + try { + var libc = NativeLibrary.getInstance(Platform.C_LIBRARY_NAME); + libc.getFunction("stat64"); + // getfunction didn't fail, so the symbol is available + statFunction = Native.load(Platform.C_LIBRARY_NAME, Stat64Library.class); + } catch (UnsatisfiedLinkError e) { + var xstat = Native.load(Platform.C_LIBRARY_NAME, XStatLibrary.class); + var version = loadStatVersion(); + statFunction = (path, stats) -> xstat.__xstat(version, path, stats); + } + lib = statFunction; + logger.debug("C library loaded"); + } + + static PosixFileSystemNatives getInstance() { + return INSTANCE; + } + + public interface Stat64Library extends Library { + int stat64(String path, Pointer stats) throws LastErrorException; + } + + public interface XStatLibrary extends Library { + int __xstat(int version, String path, Pointer stats) throws LastErrorException; + } + + /** + * Retrieves the actual number of bytes of disk storage used to store a specified file. + * + * @param path the path to the file + * @return an {@link OptionalLong} that contains the number of allocated bytes on disk for the file, or empty if the size is invalid + */ + @Override + public OptionalLong allocatedSizeInBytes(Path path) { + assert Files.isRegularFile(path) : path; + try { + final Stat stats = new Stat(Constants.LINUX ? 64 : 104); + final int rc = lib.stat64(path.toString(), stats.memory); + if (logger.isTraceEnabled()) { + logger.trace("executing native method __xstat() returned {} with error code [{}] for file [{}]", stats, rc, path); + } + return OptionalLong.of(stats.getBlocks() * ST_BLOCKS_UNIT); + } catch (LastErrorException e) { + logger.warn( + () -> format( + "error when executing native method __xstat(int vers, const char *name, struct stat *buf) for file [%s]", + path + ), + e + ); + } + return OptionalLong.empty(); + } + + public static class Stat { + final Memory memory = new Memory(144); + final int blocksOffset; + + Stat(int blocksOffset) { + this.blocksOffset = blocksOffset; + } + + public long getBlocks() { + return memory.getLong(blocksOffset); + } + + @Override + public String toString() { + return "Stat [blocks=" + getBlocks() + "]"; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java index 3df5b3fe288a2..bc8bf8103fa08 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexMode.java +++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java @@ -20,6 +20,7 @@ import org.elasticsearch.index.mapper.DocumentDimensions; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.MetadataFieldMapper; @@ -222,7 +223,7 @@ public boolean isSyntheticSourceEnabled() { return true; } }, - LOGS("logs") { + LOGSDB("logsdb") { @Override void validateWithOtherSettings(Map, Object> settings) { IndexMode.validateTimeSeriesSettings(settings); @@ -345,6 +346,10 @@ protected static String tsdbMode() { .startObject(DataStreamTimestampFieldMapper.DEFAULT_PATH) .field("type", DateFieldMapper.CONTENT_TYPE) .endObject() + .startObject("host.name") + .field("type", KeywordFieldMapper.CONTENT_TYPE) + .field("ignore_above", 
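For context on the allocated-size computation in PosixFileSystemNatives above: st_blocks counts 512-byte units actually allocated on disk, which can differ from the logical file length (for example for sparse files). A minimal caller sketch, assuming the static entry point FileSystemNatives.allocatedSizeInBytes(Path) dispatches to this provider as in prior releases:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.OptionalLong;

    // Hypothetical helper, for illustration only: compares the logical size (st_size)
    // with the on-disk allocation (st_blocks * 512) reported by the native provider.
    static void printSizes(Path file) throws IOException {
        long logical = Files.size(file);
        OptionalLong allocated = org.elasticsearch.common.filesystem.FileSystemNatives.allocatedSizeInBytes(file);
        System.out.println(file + ": logical=" + logical
            + " allocated=" + (allocated.isPresent() ? allocated.getAsLong() : "unknown"));
    }
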
1024) + .endObject() .endObject() .endObject()) ); @@ -464,7 +469,7 @@ public static IndexMode fromString(String value) { return switch (value) { case "standard" -> IndexMode.STANDARD; case "time_series" -> IndexMode.TIME_SERIES; - case "logs" -> IndexMode.LOGS; + case "logsdb" -> IndexMode.LOGSDB; default -> throw new IllegalArgumentException( "[" + value diff --git a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java index f190462d6d1e9..a11a51ef7ad62 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSortConfig.java @@ -153,14 +153,14 @@ public IndexSortConfig(IndexSettings indexSettings) { } List fields = INDEX_SORT_FIELD_SETTING.get(settings); - if (this.indexMode == IndexMode.LOGS && fields.isEmpty()) { + if (this.indexMode == IndexMode.LOGSDB && fields.isEmpty()) { fields = List.of("host.name", DataStream.TIMESTAMP_FIELD_NAME); } this.sortSpecs = fields.stream().map(FieldSortSpec::new).toArray(FieldSortSpec[]::new); if (INDEX_SORT_ORDER_SETTING.exists(settings)) { List orders = INDEX_SORT_ORDER_SETTING.get(settings); - if (this.indexMode == IndexMode.LOGS && orders.isEmpty()) { + if (this.indexMode == IndexMode.LOGSDB && orders.isEmpty()) { orders = List.of(SortOrder.DESC, SortOrder.DESC); } if (orders.size() != sortSpecs.length) { @@ -175,7 +175,7 @@ public IndexSortConfig(IndexSettings indexSettings) { if (INDEX_SORT_MODE_SETTING.exists(settings)) { List modes = INDEX_SORT_MODE_SETTING.get(settings); - if (this.indexMode == IndexMode.LOGS && modes.isEmpty()) { + if (this.indexMode == IndexMode.LOGSDB && modes.isEmpty()) { modes = List.of(MultiValueMode.MIN, MultiValueMode.MIN); } if (modes.size() != sortSpecs.length) { @@ -188,7 +188,7 @@ public IndexSortConfig(IndexSettings indexSettings) { if (INDEX_SORT_MISSING_SETTING.exists(settings)) { List missingValues = INDEX_SORT_MISSING_SETTING.get(settings); - if (this.indexMode == IndexMode.LOGS && missingValues.isEmpty()) { + if (this.indexMode == IndexMode.LOGSDB && missingValues.isEmpty()) { missingValues = List.of("_first", "_first"); } if (missingValues.size() != sortSpecs.length) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 0b4bb9dfc10ae..1228c908f7c18 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -123,7 +123,7 @@ private boolean isTimeSeriesModeIndex() { } private boolean isLogsModeIndex() { - return mapperService != null && IndexMode.LOGS == mapperService.getIndexSettings().getMode(); + return mapperService != null && IndexMode.LOGSDB == mapperService.getIndexSettings().getMode(); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java index 659cc89bfe46d..de91833c99842 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java @@ -16,11 +16,11 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.VectorSimilarityFunction; -import 
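As a sketch of what the IndexSortConfig defaults above amount to: an index created with index.mode=logsdb and no explicit sort settings behaves roughly as if the sort were spelled out on host.name and @timestamp, descending, with min mode and _first missing handling (illustrative builder, not taken from this change):

    import org.elasticsearch.common.settings.Settings;

    // Hypothetical explicit equivalent of the implicit logsdb sort defaults, for illustration only.
    Settings explicitLogsdbSort = Settings.builder()
        .put("index.mode", "logsdb")
        .putList("index.sort.field", "host.name", "@timestamp")
        .putList("index.sort.order", "desc", "desc")
        .putList("index.sort.mode", "min", "min")
        .putList("index.sort.missing", "_first", "_first")
        .build();
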
org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.RandomAccessVectorValues; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.apache.lucene.util.quantization.RandomAccessQuantizedByteVectorValues; +import org.elasticsearch.script.field.vectors.ESVectorUtil; import java.io.IOException; @@ -100,7 +100,7 @@ public RandomVectorScorer getRandomVectorScorer( } static float hammingScore(byte[] a, byte[] b) { - return ((a.length * Byte.SIZE) - VectorUtil.xorBitCount(a, b)) / (float) (a.length * Byte.SIZE); + return ((a.length * Byte.SIZE) - ESVectorUtil.xorBitCount(a, b)) / (float) (a.length * Byte.SIZE); } static class HammingVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer { diff --git a/server/src/main/java/org/elasticsearch/index/engine/Engine.java b/server/src/main/java/org/elasticsearch/index/engine/Engine.java index 6f4511483126f..b07132eea75e8 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -61,7 +61,6 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.mapper.DocumentParser; -import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; import org.elasticsearch.index.mapper.Mapper; @@ -69,6 +68,7 @@ import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.Uid; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; import org.elasticsearch.index.merge.MergeStats; import org.elasticsearch.index.seqno.SeqNoStats; @@ -242,19 +242,32 @@ protected final DocsStats docsStats(IndexReader indexReader) { /** * Returns the {@link DenseVectorStats} for this engine */ - public DenseVectorStats denseVectorStats() { + public DenseVectorStats denseVectorStats(MappingLookup mappingLookup) { + if (mappingLookup == null) { + return new DenseVectorStats(0); + } + + List fields = new ArrayList<>(); + for (Mapper mapper : mappingLookup.fieldMappers()) { + if (mapper instanceof DenseVectorFieldMapper) { + fields.add(mapper.fullPath()); + } + } + if (fields.isEmpty()) { + return new DenseVectorStats(0); + } try (Searcher searcher = acquireSearcher(DOC_STATS_SOURCE, SearcherScope.INTERNAL)) { - return denseVectorStats(searcher.getIndexReader()); + return denseVectorStats(searcher.getIndexReader(), fields); } } - protected final DenseVectorStats denseVectorStats(IndexReader indexReader) { + protected final DenseVectorStats denseVectorStats(IndexReader indexReader, List fields) { long valueCount = 0; // we don't wait for a pending refreshes here since it's a stats call instead we mark it as accessed only which will cause // the next scheduled refresh to go through and refresh the stats as well for (LeafReaderContext readerContext : indexReader.leaves()) { try { - valueCount += getDenseVectorValueCount(readerContext.reader()); + valueCount += getDenseVectorValueCount(readerContext.reader(), fields); } catch (IOException e) { logger.trace(() -> "failed to get dense vector stats for [" + readerContext + "]", e); } @@ -262,9 +275,10 @@ protected final DenseVectorStats denseVectorStats(IndexReader indexReader) { return new DenseVectorStats(valueCount); } - 
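A small worked example of the hammingScore formula above, with values chosen purely for illustration: two single-byte vectors that differ in 3 of their 8 bits score (8 - 3) / 8 = 0.625.

    // Illustrative arithmetic only; mirrors hammingScore(byte[], byte[]) shown above.
    byte[] a = { (byte) 0b0000_0111 };   // three bits set
    byte[] b = { (byte) 0b0000_0000 };
    int xorBits = Integer.bitCount((a[0] ^ b[0]) & 0xFF);                               // 3
    float score = ((a.length * Byte.SIZE) - xorBits) / (float) (a.length * Byte.SIZE);  // 0.625f
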
private long getDenseVectorValueCount(final LeafReader atomicReader) throws IOException { + private long getDenseVectorValueCount(final LeafReader atomicReader, List fields) throws IOException { long count = 0; - for (FieldInfo info : atomicReader.getFieldInfos()) { + for (var field : fields) { + var info = atomicReader.getFieldInfos().fieldInfo(field); if (info.getVectorDimension() > 0) { switch (info.getVectorEncoding()) { case FLOAT32 -> { @@ -285,23 +299,31 @@ private long getDenseVectorValueCount(final LeafReader atomicReader) throws IOEx * Returns the {@link SparseVectorStats} for this engine */ public SparseVectorStats sparseVectorStats(MappingLookup mappingLookup) { + if (mappingLookup == null) { + return new SparseVectorStats(0); + } + List fields = new ArrayList<>(); + for (Mapper mapper : mappingLookup.fieldMappers()) { + if (mapper instanceof SparseVectorFieldMapper) { + fields.add(new BytesRef(mapper.fullPath())); + } + } + if (fields.isEmpty()) { + return new SparseVectorStats(0); + } + Collections.sort(fields); try (Searcher searcher = acquireSearcher(DOC_STATS_SOURCE, SearcherScope.INTERNAL)) { - return sparseVectorStats(searcher.getIndexReader(), mappingLookup); + return sparseVectorStats(searcher.getIndexReader(), fields); } } - protected final SparseVectorStats sparseVectorStats(IndexReader indexReader, MappingLookup mappingLookup) { + protected final SparseVectorStats sparseVectorStats(IndexReader indexReader, List fields) { long valueCount = 0; - - if (mappingLookup == null) { - return new SparseVectorStats(valueCount); - } - // we don't wait for a pending refreshes here since it's a stats call instead we mark it as accessed only which will cause // the next scheduled refresh to go through and refresh the stats as well for (LeafReaderContext readerContext : indexReader.leaves()) { try { - valueCount += getSparseVectorValueCount(readerContext.reader(), mappingLookup); + valueCount += getSparseVectorValueCount(readerContext.reader(), fields); } catch (IOException e) { logger.trace(() -> "failed to get sparse vector stats for [" + readerContext + "]", e); } @@ -309,28 +331,16 @@ protected final SparseVectorStats sparseVectorStats(IndexReader indexReader, Map return new SparseVectorStats(valueCount); } - private long getSparseVectorValueCount(final LeafReader atomicReader, MappingLookup mappingLookup) throws IOException { + private long getSparseVectorValueCount(final LeafReader atomicReader, List fields) throws IOException { long count = 0; - - Map mappers = new HashMap<>(); - for (Mapper mapper : mappingLookup.fieldMappers()) { - if (mapper instanceof FieldMapper fieldMapper) { - if (fieldMapper.fieldType() instanceof SparseVectorFieldMapper.SparseVectorFieldType) { - mappers.put(fieldMapper.fullPath(), fieldMapper); - } - } - } - - for (FieldInfo info : atomicReader.getFieldInfos()) { - String name = info.name; - if (mappers.containsKey(name)) { - Terms terms = atomicReader.terms(FieldNamesFieldMapper.NAME); - if (terms != null) { - TermsEnum termsEnum = terms.iterator(); - if (termsEnum.seekExact(new BytesRef(name))) { - count += termsEnum.docFreq(); - } - } + Terms terms = atomicReader.terms(FieldNamesFieldMapper.NAME); + if (terms == null) { + return count; + } + TermsEnum termsEnum = terms.iterator(); + for (var fieldName : fields) { + if (termsEnum.seekExact(fieldName)) { + count += termsEnum.docFreq(); } } return count; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java 
b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index 67e457907f8cc..908108bce31da 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -69,12 +69,12 @@ private enum Mode { IndexMode.TIME_SERIES ); - private static final SourceFieldMapper LOGS_DEFAULT = new SourceFieldMapper( + private static final SourceFieldMapper LOGSDB_DEFAULT = new SourceFieldMapper( Mode.SYNTHETIC, Explicit.IMPLICIT_TRUE, Strings.EMPTY_ARRAY, Strings.EMPTY_ARRAY, - IndexMode.LOGS + IndexMode.LOGSDB ); /* @@ -184,7 +184,7 @@ public SourceFieldMapper build() { if (isDefault()) { return switch (indexMode) { case TIME_SERIES -> TSDB_DEFAULT; - case LOGS -> LOGS_DEFAULT; + case LOGSDB -> LOGSDB_DEFAULT; default -> DEFAULT; }; } @@ -234,8 +234,8 @@ public SourceFieldMapper build() { } else { return TSDB_LEGACY_DEFAULT; } - } else if (indexMode == IndexMode.LOGS) { - return LOGS_DEFAULT; + } else if (indexMode == IndexMode.LOGSDB) { + return LOGSDB_DEFAULT; } } return DEFAULT; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 989c92e909ce2..1ba80b203d9df 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -98,7 +98,7 @@ public class DenseVectorFieldMapper extends FieldMapper { public static final String COSINE_MAGNITUDE_FIELD_SUFFIX = "._magnitude"; private static final float EPS = 1e-3f; - static boolean isNotUnitVector(float magnitude) { + public static boolean isNotUnitVector(float magnitude) { return Math.abs(magnitude - 1.0f) > EPS; } @@ -934,8 +934,8 @@ int parseDimensionCount(DocumentParserContext context) throws IOException { } @Override - public void checkDimensions(int dvDims, int qvDims) { - if (dvDims != qvDims * Byte.SIZE) { + public void checkDimensions(Integer dvDims, int qvDims) { + if (dvDims != null && dvDims != qvDims * Byte.SIZE) { throw new IllegalArgumentException( "The query vector has a different number of dimensions [" + qvDims * Byte.SIZE @@ -969,8 +969,8 @@ abstract void checkVectorMagnitude( float squaredMagnitude ); - public void checkDimensions(int dvDims, int qvDims) { - if (dvDims != qvDims) { + public void checkDimensions(Integer dvDims, int qvDims) { + if (dvDims != null && dvDims != qvDims) { throw new IllegalArgumentException( "The query vector has a different number of dimensions [" + qvDims + "] than the document vectors [" + dvDims + "]." 
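To make the BIT dimension check above concrete (numbers are illustrative): a dense_vector declared with dims: 64 and element type bit is stored as 64 / 8 = 8 bytes, so an 8-byte query vector passes, while any other length throws.

    // Illustrative restatement of the BIT element type check; dvDims may be null until dims are known.
    Integer dvDims = 64;   // declared document dims (bits)
    int qvDims = 8;        // query vector length (bytes)
    boolean accepted = dvDims == null || dvDims == qvDims * Byte.SIZE;   // true: 64 == 8 * 8
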
); @@ -1711,17 +1711,21 @@ public Query termQuery(Object value, SearchExecutionContext context) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support term queries"); } - public Query createExactKnnQuery(VectorData queryVector) { + public Query createExactKnnQuery(VectorData queryVector, Float vectorSimilarity) { if (isIndexed() == false) { throw new IllegalArgumentException( "to perform knn search on field [" + name() + "], its mapping must have [index] set to [true]" ); } - return switch (elementType) { + Query knnQuery = switch (elementType) { case BYTE -> createExactKnnByteQuery(queryVector.asByteVector()); case FLOAT -> createExactKnnFloatQuery(queryVector.asFloatVector()); case BIT -> createExactKnnBitQuery(queryVector.asByteVector()); }; + if (vectorSimilarity != null) { + knnQuery = new VectorSimilarityQuery(knnQuery, vectorSimilarity, similarity.score(vectorSimilarity, elementType, dims)); + } + return knnQuery; } private Query createExactKnnBitQuery(byte[] queryVector) { diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index 881f4602be1c7..b15d3e08a0bb8 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -1425,7 +1425,8 @@ public CompletionStats completionStats(String... fields) { public DenseVectorStats denseVectorStats() { readAllowed(); - return getEngine().denseVectorStats(); + MappingLookup mappingLookup = mapperService != null ? mapperService.mappingLookup() : null; + return getEngine().denseVectorStats(mappingLookup); } public SparseVectorStats sparseVectorStats() { diff --git a/server/src/main/java/org/elasticsearch/inference/ModelConfigurations.java b/server/src/main/java/org/elasticsearch/inference/ModelConfigurations.java index 4b6f436460fdc..0df0378c4a5f4 100644 --- a/server/src/main/java/org/elasticsearch/inference/ModelConfigurations.java +++ b/server/src/main/java/org/elasticsearch/inference/ModelConfigurations.java @@ -20,7 +20,11 @@ public class ModelConfigurations implements ToFilteredXContentObject, VersionedNamedWriteable { - public static final String MODEL_ID = "model_id"; + // Due to refactoring, we now have different field names for the inference ID when it is serialized and stored to an index vs when it + // is returned as part of a GetInferenceModelAction + public static final String INDEX_ONLY_ID_FIELD_NAME = "model_id"; + public static final String INFERENCE_ID_FIELD_NAME = "inference_id"; + public static final String USE_ID_FOR_INDEX = "for_index"; public static final String SERVICE = "service"; public static final String SERVICE_SETTINGS = "service_settings"; public static final String TASK_SETTINGS = "task_settings"; @@ -119,7 +123,11 @@ public TaskSettings getTaskSettings() { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - builder.field(MODEL_ID, inferenceEntityId); + if (params.paramAsBoolean(USE_ID_FOR_INDEX, false)) { + builder.field(INDEX_ONLY_ID_FIELD_NAME, inferenceEntityId); + } else { + builder.field(INFERENCE_ID_FIELD_NAME, inferenceEntityId); + } builder.field(TaskType.NAME, taskType.toString()); builder.field(SERVICE, service); builder.field(SERVICE_SETTINGS, serviceSettings); @@ -131,7 +139,11 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public 
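A minimal sketch of how the new field-name toggle in ModelConfigurations above is driven by ToXContent params (variable names are hypothetical; USE_ID_FOR_INDEX is the "for_index" constant added in this change):

    import org.elasticsearch.inference.ModelConfigurations;
    import org.elasticsearch.xcontent.ToXContent;
    import java.util.Map;

    // Illustration only: the same ModelConfigurations serializes its id under different keys.
    ToXContent.Params indexParams = new ToXContent.MapParams(Map.of(ModelConfigurations.USE_ID_FOR_INDEX, "true"));
    ToXContent.Params apiParams = ToXContent.EMPTY_PARAMS;
    // toXContent(builder, indexParams) writes {"model_id": "<id>", ...}
    // toXContent(builder, apiParams)   writes {"inference_id": "<id>", ...}
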
XContentBuilder toFilteredXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - builder.field(MODEL_ID, inferenceEntityId); + if (params.paramAsBoolean(USE_ID_FOR_INDEX, false)) { + builder.field(INDEX_ONLY_ID_FIELD_NAME, inferenceEntityId); + } else { + builder.field(INFERENCE_ID_FIELD_NAME, inferenceEntityId); + } builder.field(TaskType.NAME, taskType.toString()); builder.field(SERVICE, service); builder.field(SERVICE_SETTINGS, serviceSettings.getFilteredXContentObject()); diff --git a/server/src/main/java/org/elasticsearch/ingest/EnterpriseGeoIpTask.java b/server/src/main/java/org/elasticsearch/ingest/EnterpriseGeoIpTask.java new file mode 100644 index 0000000000000..a55553c8f0fd7 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/ingest/EnterpriseGeoIpTask.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.persistent.PersistentTaskParams; +import org.elasticsearch.xcontent.ObjectParser; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +/** + * As a relatively minor hack, this class holds the string constant that defines both the id + * and the name of the task for the new ip geolocation database downloader feature. It also provides the + * PersistentTaskParams that are necessary to start the task and to run it. + *
* <p>
+ * Defining this in Elasticsearch itself gives us a reasonably tidy version of things where we don't + * end up with strange inter-module dependencies. It's not ideal, but it works fine. + */ +public final class EnterpriseGeoIpTask { + + private EnterpriseGeoIpTask() { + // utility class + } + + public static final String ENTERPRISE_GEOIP_DOWNLOADER = "enterprise-geoip-downloader"; + public static final NodeFeature GEOIP_DOWNLOADER_DATABASE_CONFIGURATION = new NodeFeature("geoip.downloader.database.configuration"); + + public static class EnterpriseGeoIpTaskParams implements PersistentTaskParams { + + public static final ObjectParser PARSER = new ObjectParser<>( + ENTERPRISE_GEOIP_DOWNLOADER, + true, + EnterpriseGeoIpTaskParams::new + ); + + public EnterpriseGeoIpTaskParams() {} + + public EnterpriseGeoIpTaskParams(StreamInput in) {} + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return ENTERPRISE_GEOIP_DOWNLOADER; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15; + } + + @Override + public void writeTo(StreamOutput out) {} + + public static EnterpriseGeoIpTaskParams fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public boolean equals(Object obj) { + return obj instanceof EnterpriseGeoIpTaskParams; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java b/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java new file mode 100644 index 0000000000000..0d989ad9f7ab2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/ingest/IngestGeoIpFeatures.java @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.GEOIP_DOWNLOADER_DATABASE_CONFIGURATION; + +public class IngestGeoIpFeatures implements FeatureSpecification { + public Set getFeatures() { + return Set.of(GEOIP_DOWNLOADER_DATABASE_CONFIGURATION); + } +} diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index aa0f9b8552d22..83e23d9a60b58 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -40,6 +40,7 @@ import org.elasticsearch.cluster.coordination.Coordinator; import org.elasticsearch.cluster.coordination.MasterHistoryService; import org.elasticsearch.cluster.coordination.StableMasterHealthIndicatorService; +import org.elasticsearch.cluster.features.NodeFeaturesFixupListener; import org.elasticsearch.cluster.metadata.DataStreamFactoryRetention; import org.elasticsearch.cluster.metadata.DataStreamGlobalRetentionResolver; import org.elasticsearch.cluster.metadata.IndexMetadataVerifier; @@ -752,6 +753,7 @@ private void construct( clusterService.addListener( new TransportVersionsFixupListener(clusterService, client.admin().cluster(), featureService, threadPool) ); + clusterService.addListener(new NodeFeaturesFixupListener(clusterService, client.admin().cluster(), threadPool)); } SourceFieldMetrics sourceFieldMetrics = new SourceFieldMetrics( diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java index 700baac09865e..218348325e0a4 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/indices/CreateIndexCapabilities.java @@ -18,7 +18,7 @@ public class CreateIndexCapabilities { /** * Support for using the 'logs' index mode. 
*/ - private static final String LOGS_INDEX_MODE_CAPABILITY = "logs_index_mode"; + private static final String LOGSDB_INDEX_MODE_CAPABILITY = "logsdb_index_mode"; - public static Set CAPABILITIES = Set.of(LOGS_INDEX_MODE_CAPABILITY); + public static Set CAPABILITIES = Set.of(LOGSDB_INDEX_MODE_CAPABILITY); } diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java index f2ff8fbccd2fb..e5c2d6a370f12 100644 --- a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java +++ b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java @@ -102,7 +102,7 @@ public double l1Norm(List queryVector) { @Override public int hamming(byte[] queryVector) { - return VectorUtil.xorBitCount(queryVector, vectorValue); + return ESVectorUtil.xorBitCount(queryVector, vectorValue); } @Override diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteKnnDenseVector.java b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteKnnDenseVector.java index e0ba032826aa1..0145eb3eae04b 100644 --- a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteKnnDenseVector.java +++ b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteKnnDenseVector.java @@ -103,7 +103,7 @@ public double l1Norm(List queryVector) { @Override public int hamming(byte[] queryVector) { - return VectorUtil.xorBitCount(queryVector, docVector); + return ESVectorUtil.xorBitCount(queryVector, docVector); } @Override diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/ESVectorUtil.java b/server/src/main/java/org/elasticsearch/script/field/vectors/ESVectorUtil.java new file mode 100644 index 0000000000000..7d9542bccf357 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/script/field/vectors/ESVectorUtil.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.script.field.vectors; + +import org.apache.lucene.util.BitUtil; +import org.apache.lucene.util.Constants; + +/** + * This class consists of a single utility method that provides XOR bit count computed over signed bytes. + * Remove this class when Lucene version > 9.11 is released, and replace with Lucene's VectorUtil directly. + */ +public class ESVectorUtil { + + /** + * For xorBitCount we stride over the values as either 64-bits (long) or 32-bits (int) at a time. + * On ARM Long::bitCount is not vectorized, and therefore produces less than optimal code, when + * compared to Integer::bitCount. While Long::bitCount is optimal on x64. + */ + static final boolean XOR_BIT_COUNT_STRIDE_AS_INT = Constants.OS_ARCH.equals("aarch64"); + + /** + * XOR bit count computed over signed bytes. 
+ * + * @param a bytes containing a vector + * @param b bytes containing another vector, of the same dimension + * @return the value of the XOR bit count of the two vectors + */ + public static int xorBitCount(byte[] a, byte[] b) { + if (a.length != b.length) { + throw new IllegalArgumentException("vector dimensions differ: " + a.length + "!=" + b.length); + } + if (XOR_BIT_COUNT_STRIDE_AS_INT) { + return xorBitCountInt(a, b); + } else { + return xorBitCountLong(a, b); + } + } + + /** XOR bit count striding over 4 bytes at a time. */ + static int xorBitCountInt(byte[] a, byte[] b) { + int distance = 0, i = 0; + for (final int upperBound = a.length & -Integer.BYTES; i < upperBound; i += Integer.BYTES) { + distance += Integer.bitCount((int) BitUtil.VH_NATIVE_INT.get(a, i) ^ (int) BitUtil.VH_NATIVE_INT.get(b, i)); + } + // tail: + for (; i < a.length; i++) { + distance += Integer.bitCount((a[i] ^ b[i]) & 0xFF); + } + return distance; + } + + /** XOR bit count striding over 8 bytes at a time. */ + static int xorBitCountLong(byte[] a, byte[] b) { + int distance = 0, i = 0; + for (final int upperBound = a.length & -Long.BYTES; i < upperBound; i += Long.BYTES) { + distance += Long.bitCount((long) BitUtil.VH_NATIVE_LONG.get(a, i) ^ (long) BitUtil.VH_NATIVE_LONG.get(b, i)); + } + // tail: + for (; i < a.length; i++) { + distance += Integer.bitCount((a[i] ^ b[i]) & 0xFF); + } + return distance; + } + + private ESVectorUtil() {} +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregations.java b/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregations.java index 297bb81b27b25..4f234c33b13a6 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregations.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregations.java @@ -226,34 +226,11 @@ public static InternalAggregations topLevelReduce(List agg } if (context.isFinalReduce()) { List reducedInternalAggs = reduced.getInternalAggregations(); - List internalAggregations = null; - for (int i = 0; i < reducedInternalAggs.size(); i++) { - InternalAggregation agg = reducedInternalAggs.get(i); - InternalAggregation internalAggregation = agg.reducePipelines( - agg, - context, - context.pipelineTreeRoot().subTree(agg.getName()) - ); - if (internalAggregation.equals(agg) == false) { - if (internalAggregations == null) { - internalAggregations = new ArrayList<>(reducedInternalAggs); - } - internalAggregations.set(i, internalAggregation); - } - } + reducedInternalAggs = reducedInternalAggs.stream() + .map(agg -> agg.reducePipelines(agg, context, context.pipelineTreeRoot().subTree(agg.getName()))) + .collect(Collectors.toCollection(ArrayList::new)); - var pipelineAggregators = context.pipelineTreeRoot().aggregators(); - if (pipelineAggregators.isEmpty()) { - if (internalAggregations == null) { - return reduced; - } - return from(internalAggregations); - } - if (internalAggregations != null) { - reducedInternalAggs = internalAggregations; - } - reducedInternalAggs = new ArrayList<>(reducedInternalAggs); - for (PipelineAggregator pipelineAggregator : pipelineAggregators) { + for (PipelineAggregator pipelineAggregator : context.pipelineTreeRoot().aggregators()) { SiblingPipelineAggregator sib = (SiblingPipelineAggregator) pipelineAggregator; InternalAggregation newAgg = sib.doReduce(from(reducedInternalAggs), context); reducedInternalAggs.add(newAgg); diff --git 
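Since both striding strategies above must produce identical results, a same-package test could assert the property directly; a minimal sketch (the odd array length is deliberate so the tail loop is exercised):

    import java.util.Random;

    // Illustration only: xorBitCountInt and xorBitCountLong are package-private, so this assumes
    // same-package access (e.g. a unit test in org.elasticsearch.script.field.vectors).
    byte[] a = new byte[13];
    byte[] b = new byte[13];
    Random random = new Random(42);
    random.nextBytes(a);
    random.nextBytes(b);
    assert ESVectorUtil.xorBitCountInt(a, b) == ESVectorUtil.xorBitCountLong(a, b);
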
a/server/src/main/java/org/elasticsearch/search/aggregations/InternalMultiBucketAggregation.java b/server/src/main/java/org/elasticsearch/search/aggregations/InternalMultiBucketAggregation.java index e046b5fc9244c..de19c26daff92 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/InternalMultiBucketAggregation.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/InternalMultiBucketAggregation.java @@ -207,31 +207,16 @@ public void forEachBucket(Consumer consumer) { } private List reducePipelineBuckets(AggregationReduceContext reduceContext, PipelineTree pipelineTree) { - List reducedBuckets = null; - var buckets = getBuckets(); - for (int bucketIndex = 0; bucketIndex < buckets.size(); bucketIndex++) { - B bucket = buckets.get(bucketIndex); - List aggs = null; - int aggIndex = 0; - for (InternalAggregation agg : bucket.getAggregations()) { + List reducedBuckets = new ArrayList<>(); + for (B bucket : getBuckets()) { + List aggs = new ArrayList<>(); + for (Aggregation agg : bucket.getAggregations()) { PipelineTree subTree = pipelineTree.subTree(agg.getName()); - var reduced = agg.reducePipelines(agg, reduceContext, subTree); - if (reduced.equals(agg) == false) { - if (aggs == null) { - aggs = bucket.getAggregations().copyResults(); - } - aggs.set(aggIndex, reduced); - } - aggIndex++; - } - if (aggs != null) { - if (reducedBuckets == null) { - reducedBuckets = new ArrayList<>(buckets); - } - reducedBuckets.set(bucketIndex, createBucket(InternalAggregations.from(aggs), bucket)); + aggs.add(((InternalAggregation) agg).reducePipelines((InternalAggregation) agg, reduceContext, subTree)); } + reducedBuckets.add(createBucket(InternalAggregations.from(aggs), bucket)); } - return reducedBuckets == null ? buckets : reducedBuckets; + return reducedBuckets; } public abstract static class InternalBucket implements Bucket, Writeable { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java index 04028de5656ca..62b7a0747ca00 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java @@ -84,9 +84,6 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I double key = roundKey * interval + offset; return new InternalHistogram.Bucket(key, docCount, keyed, formatter, subAggregationResults); }, (owningBucketOrd, buckets) -> { - if (buckets.isEmpty()) { - return buildEmptyAggregation(); - } // the contract of the histogram aggregation is that shards must return buckets ordered by key in ascending order CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index cb01aa5a31a9a..2c57bd4b38a04 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -340,9 +340,6 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I return 
buildAggregationsForVariableBuckets(owningBucketOrds, bucketOrds, (bucketValue, docCount, subAggregationResults) -> { return new InternalDateHistogram.Bucket(bucketValue, docCount, keyed, formatter, subAggregationResults); }, (owningBucketOrd, buckets) -> { - if (buckets.isEmpty()) { - return buildEmptyAggregation(); - } // the contract of the histogram aggregation is that shards must return buckets ordered by key in ascending order CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java index 2404de76fdd35..e27a09802d6c4 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/InternalHistogram.java @@ -259,17 +259,11 @@ BucketOrder getOrder() { @Override public InternalHistogram create(List buckets) { - if (this.buckets.equals(buckets)) { - return this; - } return new InternalHistogram(name, buckets, order, minDocCount, emptyBucketInfo, format, keyed, metadata); } @Override public Bucket createBucket(InternalAggregations aggregations, Bucket prototype) { - if (prototype.aggregations.equals(aggregations)) { - return prototype; - } return new Bucket(prototype.key, prototype.docCount, prototype.keyed, prototype.format, aggregations); } @@ -456,9 +450,6 @@ public InternalAggregation get() { CollectionUtil.introSort(reducedBuckets, order.comparator()); } } - if (reducedBuckets.equals(buckets)) { - return InternalHistogram.this; - } return new InternalHistogram(getName(), reducedBuckets, order, minDocCount, emptyBucketInfo, format, keyed, getMetadata()); } }; @@ -504,9 +495,14 @@ public Number getKey(MultiBucketsAggregation.Bucket bucket) { } @Override - @SuppressWarnings({ "rawtypes", "unchecked" }) public InternalAggregation createAggregation(List buckets) { - return new InternalHistogram(name, (List) buckets, order, minDocCount, emptyBucketInfo, format, keyed, getMetadata()); + // convert buckets to the right type + List buckets2 = new ArrayList<>(buckets.size()); + for (Object b : buckets) { + buckets2.add((Bucket) b); + } + buckets2 = Collections.unmodifiableList(buckets2); + return new InternalHistogram(name, buckets2, order, minDocCount, emptyBucketInfo, format, keyed, getMetadata()); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index 9cea884667325..936fcf2edc225 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -9,7 +9,10 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.PriorityQueue; @@ -419,25 +422,66 @@ void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedD } // we need to fill-in the blanks for 
(LeafReaderContext ctx : searcher().getTopReaderContext().leaves()) { - SortedBinaryDocValues values = valuesSource.bytesValues(ctx); - // brute force - for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) { - if (excludeDeletedDocs && ctx.reader().getLiveDocs() != null && ctx.reader().getLiveDocs().get(docId) == false) { - continue; + final Bits liveDocs = excludeDeletedDocs ? ctx.reader().getLiveDocs() : null; + if (liveDocs == null && valuesSource.hasOrdinals()) { + final SortedSetDocValues values = ((ValuesSource.Bytes.WithOrdinals) valuesSource).ordinalsValues(ctx); + collectZeroDocEntries(values, owningBucketOrd); + } else { + final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); + final BinaryDocValues singleton = FieldData.unwrapSingleton(values); + if (singleton != null) { + collectZeroDocEntries(singleton, liveDocs, ctx.reader().maxDoc(), owningBucketOrd); + } else { + collectZeroDocEntries(values, liveDocs, ctx.reader().maxDoc(), owningBucketOrd); } - if (values.advanceExact(docId)) { - int valueCount = values.docValueCount(); - for (int i = 0; i < valueCount; ++i) { - BytesRef term = values.nextValue(); - if (includeExclude == null || includeExclude.accept(term)) { - bucketOrds.add(owningBucketOrd, term); - } + } + } + } + + private void collectZeroDocEntries(SortedSetDocValues values, long owningBucketOrd) throws IOException { + final TermsEnum termsEnum = values.termsEnum(); + BytesRef term; + while ((term = termsEnum.next()) != null) { + if (includeExclude == null || includeExclude.accept(term)) { + bucketOrds.add(owningBucketOrd, term); + } + } + } + + private void collectZeroDocEntries(SortedBinaryDocValues values, Bits liveDocs, int maxDoc, long owningBucketOrd) + throws IOException { + // brute force + for (int docId = 0; docId < maxDoc; ++docId) { + if (liveDocs != null && liveDocs.get(docId) == false) { + continue; + } + if (values.advanceExact(docId)) { + final int valueCount = values.docValueCount(); + for (int i = 0; i < valueCount; ++i) { + final BytesRef term = values.nextValue(); + if (includeExclude == null || includeExclude.accept(term)) { + bucketOrds.add(owningBucketOrd, term); } } } } } + private void collectZeroDocEntries(BinaryDocValues values, Bits liveDocs, int maxDoc, long owningBucketOrd) throws IOException { + // brute force + for (int docId = 0; docId < maxDoc; ++docId) { + if (liveDocs != null && liveDocs.get(docId) == false) { + continue; + } + if (values.advanceExact(docId)) { + final BytesRef term = values.binaryValue(); + if (includeExclude == null || includeExclude.accept(term)) { + bucketOrds.add(owningBucketOrd, term); + } + } + } + } + @Override Supplier emptyBucketBuilder(long owningBucketOrd) { return () -> new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java index e7fa0e67cb453..3d180dd094b18 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -218,6 +218,9 @@ private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer an // Can't split on term boundaries without offsets return -1; } + if (contents.length() <= noMatchSize) { + return contents.length(); + } int end = -1; tokenStream.reset(); while 
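The early return added to findGoodEndForNoHighlightExcerpt above covers the case where the whole field value already fits in the requested excerpt; illustrative values:

    // Illustration only: with no_match_size larger than the field value, the full value is the excerpt.
    String contents = "a short log line that fits entirely";   // 35 characters
    int noMatchSize = 100;
    int end = contents.length() <= noMatchSize
        ? contents.length()                                     // new fast path: 35
        : -1;                                                   // otherwise fall through to the token scan
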
(tokenStream.incrementToken()) { diff --git a/server/src/main/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilder.java index 1f05b215699b1..7e23075b1932e 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilder.java @@ -32,6 +32,7 @@ public class ExactKnnQueryBuilder extends AbstractQueryBuilder rewrittenQueries = new ArrayList<>(filterQueries.size()); diff --git a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification index a9d9c6a5a1938..054eb7964e9ef 100644 --- a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification +++ b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -14,6 +14,7 @@ org.elasticsearch.rest.RestFeatures org.elasticsearch.indices.IndicesFeatures org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures org.elasticsearch.index.mapper.MapperFeatures +org.elasticsearch.ingest.IngestGeoIpFeatures org.elasticsearch.search.SearchFeatures org.elasticsearch.search.retriever.RetrieversFeatures org.elasticsearch.script.ScriptFeatures diff --git a/server/src/main/resources/org/elasticsearch/TransportVersions.csv b/server/src/main/resources/org/elasticsearch/TransportVersions.csv index ba1dab5589ee2..8cd6fe9720039 100644 --- a/server/src/main/resources/org/elasticsearch/TransportVersions.csv +++ b/server/src/main/resources/org/elasticsearch/TransportVersions.csv @@ -70,6 +70,7 @@ 7.17.20,7172099 7.17.21,7172199 7.17.22,7172299 +7.17.23,7172399 8.0.0,8000099 8.0.1,8000199 8.1.0,8010099 @@ -123,3 +124,6 @@ 8.13.4,8595001 8.14.0,8636001 8.14.1,8636001 +8.14.2,8636001 +8.14.3,8636001 +8.15.0,8702002 diff --git a/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv b/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv index b7ca55a2b2b0d..df0df2d05ba5b 100644 --- a/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv +++ b/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv @@ -70,6 +70,7 @@ 7.17.20,7172099 7.17.21,7172199 7.17.22,7172299 +7.17.23,7172399 8.0.0,8000099 8.0.1,8000199 8.1.0,8010099 @@ -123,3 +124,6 @@ 8.13.4,8503000 8.14.0,8505000 8.14.1,8505000 +8.14.2,8505000 +8.14.3,8505000 +8.15.0,8512000 diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/close/CloseIndexRequestTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/close/CloseIndexRequestTests.java index b3caf93fbcddf..24c0f9d97800b 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/close/CloseIndexRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/close/CloseIndexRequestTests.java @@ -49,6 +49,9 @@ public void testBwcSerialization() throws Exception { in.setTransportVersion(out.getTransportVersion()); assertEquals(request.getParentTask(), TaskId.readFromStream(in)); assertEquals(request.masterNodeTimeout(), in.readTimeValue()); + if (in.getTransportVersion().onOrAfter(TransportVersions.VERSIONED_MASTER_NODE_REQUESTS)) { + assertEquals(request.masterTerm(), in.readVLong()); + } assertEquals(request.ackTimeout(), in.readTimeValue()); assertArrayEquals(request.indices(), in.readStringArray()); final IndicesOptions indicesOptions = 
IndicesOptions.readIndicesOptions(in); @@ -75,6 +78,9 @@ public void testBwcSerialization() throws Exception { out.setTransportVersion(version); sample.getParentTask().writeTo(out); out.writeTimeValue(sample.masterNodeTimeout()); + if (out.getTransportVersion().onOrAfter(TransportVersions.VERSIONED_MASTER_NODE_REQUESTS)) { + out.writeVLong(sample.masterTerm()); + } out.writeTimeValue(sample.ackTimeout()); out.writeStringArray(sample.indices()); sample.indicesOptions().writeIndicesOptions(out); diff --git a/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java index e9ff8336ef4c9..511a7c90dbedc 100644 --- a/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java @@ -35,6 +35,7 @@ import org.elasticsearch.search.rank.TestRankBuilder; import org.elasticsearch.search.vectors.KnnScoreDocQueryBuilder; import org.elasticsearch.search.vectors.KnnSearchBuilder; +import org.elasticsearch.search.vectors.VectorData; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.InternalAggregationTestCase; import org.elasticsearch.transport.Transport; @@ -351,12 +352,14 @@ public void testRewriteShardSearchRequestWithRank() { KnnScoreDocQueryBuilder ksdqb0 = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(1, 3.0f, 1), new ScoreDoc(4, 1.5f, 1) }, "vector", - new float[] { 0.0f } + VectorData.fromFloats(new float[] { 0.0f }), + null ); KnnScoreDocQueryBuilder ksdqb1 = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(1, 2.0f, 1) }, "vector2", - new float[] { 0.0f } + VectorData.fromFloats(new float[] { 0.0f }), + null ); assertEquals( List.of(bm25, ksdqb0, ksdqb1), diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java index b19cce96a2208..b57badb3a180f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/CoordinatorTests.java @@ -2047,7 +2047,15 @@ public void testElectionSchedulingAfterDiscoveryOutage() { + 4 * DEFAULT_DELAY_VARIABILITY // Then a commit of the new leader's first cluster state + DEFAULT_CLUSTER_STATE_UPDATE_DELAY - // Then the remaining node may join + // Then the remaining node may experience a disconnect (see comment in PeerFinder#closePeers) for which it is + // removed from the cluster, and its fault detection must also detect its removal + + Math.max( + DEFAULT_CLUSTER_STATE_UPDATE_DELAY, + defaultMillis(LEADER_CHECK_TIMEOUT_SETTING) * LEADER_CHECK_RETRY_COUNT_SETTING.get(Settings.EMPTY) + ) + // then it does another round of discovery + + defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_SETTING) + // and finally it joins the master + DEFAULT_CLUSTER_STATE_UPDATE_DELAY ); diff --git a/server/src/test/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListenerTests.java b/server/src/test/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListenerTests.java new file mode 100644 index 0000000000000..30d4c85e8fb67 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListenerTests.java @@ -0,0 +1,245 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.cluster.features;
+
+import org.elasticsearch.Version;
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.admin.cluster.node.features.NodeFeatures;
+import org.elasticsearch.action.admin.cluster.node.features.NodesFeaturesRequest;
+import org.elasticsearch.action.admin.cluster.node.features.NodesFeaturesResponse;
+import org.elasticsearch.action.admin.cluster.node.features.TransportNodesFeaturesAction;
+import org.elasticsearch.client.internal.ClusterAdminClient;
+import org.elasticsearch.cluster.ClusterChangedEvent;
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.features.NodeFeaturesFixupListener.NodesFeaturesTask;
+import org.elasticsearch.cluster.features.NodeFeaturesFixupListener.NodesFeaturesUpdater;
+import org.elasticsearch.cluster.node.DiscoveryNodeUtils;
+import org.elasticsearch.cluster.node.DiscoveryNodes;
+import org.elasticsearch.cluster.node.VersionInformation;
+import org.elasticsearch.cluster.service.ClusterStateTaskExecutorUtils;
+import org.elasticsearch.cluster.service.MasterServiceTaskQueue;
+import org.elasticsearch.common.transport.TransportAddress;
+import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.threadpool.Scheduler;
+import org.mockito.ArgumentCaptor;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Executor;
+
+import static org.elasticsearch.test.LambdaMatchers.transformedMatch;
+import static org.hamcrest.Matchers.arrayContainingInAnyOrder;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.same;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.hamcrest.MockitoHamcrest.argThat;
+
+public class NodeFeaturesFixupListenerTests extends ESTestCase {
+
+    @SuppressWarnings("unchecked")
+    private static MasterServiceTaskQueue<NodesFeaturesTask> newMockTaskQueue() {
+        return mock(MasterServiceTaskQueue.class);
+    }
+
+    private static DiscoveryNodes nodes(Version... versions) {
+        var builder = DiscoveryNodes.builder();
+        for (int i = 0; i < versions.length; i++) {
+            builder.add(DiscoveryNodeUtils.create("node" + i, new TransportAddress(TransportAddress.META_ADDRESS, 9200 + i), versions[i]));
+        }
+        builder.localNodeId("node0").masterNodeId("node0");
+        return builder.build();
+    }
+
+    private static DiscoveryNodes nodes(VersionInformation... versions) {
+        var builder = DiscoveryNodes.builder();
+        for (int i = 0; i < versions.length; i++) {
+            builder.add(
+                DiscoveryNodeUtils.builder("node" + i)
+                    .address(new TransportAddress(TransportAddress.META_ADDRESS, 9200 + i))
+                    .version(versions[i])
+                    .build()
+            );
+        }
+        builder.localNodeId("node0").masterNodeId("node0");
+        return builder.build();
+    }
+
+    @SafeVarargs
+    private static Map<String, Set<String>> features(Set<String>... nodeFeatures) {
+        Map<String, Set<String>> features = new HashMap<>();
+        for (int i = 0; i < nodeFeatures.length; i++) {
+            features.put("node" + i, nodeFeatures[i]);
+        }
+        return features;
+    }
+
+    private static NodesFeaturesResponse getResponse(Map<String, Set<String>> responseData) {
+        return new NodesFeaturesResponse(
+            ClusterName.DEFAULT,
+            responseData.entrySet()
+                .stream()
+                .map(
+                    e -> new NodeFeatures(
+                        e.getValue(),
+                        DiscoveryNodeUtils.create(e.getKey(), new TransportAddress(TransportAddress.META_ADDRESS, 9200))
+                    )
+                )
+                .toList(),
+            List.of()
+        );
+    }
+
+    public void testNothingDoneWhenNothingToFix() {
+        MasterServiceTaskQueue<NodesFeaturesTask> taskQueue = newMockTaskQueue();
+        ClusterAdminClient client = mock(ClusterAdminClient.class);
+
+        ClusterState testState = ClusterState.builder(ClusterState.EMPTY_STATE)
+            .nodes(nodes(Version.CURRENT, Version.CURRENT))
+            .nodeFeatures(features(Set.of("f1", "f2"), Set.of("f1", "f2")))
+            .build();
+
+        NodeFeaturesFixupListener listener = new NodeFeaturesFixupListener(taskQueue, client, null, null);
+        listener.clusterChanged(new ClusterChangedEvent("test", testState, ClusterState.EMPTY_STATE));
+
+        verify(taskQueue, never()).submitTask(anyString(), any(), any());
+    }
+
+    public void testFeaturesFixedAfterNewMaster() throws Exception {
+        MasterServiceTaskQueue<NodesFeaturesTask> taskQueue = newMockTaskQueue();
+        ClusterAdminClient client = mock(ClusterAdminClient.class);
+        Set<String> features = Set.of("f1", "f2");
+
+        ClusterState testState = ClusterState.builder(ClusterState.EMPTY_STATE)
+            .nodes(nodes(Version.CURRENT, Version.CURRENT, Version.CURRENT))
+            .nodeFeatures(features(features, Set.of(), Set.of()))
+            .build();
+
+        ArgumentCaptor<ActionListener<NodesFeaturesResponse>> action = ArgumentCaptor.captor();
+        ArgumentCaptor<NodesFeaturesTask> task = ArgumentCaptor.captor();
+
+        NodeFeaturesFixupListener listener = new NodeFeaturesFixupListener(taskQueue, client, null, null);
+        listener.clusterChanged(new ClusterChangedEvent("test", testState, ClusterState.EMPTY_STATE));
+        verify(client).execute(
+            eq(TransportNodesFeaturesAction.TYPE),
+            argThat(transformedMatch(NodesFeaturesRequest::nodesIds, arrayContainingInAnyOrder("node1", "node2"))),
+            action.capture()
+        );
+
+        action.getValue().onResponse(getResponse(Map.of("node1", features, "node2", features)));
+        verify(taskQueue).submitTask(anyString(), task.capture(), any());
+
+        ClusterState newState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful(
+            testState,
+            new NodesFeaturesUpdater(),
+            List.of(task.getValue())
+        );
+
+        assertThat(newState.clusterFeatures().allNodeFeatures(), containsInAnyOrder("f1", "f2"));
+    }
+
+    public void testFeaturesFetchedOnlyForUpdatedNodes() {
+        MasterServiceTaskQueue<NodesFeaturesTask> taskQueue = newMockTaskQueue();
+        ClusterAdminClient client = mock(ClusterAdminClient.class);
+
+        ClusterState testState = ClusterState.builder(ClusterState.EMPTY_STATE)
+            .nodes(
+                nodes(
+                    VersionInformation.CURRENT,
+                    VersionInformation.CURRENT,
+                    new VersionInformation(Version.V_8_12_0, IndexVersion.current(), IndexVersion.current())
+                )
+            )
+            .nodeFeatures(features(Set.of("f1", "f2"), Set.of(), Set.of()))
+            .build();
+
+        ArgumentCaptor<ActionListener<NodesFeaturesResponse>> action = ArgumentCaptor.captor();
+
+        NodeFeaturesFixupListener listener = new NodeFeaturesFixupListener(taskQueue, client, null, null);
+        listener.clusterChanged(new ClusterChangedEvent("test", testState, ClusterState.EMPTY_STATE));
+        verify(client).execute(
+            eq(TransportNodesFeaturesAction.TYPE),
+            argThat(transformedMatch(NodesFeaturesRequest::nodesIds, arrayContainingInAnyOrder("node1"))),
+            action.capture()
+        );
+    }
+
+    public void testConcurrentChangesDoNotOverlap() {
+        MasterServiceTaskQueue<NodesFeaturesTask> taskQueue = newMockTaskQueue();
+        ClusterAdminClient client = mock(ClusterAdminClient.class);
+        Set<String> features = Set.of("f1", "f2");
+
+        ClusterState testState1 = ClusterState.builder(ClusterState.EMPTY_STATE)
+            .nodes(nodes(Version.CURRENT, Version.CURRENT, Version.CURRENT))
+            .nodeFeatures(features(features, Set.of(), Set.of()))
+            .build();
+
+        NodeFeaturesFixupListener listeners = new NodeFeaturesFixupListener(taskQueue, client, null, null);
+        listeners.clusterChanged(new ClusterChangedEvent("test", testState1, ClusterState.EMPTY_STATE));
+        verify(client).execute(
+            eq(TransportNodesFeaturesAction.TYPE),
+            argThat(transformedMatch(NodesFeaturesRequest::nodesIds, arrayContainingInAnyOrder("node1", "node2"))),
+            any()
+        );
+        // don't send back the response yet
+
+        ClusterState testState2 = ClusterState.builder(ClusterState.EMPTY_STATE)
+            .nodes(nodes(Version.CURRENT, Version.CURRENT, Version.CURRENT))
+            .nodeFeatures(features(features, features, Set.of()))
+            .build();
+        // should not send any requests
+        listeners.clusterChanged(new ClusterChangedEvent("test", testState2, testState1));
+        verifyNoMoreInteractions(client);
+    }
+
+    public void testFailedRequestsAreRetried() {
+        MasterServiceTaskQueue<NodesFeaturesTask> taskQueue = newMockTaskQueue();
+        ClusterAdminClient client = mock(ClusterAdminClient.class);
+        Scheduler scheduler = mock(Scheduler.class);
+        Executor executor = mock(Executor.class);
+        Set<String> features = Set.of("f1", "f2");
+
+        ClusterState testState = ClusterState.builder(ClusterState.EMPTY_STATE)
+            .nodes(nodes(Version.CURRENT, Version.CURRENT, Version.CURRENT))
+            .nodeFeatures(features(features, Set.of(), Set.of()))
+            .build();
+
+        ArgumentCaptor<ActionListener<NodesFeaturesResponse>> action = ArgumentCaptor.captor();
+        ArgumentCaptor<Runnable> retry = ArgumentCaptor.forClass(Runnable.class);
+
+        NodeFeaturesFixupListener listener = new NodeFeaturesFixupListener(taskQueue, client, scheduler, executor);
+        listener.clusterChanged(new ClusterChangedEvent("test", testState, ClusterState.EMPTY_STATE));
+        verify(client).execute(
+            eq(TransportNodesFeaturesAction.TYPE),
+            argThat(transformedMatch(NodesFeaturesRequest::nodesIds, arrayContainingInAnyOrder("node1", "node2"))),
+            action.capture()
+        );
+
+        action.getValue().onFailure(new RuntimeException("failure"));
+        verify(scheduler).schedule(retry.capture(), any(), same(executor));
+
+        // running the retry should cause another call
+        retry.getValue().run();
+        verify(client, times(2)).execute(
+            eq(TransportNodesFeaturesAction.TYPE),
+            argThat(transformedMatch(NodesFeaturesRequest::nodesIds, arrayContainingInAnyOrder("node1", "node2"))),
+            action.capture()
+        );
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/index/LogsIndexModeTests.java b/server/src/test/java/org/elasticsearch/index/LogsIndexModeTests.java
index caddc7d5ea5af..84a9682635c8c 100644
--- a/server/src/test/java/org/elasticsearch/index/LogsIndexModeTests.java
+++ b/server/src/test/java/org/elasticsearch/index/LogsIndexModeTests.java
@@ -16,7 +16,7 @@ public class LogsIndexModeTests extends ESTestCase {
     public void testLogsIndexModeSetting() {
-        assertThat(IndexSettings.MODE.get(buildSettings()), equalTo(IndexMode.LOGS));
+        assertThat(IndexSettings.MODE.get(buildSettings()), equalTo(IndexMode.LOGSDB));
     }
     public void testSortField() {
@@ -25,9 +25,9 @@ public void testSortField() {
             .put(IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey(), "agent_id")
             .build();
         final IndexMetadata metadata = IndexSettingsTests.newIndexMeta("test", sortSettings);
-        assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGS));
+
assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGSDB)); final IndexSettings settings = new IndexSettings(metadata, Settings.EMPTY); - assertThat(settings.getMode(), equalTo(IndexMode.LOGS)); + assertThat(settings.getMode(), equalTo(IndexMode.LOGSDB)); assertThat("agent_id", equalTo(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey()))); } @@ -38,9 +38,9 @@ public void testSortMode() { .put(IndexSortConfig.INDEX_SORT_MODE_SETTING.getKey(), "max") .build(); final IndexMetadata metadata = IndexSettingsTests.newIndexMeta("test", sortSettings); - assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGS)); + assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGSDB)); final IndexSettings settings = new IndexSettings(metadata, Settings.EMPTY); - assertThat(settings.getMode(), equalTo(IndexMode.LOGS)); + assertThat(settings.getMode(), equalTo(IndexMode.LOGSDB)); assertThat("agent_id", equalTo(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey()))); assertThat("max", equalTo(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_MODE_SETTING.getKey()))); } @@ -52,9 +52,9 @@ public void testSortOrder() { .put(IndexSortConfig.INDEX_SORT_ORDER_SETTING.getKey(), "desc") .build(); final IndexMetadata metadata = IndexSettingsTests.newIndexMeta("test", sortSettings); - assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGS)); + assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGSDB)); final IndexSettings settings = new IndexSettings(metadata, Settings.EMPTY); - assertThat(settings.getMode(), equalTo(IndexMode.LOGS)); + assertThat(settings.getMode(), equalTo(IndexMode.LOGSDB)); assertThat("agent_id", equalTo(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey()))); assertThat("desc", equalTo(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_ORDER_SETTING.getKey()))); } @@ -66,15 +66,15 @@ public void testSortMissing() { .put(IndexSortConfig.INDEX_SORT_MISSING_SETTING.getKey(), "_last") .build(); final IndexMetadata metadata = IndexSettingsTests.newIndexMeta("test", sortSettings); - assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGS)); + assertThat(metadata.getIndexMode(), equalTo(IndexMode.LOGSDB)); final IndexSettings settings = new IndexSettings(metadata, Settings.EMPTY); - assertThat(settings.getMode(), equalTo(IndexMode.LOGS)); + assertThat(settings.getMode(), equalTo(IndexMode.LOGSDB)); assertThat("agent_id", equalTo(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey()))); assertThat("_last", equalTo(getIndexSetting(settings, IndexSortConfig.INDEX_SORT_MISSING_SETTING.getKey()))); } private Settings buildSettings() { - return Settings.builder().put(IndexSettings.MODE.getKey(), IndexMode.LOGS.getName()).build(); + return Settings.builder().put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName()).build(); } private String getIndexSetting(final IndexSettings settings, final String name) { diff --git a/server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java index 525fa31673494..122e238fb6346 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java @@ -188,7 +188,7 @@ public void testUseTimeSeriesModeAndCodecEnabled() throws IOException { } public void testLogsIndexMode() throws IOException { - PerFieldFormatSupplier perFieldMapperCodec = 
createFormatSupplier(true, IndexMode.LOGS, MAPPING_3); + PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, IndexMode.LOGSDB, MAPPING_3); assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("@timestamp")), is(true)); assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("hostname")), is(true)); assertThat((perFieldMapperCodec.useTSDBDocValuesFormat("response_size")), is(true)); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java index e8949dda78f7f..9b58e785131c9 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java @@ -232,6 +232,9 @@ private void assertSortedSetDocValues(LeafReader baselineReader, LeafReader cont for (int i = 0; i < docIdsToAdvanceTo.length; i++) { int docId = docIdsToAdvanceTo[i]; int baselineTarget = assertAdvance(docId, baselineReader, contenderReader, baseline, contender); + if (baselineTarget == NO_MORE_DOCS) { + break; + } assertEquals(baseline.docValueCount(), contender.docValueCount()); for (int j = 0; j < baseline.docValueCount(); j++) { long baselineOrd = baseline.nextOrd(); @@ -255,12 +258,14 @@ private void assertSortedSetDocValues(LeafReader baselineReader, LeafReader cont boolean contenderFound = contender.advanceExact(docId); assertEquals(baselineFound, contenderFound); assertEquals(baseline.docID(), contender.docID()); - assertEquals(baseline.docValueCount(), contender.docValueCount()); - for (int i = 0; i < baseline.docValueCount(); i++) { - long baselineOrd = baseline.nextOrd(); - long contenderOrd = contender.nextOrd(); - assertEquals(baselineOrd, contenderOrd); - assertEquals(baseline.lookupOrd(baselineOrd), contender.lookupOrd(contenderOrd)); + if (baselineFound) { + assertEquals(baseline.docValueCount(), contender.docValueCount()); + for (int i = 0; i < baseline.docValueCount(); i++) { + long baselineOrd = baseline.nextOrd(); + long contenderOrd = contender.nextOrd(); + assertEquals(baselineOrd, contenderOrd); + assertEquals(baseline.lookupOrd(baselineOrd), contender.lookupOrd(contenderOrd)); + } } } } @@ -328,6 +333,9 @@ private void assertSortedNumericDocValues(LeafReader baselineReader, LeafReader for (int i = 0; i < docIdsToAdvanceTo.length; i++) { int docId = docIdsToAdvanceTo[i]; int baselineTarget = assertAdvance(docId, baselineReader, contenderReader, baseline, contender); + if (baselineTarget == NO_MORE_DOCS) { + break; + } assertEquals(baseline.docValueCount(), contender.docValueCount()); for (int j = 0; j < baseline.docValueCount(); j++) { long baselineValue = baseline.nextValue(); @@ -349,11 +357,13 @@ private void assertSortedNumericDocValues(LeafReader baselineReader, LeafReader boolean contenderResult = contender.advanceExact(docId); assertEquals(baselineResult, contenderResult); assertEquals(baseline.docID(), contender.docID()); - assertEquals(baseline.docValueCount(), contender.docValueCount()); - for (int i = 0; i < baseline.docValueCount(); i++) { - long baselineValue = baseline.nextValue(); - long contenderValue = contender.nextValue(); - assertEquals(baselineValue, contenderValue); + if (baselineResult) { + assertEquals(baseline.docValueCount(), contender.docValueCount()); + for (int i = 0; i < baseline.docValueCount(); i++) { + long baselineValue = baseline.nextValue(); + long contenderValue = contender.nextValue(); + 
assertEquals(baselineValue, contenderValue); + } } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index 2a4554091dc91..27dbc79333c2d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -215,7 +215,7 @@ public void testExactKnnQuery() { for (int i = 0; i < dims; i++) { queryVector[i] = randomFloat(); } - Query query = field.createExactKnnQuery(VectorData.fromFloats(queryVector)); + Query query = field.createExactKnnQuery(VectorData.fromFloats(queryVector), null); assertTrue(query instanceof DenseVectorQuery.Floats); } { @@ -233,7 +233,7 @@ public void testExactKnnQuery() { for (int i = 0; i < dims; i++) { queryVector[i] = randomByte(); } - Query query = field.createExactKnnQuery(VectorData.fromBytes(queryVector)); + Query query = field.createExactKnnQuery(VectorData.fromBytes(queryVector), null); assertTrue(query instanceof DenseVectorQuery.Bytes); } } diff --git a/server/src/test/java/org/elasticsearch/ingest/IngestServiceTests.java b/server/src/test/java/org/elasticsearch/ingest/IngestServiceTests.java index b2b19f14cfd4b..42c42c309f931 100644 --- a/server/src/test/java/org/elasticsearch/ingest/IngestServiceTests.java +++ b/server/src/test/java/org/elasticsearch/ingest/IngestServiceTests.java @@ -1825,9 +1825,9 @@ public void testBulkRequestExecution() throws Exception { for (int i = 0; i < numRequest; i++) { IndexRequest indexRequest = new IndexRequest("_index").id("_id").setPipeline(pipelineId).setFinalPipeline("_none"); indexRequest.source(xContentType, "field1", "value1"); - boolean shouldListExecutedPiplines = randomBoolean(); - executedPipelinesExpected.add(shouldListExecutedPiplines); - indexRequest.setListExecutedPipelines(shouldListExecutedPiplines); + boolean shouldListExecutedPipelines = randomBoolean(); + executedPipelinesExpected.add(shouldListExecutedPipelines); + indexRequest.setListExecutedPipelines(shouldListExecutedPipelines); bulkRequest.add(indexRequest); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 788249fee1187..27f0b21d2767f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -329,6 +329,65 @@ public void testStringShardMinDocCount() throws IOException { } } + public void testStringShardZeroMinDocCount() throws IOException { + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", true, true, Collections.emptyMap()); + for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string") + .executionHint(executionMode.toString()) + .size(2) + .minDocCount(0) + .executionHint("map") + .excludeDeletedDocs(true) + .order(BucketOrder.key(true)); + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments(Arrays.asList(doc(fieldType, "a"), doc(fieldType, "b"), doc(fieldType, "c"), doc(fieldType, 
"d"))); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + } + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments( + Arrays.asList(doc(fieldType, "a"), doc(fieldType, "c", "d"), doc(fieldType, "b", "d"), doc(fieldType, "b")) + ); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + } + } + } + public void testManyTerms() throws Exception { MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", randomBoolean(), true, Collections.emptyMap()); TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").executionHint(randomHint()).field("string"); diff --git a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java index f5d9f35e34695..0cc151a16e4b7 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/AbstractKnnVectorQueryBuilderTestCase.java @@ -21,6 +21,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.index.query.InnerHitsRewriteContext; import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -296,6 +297,22 @@ private void assertBWCSerialization(QueryBuilder newQuery, QueryBuilder bwcQuery } } + public void testRewriteForInnerHits() throws IOException { + SearchExecutionContext context = createSearchExecutionContext(); + InnerHitsRewriteContext innerHitsRewriteContext = new InnerHitsRewriteContext(context.getParserConfig(), System::currentTimeMillis); + KnnVectorQueryBuilder queryBuilder = createTestQueryBuilder(); + queryBuilder.boost(randomFloat()); + queryBuilder.queryName(randomAlphaOfLength(10)); + QueryBuilder rewritten = queryBuilder.rewrite(innerHitsRewriteContext); + assertTrue(rewritten instanceof ExactKnnQueryBuilder); + ExactKnnQueryBuilder exactKnnQueryBuilder = (ExactKnnQueryBuilder) rewritten; + assertEquals(queryBuilder.queryVector(), exactKnnQueryBuilder.getQuery()); + 
assertEquals(queryBuilder.getFieldName(), exactKnnQueryBuilder.getField()); + assertEquals(queryBuilder.boost(), exactKnnQueryBuilder.boost(), 0.0001f); + assertEquals(queryBuilder.queryName(), exactKnnQueryBuilder.queryName()); + assertEquals(queryBuilder.getVectorSimilarity(), exactKnnQueryBuilder.vectorSimilarity()); + } + public void testRewriteWithQueryVectorBuilder() throws Exception { int dims = randomInt(1024); float[] expectedArray = new float[dims]; diff --git a/server/src/test/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilderTests.java index 1e77e35b60a4c..220682f11de01 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/ExactKnnQueryBuilderTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.AbstractQueryTestCase; @@ -61,12 +62,12 @@ protected ExactKnnQueryBuilder doCreateTestQueryBuilder() { for (int i = 0; i < VECTOR_DIMENSION; i++) { query[i] = randomFloat(); } - return new ExactKnnQueryBuilder(query, VECTOR_FIELD); + return new ExactKnnQueryBuilder(VectorData.fromFloats(query), VECTOR_FIELD, randomBoolean() ? randomFloat() : null); } @Override public void testValidOutput() { - ExactKnnQueryBuilder query = new ExactKnnQueryBuilder(new float[] { 1.0f, 2.0f, 3.0f }, "field"); + ExactKnnQueryBuilder query = new ExactKnnQueryBuilder(VectorData.fromFloats(new float[] { 1.0f, 2.0f, 3.0f }), "field", null); String expected = """ { "exact_knn" : { @@ -79,15 +80,36 @@ public void testValidOutput() { } }"""; assertEquals(expected, query.toString()); + query = new ExactKnnQueryBuilder(VectorData.fromFloats(new float[] { 1.0f, 2.0f, 3.0f }), "field", 1f); + expected = """ + { + "exact_knn" : { + "query" : [ + 1.0, + 2.0, + 3.0 + ], + "field" : "field", + "similarity" : 1.0 + } + }"""; + assertEquals(expected, query.toString()); } @Override protected void doAssertLuceneQuery(ExactKnnQueryBuilder queryBuilder, Query query, SearchExecutionContext context) throws IOException { + if (queryBuilder.vectorSimilarity() != null) { + assertTrue(query instanceof VectorSimilarityQuery); + VectorSimilarityQuery vectorSimilarityQuery = (VectorSimilarityQuery) query; + query = vectorSimilarityQuery.getInnerKnnQuery(); + } assertTrue(query instanceof DenseVectorQuery.Floats); DenseVectorQuery.Floats denseVectorQuery = (DenseVectorQuery.Floats) query; assertEquals(VECTOR_FIELD, denseVectorQuery.field); float[] expected = Arrays.copyOf(queryBuilder.getQuery().asFloatVector(), queryBuilder.getQuery().asFloatVector().length); - if (context.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.NORMALIZED_VECTOR_COSINE)) { + float magnitude = VectorUtil.dotProduct(expected, expected); + if (context.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.NORMALIZED_VECTOR_COSINE) + && DenseVectorFieldMapper.isNotUnitVector(magnitude)) { VectorUtil.l2normalize(expected); assertArrayEquals(expected, denseVectorQuery.getQuery(), 0.0f); } else { diff --git a/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java 
b/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java index 67bc6bde9c1af..3733c884c401b 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/KnnScoreDocQueryBuilderTests.java @@ -64,7 +64,8 @@ protected KnnScoreDocQueryBuilder doCreateTestQueryBuilder() { return new KnnScoreDocQueryBuilder( scoreDocs.toArray(new ScoreDoc[0]), randomBoolean() ? "field" : null, - randomBoolean() ? randomVector(10) : null + randomBoolean() ? VectorData.fromFloats(randomVector(10)) : null, + randomBoolean() ? randomFloat() : null ); } @@ -73,7 +74,8 @@ public void testValidOutput() { KnnScoreDocQueryBuilder query = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(0, 4.25f), new ScoreDoc(5, 1.6f) }, "field", - new float[] { 1.0f, 2.0f } + VectorData.fromFloats(new float[] { 1.0f, 2.0f }), + null ); String expected = """ { @@ -163,7 +165,8 @@ public void testRewriteToMatchNone() throws IOException { KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( new ScoreDoc[0], randomBoolean() ? "field" : null, - randomBoolean() ? randomVector(10) : null + randomBoolean() ? VectorData.fromFloats(randomVector(10)) : null, + randomBoolean() ? randomFloat() : null ); QueryRewriteContext context = randomBoolean() ? new InnerHitsRewriteContext(createSearchExecutionContext().getParserConfig(), System::currentTimeMillis) @@ -177,7 +180,8 @@ public void testRewriteForInnerHits() throws IOException { KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder( new ScoreDoc[] { new ScoreDoc(0, 4.25f), new ScoreDoc(5, 1.6f) }, randomAlphaOfLength(10), - randomVector(10) + VectorData.fromFloats(randomVector(10)), + randomBoolean() ? 
randomFloat() : null
         );
         queryBuilder.boost(randomFloat());
         queryBuilder.queryName(randomAlphaOfLength(10));
@@ -188,6 +192,7 @@ public void testRewriteForInnerHits() throws IOException {
         assertEquals(queryBuilder.fieldName(), exactKnnQueryBuilder.getField());
         assertEquals(queryBuilder.boost(), exactKnnQueryBuilder.boost(), 0.0001f);
         assertEquals(queryBuilder.queryName(), exactKnnQueryBuilder.queryName());
+        assertEquals(queryBuilder.vectorSimilarity(), exactKnnQueryBuilder.vectorSimilarity());
     }
     @Override
@@ -226,7 +231,12 @@ public void testScoreDocQueryWeightCount() throws IOException {
         }
         ScoreDoc[] scoreDocs = scoreDocsList.toArray(new ScoreDoc[0]);
-        KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder(scoreDocs, "field", randomVector(10));
+        KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder(
+            scoreDocs,
+            "field",
+            VectorData.fromFloats(randomVector(10)),
+            null
+        );
         Query query = queryBuilder.doToQuery(context);
         final Weight w = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0f);
         for (LeafReaderContext leafReaderContext : searcher.getLeafContexts()) {
@@ -269,7 +279,12 @@ public void testScoreDocQuery() throws IOException {
         }
         ScoreDoc[] scoreDocs = scoreDocsList.toArray(new ScoreDoc[0]);
-        KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder(scoreDocs, "field", randomVector(10));
+        KnnScoreDocQueryBuilder queryBuilder = new KnnScoreDocQueryBuilder(
+            scoreDocs,
+            "field",
+            VectorData.fromFloats(randomVector(10)),
+            null
+        );
         final Query query = queryBuilder.doToQuery(context);
         final Weight w = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0f);
diff --git a/test/fixtures/geoip-fixture/src/main/java/fixture/geoip/EnterpriseGeoIpHttpFixture.java b/test/fixtures/geoip-fixture/src/main/java/fixture/geoip/EnterpriseGeoIpHttpFixture.java
new file mode 100644
index 0000000000000..5932890dd8459
--- /dev/null
+++ b/test/fixtures/geoip-fixture/src/main/java/fixture/geoip/EnterpriseGeoIpHttpFixture.java
@@ -0,0 +1,119 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package fixture.geoip;
+
+import com.sun.net.httpserver.HttpServer;
+
+import org.elasticsearch.common.hash.MessageDigests;
+import org.junit.rules.ExternalResource;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UncheckedIOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.security.MessageDigest;
+
+/**
+ * This fixture is used to simulate a maxmind-provided server for downloading maxmind geoip database files from the
+ * EnterpriseGeoIpDownloader. It can be used by integration tests so that they don't actually hit maxmind servers.
+ */
+public class EnterpriseGeoIpHttpFixture extends ExternalResource {
+
+    private final Path source;
+    private final String[] databaseTypes;
+    private HttpServer server;
+
+    /*
+     * The values in databaseTypes must be in DatabaseConfiguration.MAXMIND_NAMES, and must be one of the databases copied in the
+     * copyFiles method of this class.
+     */
+    public EnterpriseGeoIpHttpFixture(String... databaseTypes) {
+        this.databaseTypes = databaseTypes;
+        try {
+            this.source = Files.createTempDirectory("source");
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    public String getAddress() {
+        return "http://" + server.getAddress().getHostString() + ":" + server.getAddress().getPort() + "/";
+    }
+
+    @Override
+    protected void before() throws Throwable {
+        copyFiles();
+        this.server = HttpServer.create(new InetSocketAddress(InetAddress.getLoopbackAddress(), 0), 0);
+
+        // for expediency reasons, it is handy to have this test fixture be able to serve the dual purpose of actually stubbing
+        // out the download protocol for downloading files from maxmind (see the looped context creation after this stanza), as
+        // well as to serve an empty response for the geoip.elastic.co service here
+        this.server.createContext("/", exchange -> {
+            String response = "[]"; // an empty json array
+            exchange.sendResponseHeaders(200, response.length());
+            try (OutputStream os = exchange.getResponseBody()) {
+                os.write(response.getBytes(StandardCharsets.UTF_8));
+            }
+        });
+
+        // register the file types for the download fixture
+        for (String databaseType : databaseTypes) {
+            createContextForEnterpriseDatabase(databaseType);
+        }
+
+        server.start();
+    }
+
+    private void createContextForEnterpriseDatabase(String databaseType) {
+        this.server.createContext("/" + databaseType + "/download", exchange -> {
+            exchange.sendResponseHeaders(200, 0);
+            if (exchange.getRequestURI().toString().contains("sha256")) {
+                MessageDigest sha256 = MessageDigests.sha256();
+                try (InputStream inputStream = GeoIpHttpFixture.class.getResourceAsStream("/geoip-fixture/" + databaseType + ".tgz")) {
+                    sha256.update(inputStream.readAllBytes());
+                }
+                exchange.getResponseBody()
+                    .write(
+                        (MessageDigests.toHexString(sha256.digest()) + " " + databaseType + "_20240709.tar.gz").getBytes(
+                            StandardCharsets.UTF_8
+                        )
+                    );
+            } else {
+                try (
+                    OutputStream outputStream = exchange.getResponseBody();
+                    InputStream inputStream = GeoIpHttpFixture.class.getResourceAsStream("/geoip-fixture/" + databaseType + ".tgz")
+                ) {
+                    inputStream.transferTo(outputStream);
+                }
+            }
+            exchange.getResponseBody().close();
+        });
+    }
+
+    @Override
+    protected void after() {
+        server.stop(0);
+    }
+
+    private void copyFiles() throws Exception {
+        for (String databaseType : databaseTypes) {
+            Files.copy(
+                GeoIpHttpFixture.class.getResourceAsStream("/geoip-fixture/GeoIP2-City.tgz"),
+                source.resolve(databaseType + ".tgz"),
+                StandardCopyOption.REPLACE_EXISTING
+            );
+        }
+    }
+}
diff --git a/test/fixtures/geoip-fixture/src/main/resources/geoip-fixture/GeoIP2-City.tgz b/test/fixtures/geoip-fixture/src/main/resources/geoip-fixture/GeoIP2-City.tgz
new file mode 100644
index 0000000000000..76dd40000f132
Binary files /dev/null and b/test/fixtures/geoip-fixture/src/main/resources/geoip-fixture/GeoIP2-City.tgz differ
diff --git a/test/fixtures/krb5kdc-fixture/src/main/java/org/elasticsearch/test/fixtures/krb5kdc/Krb5kDcContainer.java b/test/fixtures/krb5kdc-fixture/src/main/java/org/elasticsearch/test/fixtures/krb5kdc/Krb5kDcContainer.java
index fa75b57ea87a6..14357b6d47bbc 100644
--- a/test/fixtures/krb5kdc-fixture/src/main/java/org/elasticsearch/test/fixtures/krb5kdc/Krb5kDcContainer.java
+++ b/test/fixtures/krb5kdc-fixture/src/main/java/org/elasticsearch/test/fixtures/krb5kdc/Krb5kDcContainer.java
@@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.time.Duration;
 import
java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -74,6 +75,7 @@ public Krb5kDcContainer(ProvisioningId provisioningId) { this.provisioningId = provisioningId; withNetwork(Network.newNetwork()); addExposedPorts(88, 4444); + withStartupTimeout(Duration.ofMinutes(2)); withCreateContainerCmdModifier(cmd -> { // Add previously exposed ports and UDP port List exposedPorts = new ArrayList<>(); diff --git a/test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java b/test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java index 8ef80c08517de..b6abd3f7fab5a 100644 --- a/test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java +++ b/test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java @@ -24,6 +24,7 @@ import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.jdk.JarHell; import org.elasticsearch.plugins.PluginDescriptor; +import org.elasticsearch.preallocate.Preallocate; import org.elasticsearch.secure_sm.SecureSM; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.PrivilegedOperations; @@ -111,9 +112,10 @@ public class BootstrapForTesting { // init mockito SecureMockMaker.init(); - // init the privileged operation try { - MethodHandles.publicLookup().ensureInitialized(PrivilegedOperations.class); + var lookup = MethodHandles.publicLookup(); + lookup.ensureInitialized(PrivilegedOperations.class); + lookup.ensureInitialized(Preallocate.class); } catch (IllegalAccessException unexpected) { throw new AssertionError(unexpected); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index a6b737f162547..fa48309821b16 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -36,6 +36,7 @@ import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.analysis.AnalyzerScope; @@ -151,7 +152,7 @@ protected final DocumentMapper createTimeSeriesModeDocumentMapper(XContentBuilde } protected final DocumentMapper createLogsModeDocumentMapper(XContentBuilder mappings) throws IOException { - Settings settings = Settings.builder().put(IndexSettings.MODE.getKey(), "logs").build(); + Settings settings = Settings.builder().put(IndexSettings.MODE.getKey(), IndexMode.LOGSDB.getName()).build(); return createMapperService(settings, mappings).documentMapper(); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESSingleNodeTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESSingleNodeTestCase.java index 8526acc851c72..a538c39704a73 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESSingleNodeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESSingleNodeTestCase.java @@ -69,6 +69,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.elasticsearch.action.search.SearchTransportService.FREE_CONTEXT_ACTION_NAME; import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING; import static 
org.elasticsearch.discovery.SettingsBasedSeedHostsProvider.DISCOVERY_SEED_HOSTS_SETTING; import static org.elasticsearch.test.NodeRoles.dataNode; @@ -130,9 +131,9 @@ public void tearDown() throws Exception { logger.trace("[{}#{}]: cleaning up after test", getTestClass().getSimpleName(), getTestName()); awaitIndexShardCloseAsyncTasks(); ensureNoInitializingShards(); - SearchService searchService = getInstanceFromNode(SearchService.class); - assertThat(searchService.getActiveContexts(), equalTo(0)); - assertThat(searchService.getOpenScrollContexts(), equalTo(0)); + ensureAllFreeContextActionsAreConsumed(); + + ensureAllContextsReleased(getInstanceFromNode(SearchService.class)); super.tearDown(); var deleteDataStreamsRequest = new DeleteDataStreamAction.Request("*"); deleteDataStreamsRequest.indicesOptions(IndicesOptions.LENIENT_EXPAND_OPEN_CLOSED_HIDDEN); @@ -455,6 +456,14 @@ protected void ensureNoInitializingShards() { assertFalse("timed out waiting for shards to initialize", actionGet.isTimedOut()); } + /** + * waits until all free_context actions have been handled by the generic thread pool + */ + protected void ensureAllFreeContextActionsAreConsumed() throws Exception { + logger.info("--> waiting for all free_context tasks to complete within a reasonable time"); + safeGet(clusterAdmin().prepareListTasks().setActions(FREE_CONTEXT_ACTION_NAME + "*").setWaitForCompletion(true).execute()); + } + /** * Whether we'd like to enable inter-segment search concurrency and increase the likelihood of leveraging it, by creating multiple * slices with a low amount of documents in them, which would not be allowed in production. diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index add0de1993233..81db437483c68 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -112,6 +112,7 @@ import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.MockSearchService; +import org.elasticsearch.search.SearchService; import org.elasticsearch.test.junit.listeners.LoggingListener; import org.elasticsearch.test.junit.listeners.ReproduceInfoPrinter; import org.elasticsearch.threadpool.ExecutorBuilder; @@ -2423,4 +2424,15 @@ public static T expectThrows(Class expectedType, Reques () -> builder.get().decRef() // dec ref if we unexpectedly fail to not leak transport response ); } + + public static void ensureAllContextsReleased(SearchService searchService) { + try { + assertBusy(() -> { + assertThat(searchService.getActiveContexts(), equalTo(0)); + assertThat(searchService.getOpenScrollContexts(), equalTo(0)); + }); + } catch (Exception e) { + throw new AssertionError("Failed to verify search contexts", e); + } + } } diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index bb78c43fca449..3a5961d4e7ddf 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -2569,15 +2569,7 @@ public void assertRequestsFinished() { private void assertSearchContextsReleased() { for (NodeAndClient nodeAndClient : nodes.values()) { - SearchService searchService = getInstance(SearchService.class, nodeAndClient.name); - try { - assertBusy(() -> { - 
assertThat(searchService.getActiveContexts(), equalTo(0)); - assertThat(searchService.getOpenScrollContexts(), equalTo(0)); - }); - } catch (Exception e) { - throw new AssertionError("Failed to verify search contexts", e); - } + ESTestCase.ensureAllContextsReleased(getInstance(SearchService.class, nodeAndClient.name)); } } diff --git a/test/framework/src/main/java/org/elasticsearch/test/junit/listeners/ReproduceInfoPrinter.java b/test/framework/src/main/java/org/elasticsearch/test/junit/listeners/ReproduceInfoPrinter.java index 5844dcbd66471..b4dbf437afbe7 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/junit/listeners/ReproduceInfoPrinter.java +++ b/test/framework/src/main/java/org/elasticsearch/test/junit/listeners/ReproduceInfoPrinter.java @@ -178,11 +178,13 @@ private ReproduceErrorMessageBuilder appendESProperties() { if (System.getProperty("tests.jvm.argline") != null && System.getProperty("tests.jvm.argline").isEmpty() == false) { appendOpt("tests.jvm.argline", "\"" + System.getProperty("tests.jvm.argline") + "\""); } + if (Boolean.parseBoolean(System.getProperty("build.snapshot", "true")) == false) { + appendOpt("license.key", "x-pack/license-tools/src/test/resources/public.key"); + } appendOpt("tests.locale", Locale.getDefault().toLanguageTag()); appendOpt("tests.timezone", TimeZone.getDefault().getID()); appendOpt("tests.distribution", System.getProperty("tests.distribution")); appendOpt("runtime.java", Integer.toString(Runtime.version().feature())); - appendOpt("license.key", System.getProperty("licence.key")); appendOpt(ESTestCase.FIPS_SYSPROP, System.getProperty(ESTestCase.FIPS_SYSPROP)); return this; } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestFeatureService.java b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestFeatureService.java index 78a4126ec09db..92d72afbf9d52 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestFeatureService.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestFeatureService.java @@ -86,19 +86,6 @@ public boolean clusterHasFeature(String featureId) { Matcher matcher = VERSION_FEATURE_PATTERN.matcher(featureId); if (matcher.matches()) { Version extractedVersion = Version.fromString(matcher.group(1)); - if (Version.V_8_15_0.before(extractedVersion)) { - // As of version 8.14.0 REST tests have been migrated to use features only. - // For migration purposes we provide a synthetic version feature gte_vX.Y.Z for any version at or before 8.15.0 - // allowing for some transition period. - throw new IllegalArgumentException( - Strings.format( - "Synthetic version features are only available before [%s] for migration purposes! " - + "Please add a cluster feature to an appropriate FeatureSpecification; test-only historical-features " - + "can be supplied via ESRestTestCase#additionalTestOnlyHistoricalFeatures()", - Version.V_8_15_0 - ) - ); - } return version.onOrAfter(extractedVersion); } diff --git a/test/immutable-collections-patch/build.gradle b/test/immutable-collections-patch/build.gradle index 2d42215b3e02c..c3354e189847d 100644 --- a/test/immutable-collections-patch/build.gradle +++ b/test/immutable-collections-patch/build.gradle @@ -35,7 +35,7 @@ generatePatch.configure { executable = "${BuildParams.runtimeJavaHome}/bin/java" + (OS.current() == OS.WINDOWS ? 
'.exe' : '') } else { javaLauncher = javaToolchains.launcherFor { - languageVersion = JavaLanguageVersion.of(BuildParams.runtimeJavaVersion.majorVersion) + languageVersion = JavaLanguageVersion.of(VersionProperties.bundledJdkMajorVersion) vendor = VersionProperties.bundledJdkVendor == "openjdk" ? JvmVendorSpec.ORACLE : JvmVendorSpec.matching(VersionProperties.bundledJdkVendor) diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/DefaultLocalClusterHandle.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/DefaultLocalClusterHandle.java index 5292d917df630..a2d7fe11698e8 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/DefaultLocalClusterHandle.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/DefaultLocalClusterHandle.java @@ -39,7 +39,7 @@ public class DefaultLocalClusterHandle implements LocalClusterHandle { public static final AtomicInteger NEXT_DEBUG_PORT = new AtomicInteger(5007); private static final Logger LOGGER = LogManager.getLogger(DefaultLocalClusterHandle.class); - private static final Duration CLUSTER_UP_TIMEOUT = Duration.ofSeconds(30); + private static final Duration CLUSTER_UP_TIMEOUT = Duration.ofMinutes(5); public final ForkJoinPool executor = new ForkJoinPool( Math.max(Runtime.getRuntime().availableProcessors(), 4), diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/WaitForHttpResource.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/WaitForHttpResource.java index 0c839aeddd189..e897dca20b0a5 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/WaitForHttpResource.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/WaitForHttpResource.java @@ -56,7 +56,20 @@ public class WaitForHttpResource { private String password; public WaitForHttpResource(String protocol, String host, int numberOfNodes) throws MalformedURLException { - this(new URL(protocol + "://" + host + "/_cluster/health?wait_for_nodes=>=" + numberOfNodes + "&wait_for_status=yellow")); + this( + new URL( + protocol + + "://" + + host + + "/_cluster/health" + + "?wait_for_nodes=>=" + + numberOfNodes + + "&wait_for_status=yellow" + + "&wait_for_events=LANGUID" + + "&wait_for_no_initializing_shards" + + "&wait_for_no_relocating_shards" + ) + ); } public WaitForHttpResource(URL url) { diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistogramPercentileAggregationTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistogramPercentileAggregationTests.java index f60466bcf43cc..7c6f85104b5f8 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistogramPercentileAggregationTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/aggregations/metrics/HistogramPercentileAggregationTests.java @@ -241,7 +241,6 @@ public void testTDigestHistogram() throws Exception { ); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/110406") public void testBoxplotHistogram() throws Exception { int compression = TestUtil.nextInt(random(), 200, 300); setupTDigestHistogram(compression); diff --git a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java 
b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java index fbddfc7683d2f..478a0d08d6612 100644 --- a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java +++ b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java @@ -697,8 +697,8 @@ public void testProcessOnceOnPrimary() throws Exception { case TIME_SERIES: settingsBuilder.put("index.mode", "time_series").put("index.routing_path", "foo"); break; - case LOGS: - settingsBuilder.put("index.mode", "logs"); + case LOGSDB: + settingsBuilder.put("index.mode", IndexMode.LOGSDB.getName()); break; default: throw new UnsupportedOperationException("Unknown index mode [" + indexMode + "]"); diff --git a/x-pack/plugin/core/build.gradle b/x-pack/plugin/core/build.gradle index 0c65c7e4b6d29..1ed59d6fe3581 100644 --- a/x-pack/plugin/core/build.gradle +++ b/x-pack/plugin/core/build.gradle @@ -51,7 +51,6 @@ dependencies { // security deps api 'com.unboundid:unboundid-ldapsdk:6.0.3' - api "com.nimbusds:nimbus-jose-jwt:9.23" implementation project(":x-pack:plugin:core:template-resources") @@ -135,27 +134,7 @@ tasks.named("thirdPartyAudit").configure { //commons-logging provided dependencies 'javax.servlet.ServletContextEvent', 'javax.servlet.ServletContextListener', - 'javax.jms.Message', - // Optional dependency of nimbus-jose-jwt for handling Ed25519 signatures and ECDH with X25519 (RFC 8037) - 'com.google.crypto.tink.subtle.Ed25519Sign', - 'com.google.crypto.tink.subtle.Ed25519Sign$KeyPair', - 'com.google.crypto.tink.subtle.Ed25519Verify', - 'com.google.crypto.tink.subtle.X25519', - 'com.google.crypto.tink.subtle.XChaCha20Poly1305', - // optional dependencies for nimbus-jose-jwt - 'org.bouncycastle.asn1.pkcs.PrivateKeyInfo', - 'org.bouncycastle.asn1.x509.AlgorithmIdentifier', - 'org.bouncycastle.asn1.x509.SubjectPublicKeyInfo', - 'org.bouncycastle.cert.X509CertificateHolder', - 'org.bouncycastle.cert.jcajce.JcaX509CertificateHolder', - 'org.bouncycastle.crypto.InvalidCipherTextException', - 'org.bouncycastle.crypto.engines.AESEngine', - 'org.bouncycastle.crypto.modes.GCMBlockCipher', - 'org.bouncycastle.jcajce.provider.BouncyCastleFipsProvider', - 'org.bouncycastle.jce.provider.BouncyCastleProvider', - 'org.bouncycastle.openssl.PEMKeyPair', - 'org.bouncycastle.openssl.PEMParser', - 'org.bouncycastle.openssl.jcajce.JcaPEMKeyConverter' + 'javax.jms.Message' ) } diff --git a/x-pack/plugin/core/src/main/java/module-info.java b/x-pack/plugin/core/src/main/java/module-info.java index 282072417875b..72436bb9d5171 100644 --- a/x-pack/plugin/core/src/main/java/module-info.java +++ b/x-pack/plugin/core/src/main/java/module-info.java @@ -22,7 +22,6 @@ requires unboundid.ldapsdk; requires org.elasticsearch.tdigest; requires org.elasticsearch.xcore.templates; - requires com.nimbusds.jose.jwt; exports org.elasticsearch.index.engine.frozen; exports org.elasticsearch.license; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/FrozenEngine.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/FrozenEngine.java index 0a13aab82aced..3b242ca94ac61 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/FrozenEngine.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/FrozenEngine.java @@ -63,8 +63,6 @@ public final class FrozenEngine extends ReadOnlyEngine { ); private final SegmentsStats segmentsStats; private final DocsStats 
docsStats; - private final DenseVectorStats denseVectorStats; - private final SparseVectorStats sparseVectorStats; private volatile ElasticsearchDirectoryReader lastOpenedReader; private final ElasticsearchDirectoryReader canMatchReader; private final Object cacheIdentity = new Object(); @@ -95,8 +93,6 @@ public FrozenEngine( fillSegmentStats(segmentReader, true, segmentsStats); } this.docsStats = docsStats(reader); - this.denseVectorStats = denseVectorStats(reader); - this.sparseVectorStats = sparseVectorStats(reader, null); canMatchReader = ElasticsearchDirectoryReader.wrap( new RewriteCachingDirectoryReader(directory, reader.leaves(), null), config.getShardId() @@ -334,13 +330,17 @@ public DocsStats docStats() { } @Override - public DenseVectorStats denseVectorStats() { - return denseVectorStats; + public DenseVectorStats denseVectorStats(MappingLookup mappingLookup) { + // We could cache the result on first call but dense vectors on frozen tier + // are very unlikely, so we just don't count them in the stats. + return new DenseVectorStats(0); } @Override public SparseVectorStats sparseVectorStats(MappingLookup mappingLookup) { - return sparseVectorStats; + // We could cache the result on first call but sparse vectors on frozen tier + // are very unlikely, so we just don't count them in the stats. + return new SparseVectorStats(0); } synchronized boolean isReaderOpen() { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackField.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackField.java index 4ed2e2a8e056c..388868188b675 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackField.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackField.java @@ -87,6 +87,7 @@ public final class XPackField { /** Name constant for the redact processor feature. */ public static final String REDACT_PROCESSOR = "redact_processor"; + public static final String ENTERPRISE_GEOIP_DOWNLOADER = "enterprise_geoip_downloader"; /** Name for Universal Profiling. 
*/ public static final String UNIVERSAL_PROFILING = "universal_profiling"; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/InferenceConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/InferenceConfig.java index 92e833c250873..d50db1bd171ee 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/InferenceConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/InferenceConfig.java @@ -12,6 +12,8 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.VersionedNamedWriteable; import org.elasticsearch.core.Nullable; +import org.elasticsearch.license.License; +import org.elasticsearch.rest.RestRequest; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xpack.core.ml.MlConfigVersion; import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput; @@ -22,6 +24,7 @@ import java.util.Arrays; import static org.elasticsearch.action.ValidateActions.addValidationError; +import static org.elasticsearch.xpack.core.ml.MachineLearningField.ML_API_FEATURE; public interface InferenceConfig extends NamedXContentObject, VersionedNamedWriteable { @@ -114,4 +117,12 @@ default ElasticsearchStatusException incompatibleUpdateException(String updateNa updateName ); } + + default License.OperationMode getMinLicenseSupported() { + return ML_API_FEATURE.getMinimumOperationMode(); + } + + default License.OperationMode getMinLicenseSupportedForAction(RestRequest.Method method) { + return getMinLicenseSupported(); + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/LearningToRankConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/LearningToRankConfig.java index 7d515c9509c41..33e510fcb227c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/LearningToRankConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/LearningToRankConfig.java @@ -13,6 +13,8 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.Rewriteable; +import org.elasticsearch.license.License; +import org.elasticsearch.rest.RestRequest; import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentBuilder; @@ -226,6 +228,14 @@ public TransportVersion getMinimalSupportedTransportVersion() { return MIN_SUPPORTED_TRANSPORT_VERSION; } + @Override + public License.OperationMode getMinLicenseSupportedForAction(RestRequest.Method method) { + if (method == RestRequest.Method.PUT) { + return License.OperationMode.ENTERPRISE; + } + return super.getMinLicenseSupportedForAction(method); + } + @Override public LearningToRankConfig rewrite(QueryRewriteContext ctx) throws IOException { if (this.featureExtractorBuilders.isEmpty()) { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/Grant.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/Grant.java index b186ab45a7dc7..c98564251cd43 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/Grant.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/Grant.java @@ 
-7,19 +7,13 @@ package org.elasticsearch.xpack.core.security.action; -import org.elasticsearch.ElasticsearchSecurityException; import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.settings.SecureString; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xpack.core.security.authc.AuthenticationToken; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtAuthenticationToken; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import org.elasticsearch.xpack.core.security.authc.support.BearerToken; -import org.elasticsearch.xpack.core.security.authc.support.UsernamePasswordToken; import java.io.IOException; @@ -136,30 +130,6 @@ public void setClientAuthentication(ClientAuthentication clientAuthentication) { this.clientAuthentication = clientAuthentication; } - public AuthenticationToken getAuthenticationToken() { - assert validate(null) == null : "grant is invalid"; - return switch (type) { - case PASSWORD_GRANT_TYPE -> new UsernamePasswordToken(username, password); - case ACCESS_TOKEN_GRANT_TYPE -> { - SecureString clientAuthentication = this.clientAuthentication != null ? this.clientAuthentication.value() : null; - AuthenticationToken token = JwtAuthenticationToken.tryParseJwt(accessToken, clientAuthentication); - if (token != null) { - yield token; - } - if (clientAuthentication != null) { - clientAuthentication.close(); - throw new ElasticsearchSecurityException( - "[client_authentication] not supported with the supplied access_token type", - RestStatus.BAD_REQUEST - ); - } - // here we effectively assume it's an ES access token (from the {@code TokenService}) - yield new BearerToken(accessToken); - } - default -> throw new ElasticsearchSecurityException("the grant type [{}] is not supported", type); - }; - } - public ActionRequestValidationException validate(ActionRequestValidationException validationException) { if (type == null) { validationException = addValidationError("[grant_type] is required", validationException); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/role/BulkPutRoleRequestBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/role/BulkPutRoleRequestBuilder.java index ba199e183d4af..cda45a67e81c6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/role/BulkPutRoleRequestBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/role/BulkPutRoleRequestBuilder.java @@ -44,7 +44,7 @@ public class BulkPutRoleRequestBuilder extends ActionRequestBuilder roles; - public BulkPutRolesRequest() {} + public BulkPutRolesRequest(List roles) { + this.roles = roles; + } public void setRoles(List roles) { this.roles = roles; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/role/QueryRoleResponse.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/role/QueryRoleResponse.java index 6bdc6c66c1835..8e9da10e449ad 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/role/QueryRoleResponse.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/role/QueryRoleResponse.java @@ -86,7 +86,7 @@ public 
XContentBuilder toXContent(XContentBuilder builder, Params params) throws // other details of the role descriptor (in the same object). assert Strings.isNullOrEmpty(roleDescriptor.getName()) == false; builder.field("name", roleDescriptor.getName()); - roleDescriptor.innerToXContent(builder, params, false, false); + roleDescriptor.innerToXContent(builder, params, false); if (sortValues != null && sortValues.length > 0) { builder.array("_sort", sortValues); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/DefaultAuthenticationFailureHandler.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/DefaultAuthenticationFailureHandler.java index b5469fadd95b6..c9b30a826248a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/DefaultAuthenticationFailureHandler.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/DefaultAuthenticationFailureHandler.java @@ -43,7 +43,7 @@ public DefaultAuthenticationFailureHandler(final Map> failu if (failureResponseHeaders == null || failureResponseHeaders.isEmpty()) { this.defaultFailureResponseHeaders = Collections.singletonMap( "WWW-Authenticate", - Collections.singletonList("Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\"") + Collections.singletonList("Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\"") ); } else { this.defaultFailureResponseHeaders = Collections.unmodifiableMap( diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/Realm.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/Realm.java index 63989ee86b3a0..bce9e6255a037 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/Realm.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/Realm.java @@ -69,7 +69,7 @@ public int order() { public Map> getAuthenticationFailureHeaders() { return Collections.singletonMap( "WWW-Authenticate", - Collections.singletonList("Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\"") + Collections.singletonList("Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\"") ); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptor.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptor.java index 7bedab61bd43d..1a8839fa0fa4a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptor.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/RoleDescriptor.java @@ -417,13 +417,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } public XContentBuilder toXContent(XContentBuilder builder, Params params, boolean docCreation) throws IOException { - return toXContent(builder, params, docCreation, false); - } - - public XContentBuilder toXContent(XContentBuilder builder, Params params, boolean docCreation, boolean includeMetadataFlattened) - throws IOException { builder.startObject(); - innerToXContent(builder, params, docCreation, includeMetadataFlattened); + innerToXContent(builder, params, docCreation); return builder.endObject(); } @@ -435,12 +430,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params, boolea * @param docCreation {@code true} if the x-content is being generated for creating a document * in the security index, 
{@code false} if the x-content being generated * is for API display purposes - * @param includeMetadataFlattened {@code true} if the metadataFlattened field should be included in doc * @return x-content builder * @throws IOException if there was an error writing the x-content to the builder */ - public XContentBuilder innerToXContent(XContentBuilder builder, Params params, boolean docCreation, boolean includeMetadataFlattened) - throws IOException { + public XContentBuilder innerToXContent(XContentBuilder builder, Params params, boolean docCreation) throws IOException { builder.array(Fields.CLUSTER.getPreferredName(), clusterPrivileges); if (configurableClusterPrivileges.length != 0) { builder.field(Fields.GLOBAL.getPreferredName()); @@ -452,9 +445,7 @@ public XContentBuilder innerToXContent(XContentBuilder builder, Params params, b builder.array(Fields.RUN_AS.getPreferredName(), runAs); } builder.field(Fields.METADATA.getPreferredName(), metadata); - if (includeMetadataFlattened) { - builder.field(Fields.METADATA_FLATTENED.getPreferredName(), metadata); - } + if (docCreation) { builder.field(Fields.TYPE.getPreferredName(), ROLE_TYPE); } else { @@ -1196,7 +1187,7 @@ private static ApplicationResourcePrivileges parseApplicationPrivilege(String ro public static final class RemoteIndicesPrivileges implements Writeable, ToXContentObject { - private static final RemoteIndicesPrivileges[] NONE = new RemoteIndicesPrivileges[0]; + public static final RemoteIndicesPrivileges[] NONE = new RemoteIndicesPrivileges[0]; private final IndicesPrivileges indicesPrivileges; private final String[] remoteClusters; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Exceptions.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Exceptions.java index 37f1dce5af7ba..b323e7ef20171 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Exceptions.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Exceptions.java @@ -18,13 +18,13 @@ private Exceptions() {} public static ElasticsearchSecurityException authenticationError(String msg, Throwable cause, Object... args) { ElasticsearchSecurityException e = new ElasticsearchSecurityException(msg, RestStatus.UNAUTHORIZED, cause, args); - e.addHeader("WWW-Authenticate", "Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\""); + e.addHeader("WWW-Authenticate", "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\""); return e; } public static ElasticsearchSecurityException authenticationError(String msg, Object... 
args) { ElasticsearchSecurityException e = new ElasticsearchSecurityException(msg, RestStatus.UNAUTHORIZED, args); - e.addHeader("WWW-Authenticate", "Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\""); + e.addHeader("WWW-Authenticate", "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\""); return e; } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/LearningToRankConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/LearningToRankConfigTests.java index 09d2366984383..5adf62a3b67ef 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/LearningToRankConfigTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/LearningToRankConfigTests.java @@ -14,6 +14,8 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.query.QueryRewriteContext; +import org.elasticsearch.license.License; +import org.elasticsearch.rest.RestRequest; import org.elasticsearch.search.SearchModule; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -36,6 +38,7 @@ import java.util.stream.Stream; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; +import static org.hamcrest.Matchers.is; public class LearningToRankConfigTests extends InferenceConfigItemTestCase<LearningToRankConfig> { private boolean lenient; @@ -140,6 +143,16 @@ public void testDuplicateFeatureNames() { expectThrows(IllegalArgumentException.class, () -> builder.build()); } + public void testLicenseSupport_ForPutAction_RequiresEnterprise() { + var config = randomLearningToRankConfig(); + assertThat(config.getMinLicenseSupportedForAction(RestRequest.Method.PUT), is(License.OperationMode.ENTERPRISE)); + } + + public void testLicenseSupport_ForGetAction_RequiresPlatinum() { + var config = randomLearningToRankConfig(); + assertThat(config.getMinLicenseSupportedForAction(RestRequest.Method.GET), is(License.OperationMode.PLATINUM)); + } + @Override protected NamedXContentRegistry xContentRegistry() { List<NamedXContentRegistry.Entry> namedXContent = new ArrayList<>(); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authc/DefaultAuthenticationFailureHandlerTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authc/DefaultAuthenticationFailureHandlerTests.java index 46cb1b8e66930..b8b3087fd72b8 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authc/DefaultAuthenticationFailureHandlerTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authc/DefaultAuthenticationFailureHandlerTests.java @@ -33,7 +33,7 @@ public class DefaultAuthenticationFailureHandlerTests extends ESTestCase { public void testAuthenticationRequired() { final boolean testDefault = randomBoolean(); - final String basicAuthScheme = "Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\""; + final String basicAuthScheme = "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\""; final String bearerAuthScheme = "Bearer realm=\"" + XPackField.SECURITY + "\""; final DefaultAuthenticationFailureHandler failureHandler; if (testDefault) { @@ -69,7 +69,7 @@ public void testMissingToken() { } public void testExceptionProcessingRequest() { - final String basicAuthScheme = "Basic realm=\"" + XPackField.SECURITY + "\"
charset=\"UTF-8\""; + final String basicAuthScheme = "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\""; final String bearerAuthScheme = "Bearer realm=\"" + XPackField.SECURITY + "\""; final String negotiateAuthScheme = randomFrom("Negotiate", "Negotiate Ijoijksdk"); final Map> failureResponseHeaders = new HashMap<>(); @@ -134,7 +134,7 @@ public void testExceptionProcessingRequest() { } public void testSortsWWWAuthenticateHeaderValues() { - final String basicAuthScheme = "Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\""; + final String basicAuthScheme = "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\""; final String bearerAuthScheme = "Bearer realm=\"" + XPackField.SECURITY + "\""; final String negotiateAuthScheme = randomFrom("Negotiate", "Negotiate Ijoijksdk"); final String apiKeyAuthScheme = "ApiKey"; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/test/SecurityAssertions.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/test/SecurityAssertions.java index 3b40b96d26e10..cb989a970332d 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/test/SecurityAssertions.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/test/SecurityAssertions.java @@ -22,6 +22,6 @@ public static void assertContainsWWWAuthenticateHeader(ElasticsearchSecurityExce assertThat(e.status(), is(RestStatus.UNAUTHORIZED)); assertThat(e.getHeaderKeys(), hasSize(1)); assertThat(e.getHeader("WWW-Authenticate"), notNullValue()); - assertThat(e.getHeader("WWW-Authenticate"), contains("Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\"")); + assertThat(e.getHeader("WWW-Authenticate"), contains("Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\"")); } } diff --git a/x-pack/plugin/core/template-resources/src/main/resources/logs@mappings-logsdb.json b/x-pack/plugin/core/template-resources/src/main/resources/logs@mappings-logsdb.json deleted file mode 100644 index 167efbd3ffaf5..0000000000000 --- a/x-pack/plugin/core/template-resources/src/main/resources/logs@mappings-logsdb.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "template": { - "mappings": { - "date_detection": false, - "properties": { - "@timestamp": { - "type": "date" - }, - "host.name": { - "type": "keyword" - }, - "data_stream.type": { - "type": "constant_keyword", - "value": "logs" - }, - "data_stream.dataset": { - "type": "constant_keyword" - }, - "data_stream.namespace": { - "type": "constant_keyword" - } - } - } - }, - "_meta": { - "description": "default mappings for the logs index template installed by x-pack", - "managed": true - }, - "version": ${xpack.stack.template.version}, - "deprecated": ${xpack.stack.template.deprecated} -} diff --git a/x-pack/plugin/core/template-resources/src/main/resources/logs@settings.json b/x-pack/plugin/core/template-resources/src/main/resources/logs@settings.json index 240abf9934db5..e9a9f2611ad7b 100644 --- a/x-pack/plugin/core/template-resources/src/main/resources/logs@settings.json +++ b/x-pack/plugin/core/template-resources/src/main/resources/logs@settings.json @@ -5,7 +5,7 @@ "lifecycle": { "name": "logs" }, - "mode": "${xpack.stack.template.logs.index.mode}", + "mode": "${xpack.stack.template.logsdb.index.mode}", "codec": "best_compression", "mapping": { "ignore_malformed": true, diff --git a/x-pack/plugin/core/template-resources/src/main/resources/metrics@settings.json 
b/x-pack/plugin/core/template-resources/src/main/resources/metrics@settings.json index 4f3fac1aed5ae..9960bd2e7fdac 100644 --- a/x-pack/plugin/core/template-resources/src/main/resources/metrics@settings.json +++ b/x-pack/plugin/core/template-resources/src/main/resources/metrics@settings.json @@ -10,9 +10,6 @@ "total_fields": { "ignore_dynamic_beyond_limit": true } - }, - "query": { - "default_field": ["message"] } } } diff --git a/x-pack/plugin/core/template-resources/src/main/resources/metrics@tsdb-settings.json b/x-pack/plugin/core/template-resources/src/main/resources/metrics@tsdb-settings.json index b0db168e8189d..cb0e2cbffb50b 100644 --- a/x-pack/plugin/core/template-resources/src/main/resources/metrics@tsdb-settings.json +++ b/x-pack/plugin/core/template-resources/src/main/resources/metrics@tsdb-settings.json @@ -9,9 +9,6 @@ "total_fields": { "ignore_dynamic_beyond_limit": true } - }, - "query": { - "default_field": ["message"] } } } diff --git a/x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats-mb.json b/x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats-mb.json index fab8ca451358f..7457dce805eca 100644 --- a/x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats-mb.json +++ b/x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats-mb.json @@ -1,5 +1,7 @@ { - "index_patterns": [".monitoring-beats-${xpack.stack.monitoring.template.version}-*"], + "index_patterns": [ + ".monitoring-beats-${xpack.stack.monitoring.template.version}-*" + ], "version": ${xpack.stack.monitoring.template.release.version}, "template": { "mappings": { @@ -198,6 +200,9 @@ "ratelimit": { "type": "long" }, + "timeout": { + "type": "long" + }, "toolarge": { "type": "long" }, @@ -212,16 +217,6 @@ } } }, - "request": { - "properties": { - "count": { - "type": "long" - } - } - }, - "unset": { - "type": "long" - }, "valid": { "properties": { "accepted": { @@ -239,151 +234,436 @@ } } } + }, + "unset": { + "type": "long" } } }, - "decoder": { + "agentcfg": { "properties": { - "deflate": { - "properties": { - "content-length": { - "type": "long" - }, - "count": { - "type": "long" - } - } - }, - "gzip": { - "properties": { - "content-length": { - "type": "long" - }, - "count": { - "type": "long" - } - } - }, - "missing-content-length": { + "elasticsearch": { "properties": { - "count": { - "type": "long" - } - } - }, - "reader": { - "properties": { - "count": { - "type": "long" - }, - "size": { - "type": "long" - } - } - }, - "uncompressed": { - "properties": { - "content-length": { - "type": "long" + "cache": { + "properties": { + "entries": { + "properties": { + "count": { + "type": "long" + } + } + }, + "refresh": { + "properties": { + "failures": { + "type": "long" + }, + "successes": { + "type": "long" + } + } + } + } }, - "count": { - "type": "long" + "fetch": { + "properties": { + "es": { + "type": "long" + }, + "fallback": { + "type": "long" + }, + "invalid": { + "type": "long" + }, + "unavailable": { + "type": "long" + } + } } } } } }, - "processor": { + "jaeger": { "properties": { - "error": { + "grpc": { "properties": { - "decoding": { + "collect": { "properties": { - "count": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "errors": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + 
"type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } } } }, - "frames": { - "type": "long" - }, - "spans": { - "type": "long" - }, - "stacktraces": { - "type": "long" - }, - "transformations": { - "type": "long" - }, - "validation": { + "sampling": { "properties": { - "count": { - "type": "long" + "event": { + "properties": { + "received": { + "properties": { + "count": { + "type": "long" + } + } + } + } }, - "errors": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } + }, + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } } } } } - }, - "metric": { + } + } + }, + "otlp": { + "properties": { + "grpc": { "properties": { - "decoding": { + "logs": { "properties": { - "count": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "errors": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } } } }, - "transformations": { - "type": "long" + "metrics": { + "properties": { + "consumer": { + "properties": { + "unsupported_dropped": { + "type": "long" + } + } + }, + "request": { + "properties": { + "count": { + "type": "long" + } + } + }, + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } }, - "validation": { + "traces": { "properties": { - "count": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "errors": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } } } } } }, - "sourcemap": { + "http": { "properties": { - "counter": { - "type": "long" + "logs": { + "properties": { + "request": { + "properties": { + "count": { + "type": "long" + } + } + }, + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } }, - "decoding": { + "metrics": { "properties": { - "count": { - "type": "long" + "consumer": { + "properties": { + "unsupported_dropped": { + "type": "long" + } + } }, - "errors": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } + }, + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, 
+ "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } } } }, - "validation": { + "traces": { "properties": { - "count": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "errors": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } } } } } + } + } + }, + "processor": { + "properties": { + "error": { + "properties": { + "transformations": { + "type": "long" + } + } + }, + "metric": { + "properties": { + "transformations": { + "type": "long" + } + } }, "span": { "properties": { @@ -392,60 +672,127 @@ } } }, - "transaction": { + "stream": { "properties": { - "decoding": { + "accepted": { + "type": "long" + }, + "errors": { "properties": { - "count": { + "invalid": { "type": "long" }, - "errors": { + "toolarge": { "type": "long" } } - }, - "frames": { - "type": "long" - }, - "spans": { + } + } + }, + "transaction": { + "properties": { + "transformations": { "type": "long" - }, - "stacktraces": { + } + } + } + } + }, + "root": { + "properties": { + "request": { + "properties": { + "count": { "type": "long" - }, - "transactions": { + } + } + }, + "response": { + "properties": { + "count": { "type": "long" }, - "transformations": { - "type": "long" + "errors": { + "properties": { + "closed": { + "type": "long" + }, + "count": { + "type": "long" + }, + "decode": { + "type": "long" + }, + "forbidden": { + "type": "long" + }, + "internal": { + "type": "long" + }, + "invalidquery": { + "type": "long" + }, + "method": { + "type": "long" + }, + "notfound": { + "type": "long" + }, + "queue": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "toolarge": { + "type": "long" + }, + "unauthorized": { + "type": "long" + }, + "unavailable": { + "type": "long" + }, + "validate": { + "type": "long" + } + } }, - "validation": { + "valid": { "properties": { + "accepted": { + "type": "long" + }, "count": { "type": "long" }, - "errors": { + "notmodified": { + "type": "long" + }, + "ok": { "type": "long" } } } } + }, + "unset": { + "type": "long" + } + } + }, + "sampling": { + "properties": { + "transactions_dropped": { + "type": "long" } } }, "server": { "properties": { - "concurrent": { - "properties": { - "wait": { - "properties": { - "ms": { - "type": "long" - } - } - } - } - }, "request": { "properties": { "count": { @@ -478,21 +825,33 @@ "internal": { "type": "long" }, + "invalidquery": { + "type": "long" + }, "method": { "type": "long" }, + "notfound": { + "type": "long" + }, "queue": { "type": "long" }, "ratelimit": { "type": "long" }, + "timeout": { + "type": "long" + }, "toolarge": { "type": "long" }, "unauthorized": { "type": "long" }, + "unavailable": { + "type": "long" + }, "validate": { "type": "long" } @@ -506,12 +865,18 @@ "count": { "type": "long" }, + "notmodified": { + "type": "long" + }, "ok": { "type": "long" } } } } + }, + "unset": { + "type": "long" } } } @@ -918,6 +1283,37 @@ "type": "long" } } + }, + "output": { + "properties": { + "elasticsearch": { + "properties": { + "bulk_requests": { + "properties": { + "available": { + "type": "long" + }, + "completed": { + "type": "long" + } + } + }, + "indexers": { + "properties": { + 
"active": { + "type": "long" + }, + "created": { + "type": "long" + }, + "destroyed": { + "type": "long" + } + } + } + } + } + } } } }, @@ -1135,6 +1531,10 @@ "type": "alias", "path": "beat.stats.apm_server.acm.response.errors.ratelimit" }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.acm.response.errors.timeout" + }, "toolarge": { "type": "alias", "path": "beat.stats.apm_server.acm.response.errors.toolarge" @@ -1153,18 +1553,6 @@ } } }, - "request": { - "properties": { - "count": { - "type": "alias", - "path": "beat.stats.apm_server.acm.response.request.count" - } - } - }, - "unset": { - "type": "alias", - "path": "beat.stats.apm_server.acm.response.unset" - }, "valid": { "properties": { "accepted": { @@ -1179,9 +1567,485 @@ "type": "alias", "path": "beat.stats.apm_server.acm.response.valid.notmodified" }, - "ok": { - "type": "alias", - "path": "beat.stats.apm_server.acm.response.valid.ok" + "ok": { + "type": "alias", + "path": "beat.stats.apm_server.acm.response.valid.ok" + } + } + } + } + }, + "unset": { + "type": "alias", + "path": "beat.stats.apm_server.acm.unset" + } + } + }, + "agentcfg": { + "properties": { + "elasticsearch": { + "properties": { + "cache": { + "properties": { + "entries": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.agentcfg.elasticsearch.cache.entries.count" + } + } + }, + "refresh": { + "properties": { + "failures": { + "type": "alias", + "path": "beat.stats.apm_server.agentcfg.elasticsearch.cache.refresh.failures" + }, + "successes": { + "type": "alias", + "path": "beat.stats.apm_server.agentcfg.elasticsearch.cache.refresh.successes" + } + } + } + } + }, + "fetch": { + "properties": { + "es": { + "type": "alias", + "path": "beat.stats.apm_server.agentcfg.elasticsearch.fetch.es" + }, + "fallback": { + "type": "alias", + "path": "beat.stats.apm_server.agentcfg.elasticsearch.fetch.fallback" + }, + "invalid": { + "type": "alias", + "path": "beat.stats.apm_server.agentcfg.elasticsearch.fetch.invalid" + }, + "unavailable": { + "type": "alias", + "path": "beat.stats.apm_server.agentcfg.elasticsearch.fetch.unavailable" + } + } + } + } + } + } + }, + "jaeger": { + "properties": { + "grpc": { + "properties": { + "collect": { + "properties": { + "request": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.collect.request.count" + } + } + }, + "response": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.collect.response.count" + }, + "errors": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.collect.response.errors.count" + }, + "ratelimit": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.collect.response.errors.ratelimit" + }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.collect.response.errors.timeout" + }, + "unauthorized": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.collect.response.errors.unauthorized" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.collect.response.valid.count" + } + } + } + } + } + } + }, + "sampling": { + "properties": { + "event": { + "properties": { + "received": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.sampling.event.received.count" + } + } + } + } + }, + "request": { + "properties": { + "count": { + "type": "alias", + "path": 
"beat.stats.apm_server.jaeger.grpc.sampling.request.count" + } + } + }, + "response": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.sampling.response.count" + }, + "errors": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.sampling.response.errors.count" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.jaeger.grpc.sampling.response.valid.count" + } + } + } + } + } + } + } + } + } + } + }, + "otlp": { + "properties": { + "grpc": { + "properties": { + "logs": { + "properties": { + "request": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.logs.request.count" + } + } + }, + "response": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.logs.response.count" + }, + "errors": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.logs.response.errors.count" + }, + "ratelimit": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.logs.response.errors.ratelimit" + }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.logs.response.errors.timeout" + }, + "unauthorized": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.logs.response.errors.unauthorized" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.logs.response.valid.count" + } + } + } + } + } + } + }, + "metrics": { + "properties": { + "consumer": { + "properties": { + "unsupported_dropped": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.metrics.consumer.unsupported_dropped" + } + } + }, + "request": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.metrics.request.count" + } + } + }, + "response": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.metrics.response.count" + }, + "errors": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.metrics.response.errors.count" + }, + "ratelimit": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.metrics.response.errors.ratelimit" + }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.metrics.response.errors.timeout" + }, + "unauthorized": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.metrics.response.errors.unauthorized" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.metrics.response.valid.count" + } + } + } + } + } + } + }, + "traces": { + "properties": { + "request": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.traces.request.count" + } + } + }, + "response": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.traces.response.count" + }, + "errors": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.traces.response.errors.count" + }, + "ratelimit": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.traces.response.errors.ratelimit" + }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.traces.response.errors.timeout" + }, + "unauthorized": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.traces.response.errors.unauthorized" + } + } 
+ }, + "valid": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.grpc.traces.response.valid.count" + } + } + } + } + } + } + } + } + }, + "http": { + "properties": { + "logs": { + "properties": { + "request": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.logs.request.count" + } + } + }, + "response": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.logs.response.count" + }, + "errors": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.logs.response.errors.count" + }, + "ratelimit": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.logs.response.errors.ratelimit" + }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.logs.response.errors.timeout" + }, + "unauthorized": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.logs.response.errors.unauthorized" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.logs.response.valid.count" + } + } + } + } + } + } + }, + "metrics": { + "properties": { + "consumer": { + "properties": { + "unsupported_dropped": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.metrics.consumer.unsupported_dropped" + } + } + }, + "request": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.metrics.request.count" + } + } + }, + "response": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.metrics.response.count" + }, + "errors": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.metrics.response.errors.count" + }, + "ratelimit": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.metrics.response.errors.ratelimit" + }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.metrics.response.errors.timeout" + }, + "unauthorized": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.metrics.response.errors.unauthorized" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.metrics.response.valid.count" + } + } + } + } + } + } + }, + "traces": { + "properties": { + "request": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.traces.request.count" + } + } + }, + "response": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.traces.response.count" + }, + "errors": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.traces.response.errors.count" + }, + "ratelimit": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.traces.response.errors.ratelimit" + }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.traces.response.errors.timeout" + }, + "unauthorized": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.traces.response.errors.unauthorized" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "alias", + "path": "beat.stats.apm_server.otlp.http.traces.response.valid.count" + } + } + } + } } } } @@ -1189,248 +2053,180 @@ } } }, - "decoder": { + "processor": { "properties": { - "deflate": { + "error": { "properties": { - "content-length": { - "type": "alias", - "path": "beat.stats.apm_server.decoder.deflate.content-length" - }, - 
"count": { + "transformations": { "type": "alias", - "path": "beat.stats.apm_server.decoder.deflate.count" + "path": "beat.stats.apm_server.processor.error.transformations" } } }, - "gzip": { + "metric": { "properties": { - "content-length": { - "type": "alias", - "path": "beat.stats.apm_server.decoder.gzip.content-length" - }, - "count": { + "transformations": { "type": "alias", - "path": "beat.stats.apm_server.decoder.gzip.count" + "path": "beat.stats.apm_server.processor.metric.transformations" } } }, - "missing-content-length": { + "span": { "properties": { - "count": { + "transformations": { "type": "alias", - "path": "beat.stats.apm_server.decoder.missing-content-length.count" + "path": "beat.stats.apm_server.processor.span.transformations" } } }, - "reader": { + "stream": { "properties": { - "count": { + "accepted": { "type": "alias", - "path": "beat.stats.apm_server.decoder.reader.count" + "path": "beat.stats.apm_server.processor.stream.accepted" }, - "size": { - "type": "alias", - "path": "beat.stats.apm_server.decoder.reader.size" + "errors": { + "properties": { + "invalid": { + "type": "alias", + "path": "beat.stats.apm_server.processor.stream.errors.invalid" + }, + "toolarge": { + "type": "alias", + "path": "beat.stats.apm_server.processor.stream.errors.toolarge" + } + } } } }, - "uncompressed": { + "transaction": { "properties": { - "content-length": { - "type": "alias", - "path": "beat.stats.apm_server.decoder.uncompressed.content-length" - }, - "count": { + "transformations": { "type": "alias", - "path": "beat.stats.apm_server.decoder.uncompressed.count" + "path": "beat.stats.apm_server.processor.transaction.transformations" } } } } }, - "processor": { + "root": { "properties": { - "error": { + "request": { "properties": { - "decoding": { - "properties": { - "count": { - "type": "alias", - "path": "beat.stats.apm_server.processor.error.decoding.count" - }, - "errors": { - "type": "alias", - "path": "beat.stats.apm_server.processor.error.decoding.errors" - } - } - }, - "frames": { - "type": "alias", - "path": "beat.stats.apm_server.processor.error.frames" - }, - "spans": { - "type": "alias", - "path": "beat.stats.apm_server.processor.error.spans" - }, - "stacktraces": { + "count": { "type": "alias", - "path": "beat.stats.apm_server.processor.error.stacktraces" - }, - "transformations": { + "path": "beat.stats.apm_server.root.request.count" + } + } + }, + "response": { + "properties": { + "count": { "type": "alias", - "path": "beat.stats.apm_server.processor.error.transformations" + "path": "beat.stats.apm_server.root.response.count" }, - "validation": { + "errors": { "properties": { + "closed": { + "type": "alias", + "path": "beat.stats.apm_server.root.response.errors.closed" + }, "count": { "type": "alias", - "path": "beat.stats.apm_server.processor.error.validation.count" + "path": "beat.stats.apm_server.root.response.errors.count" }, - "errors": { + "decode": { "type": "alias", - "path": "beat.stats.apm_server.processor.error.validation.errors" - } - } - } - } - }, - "metric": { - "properties": { - "decoding": { - "properties": { - "count": { + "path": "beat.stats.apm_server.root.response.errors.decode" + }, + "forbidden": { "type": "alias", - "path": "beat.stats.apm_server.processor.metric.decoding.count" + "path": "beat.stats.apm_server.root.response.errors.forbidden" }, - "errors": { + "internal": { "type": "alias", - "path": "beat.stats.apm_server.processor.metric.decoding.errors" - } - } - }, - "transformations": { - "type": "alias", - "path": 
"beat.stats.apm_server.processor.metric.transformations" - }, - "validation": { - "properties": { - "count": { + "path": "beat.stats.apm_server.root.response.errors.internal" + }, + "invalidquery": { "type": "alias", - "path": "beat.stats.apm_server.processor.metric.validation.count" + "path": "beat.stats.apm_server.root.response.errors.invalidquery" }, - "errors": { + "method": { "type": "alias", - "path": "beat.stats.apm_server.processor.metric.validation.errors" - } - } - } - } - }, - "sourcemap": { - "properties": { - "counter": { - "type": "alias", - "path": "beat.stats.apm_server.processor.sourcemap.counter" - }, - "decoding": { - "properties": { - "count": { + "path": "beat.stats.apm_server.root.response.errors.method" + }, + "notfound": { "type": "alias", - "path": "beat.stats.apm_server.processor.sourcemap.decoding.count" + "path": "beat.stats.apm_server.root.response.errors.notfound" }, - "errors": { + "queue": { "type": "alias", - "path": "beat.stats.apm_server.processor.sourcemap.decoding.errors" - } - } - }, - "validation": { - "properties": { - "count": { + "path": "beat.stats.apm_server.root.response.errors.queue" + }, + "ratelimit": { "type": "alias", - "path": "beat.stats.apm_server.processor.sourcemap.validation.count" + "path": "beat.stats.apm_server.root.response.errors.ratelimit" }, - "errors": { + "timeout": { "type": "alias", - "path": "beat.stats.apm_server.processor.sourcemap.validation.errors" - } - } - } - } - }, - "span": { - "properties": { - "transformations": { - "type": "alias", - "path": "beat.stats.apm_server.processor.span.transformations" - } - } - }, - "transaction": { - "properties": { - "decoding": { - "properties": { - "count": { + "path": "beat.stats.apm_server.root.response.errors.timeout" + }, + "toolarge": { + "type": "alias", + "path": "beat.stats.apm_server.root.response.errors.toolarge" + }, + "unauthorized": { "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.decoding.count" + "path": "beat.stats.apm_server.root.response.errors.unauthorized" }, - "errors": { + "unavailable": { + "type": "alias", + "path": "beat.stats.apm_server.root.response.errors.unavailable" + }, + "validate": { "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.decoding.errors" + "path": "beat.stats.apm_server.root.response.errors.validate" } } }, - "frames": { - "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.frames" - }, - "spans": { - "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.spans" - }, - "stacktraces": { - "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.stacktraces" - }, - "transactions": { - "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.transactions" - }, - "transformations": { - "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.transformations" - }, - "validation": { + "valid": { "properties": { + "accepted": { + "type": "alias", + "path": "beat.stats.apm_server.root.response.valid.accepted" + }, "count": { "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.validation.count" + "path": "beat.stats.apm_server.root.response.valid.count" }, - "errors": { + "notmodified": { + "type": "alias", + "path": "beat.stats.apm_server.root.response.valid.notmodified" + }, + "ok": { "type": "alias", - "path": "beat.stats.apm_server.processor.transaction.validation.errors" + "path": "beat.stats.apm_server.root.response.valid.ok" } } } } + }, + "unset": { + "type": "alias", + "path": 
"beat.stats.apm_server.root.unset" + } + } + }, + "sampling": { + "properties": { + "transactions_dropped": { + "type": "alias", + "path": "beat.stats.apm_server.sampling.transactions_dropped" } } }, "server": { "properties": { - "concurrent": { - "properties": { - "wait": { - "properties": { - "ms": { - "type": "alias", - "path": "beat.stats.apm_server.server.concurrent.wait.ms" - } - } - } - } - }, "request": { "properties": { "count": { @@ -1471,10 +2267,18 @@ "type": "alias", "path": "beat.stats.apm_server.server.response.errors.internal" }, + "invalidquery": { + "type": "alias", + "path": "beat.stats.apm_server.server.response.errors.invalidquery" + }, "method": { "type": "alias", "path": "beat.stats.apm_server.server.response.errors.method" }, + "notfound": { + "type": "alias", + "path": "beat.stats.apm_server.server.response.errors.notfound" + }, "queue": { "type": "alias", "path": "beat.stats.apm_server.server.response.errors.queue" @@ -1483,6 +2287,10 @@ "type": "alias", "path": "beat.stats.apm_server.server.response.errors.ratelimit" }, + "timeout": { + "type": "alias", + "path": "beat.stats.apm_server.server.response.errors.timeout" + }, "toolarge": { "type": "alias", "path": "beat.stats.apm_server.server.response.errors.toolarge" @@ -1491,6 +2299,10 @@ "type": "alias", "path": "beat.stats.apm_server.server.response.errors.unauthorized" }, + "unavailable": { + "type": "alias", + "path": "beat.stats.apm_server.server.response.errors.unavailable" + }, "validate": { "type": "alias", "path": "beat.stats.apm_server.server.response.errors.validate" @@ -1507,6 +2319,10 @@ "type": "alias", "path": "beat.stats.apm_server.server.response.valid.count" }, + "notmodified": { + "type": "alias", + "path": "beat.stats.apm_server.server.response.valid.notmodified" + }, "ok": { "type": "alias", "path": "beat.stats.apm_server.server.response.valid.ok" @@ -1514,49 +2330,10 @@ } } } - } - } - }, - "sampling": { - "properties": { - "transactions_dropped": { - "type": "long" }, - "tail": { - "properties": { - "dynamic_service_groups": { - "type": "long" - }, - "storage": { - "properties": { - "lsm_size": { - "type": "long" - }, - "value_log_size": { - "type": "long" - } - } - }, - "events": { - "properties": { - "processed": { - "type": "long" - }, - "dropped": { - "type": "long" - }, - "stored": { - "type": "long" - }, - "sampled": { - "type": "long" - }, - "head_unsampled": { - "type": "long" - } - } - } - } + "unset": { + "type": "alias", + "path": "beat.stats.apm_server.server.unset" } } } @@ -1985,6 +2762,42 @@ } } } + }, + "output": { + "properties": { + "elasticsearch": { + "properties": { + "bulk_requests": { + "properties": { + "available": { + "type": "alias", + "path": "beat.stats.output.elasticsearch.bulk_requests.available" + }, + "completed": { + "type": "alias", + "path": "beat.stats.output.elasticsearch.bulk_requests.completed" + } + } + }, + "indexers": { + "properties": { + "active": { + "type": "alias", + "path": "beat.stats.output.elasticsearch.indexers.active" + }, + "created": { + "type": "alias", + "path": "beat.stats.output.elasticsearch.indexers.created" + }, + "destroyed": { + "type": "alias", + "path": "beat.stats.output.elasticsearch.indexers.destroyed" + } + } + } + } + } + } } } }, diff --git a/x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats.json b/x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats.json index 6dee05564cc10..d699317c29da3 100644 --- 
a/x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats.json +++ b/x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats.json @@ -346,17 +346,11 @@ "response": { "properties": { "count": { - "type": "long" + "type": "long" }, "errors": { "properties": { - "validate": { - "type": "long" - }, - "internal": { - "type": "long" - }, - "queue": { + "closed": { "type": "long" }, "count": { @@ -365,13 +359,13 @@ "decode": { "type": "long" }, - "toolarge": { + "forbidden": { "type": "long" }, - "unavailable": { + "internal": { "type": "long" }, - "forbidden": { + "invalidquery": { "type": "long" }, "method": { @@ -380,125 +374,454 @@ "notfound": { "type": "long" }, - "invalidquery": { + "queue": { "type": "long" }, "ratelimit": { "type": "long" }, - "closed": { + "timeout": { + "type": "long" + }, + "toolarge": { "type": "long" }, "unauthorized": { "type": "long" + }, + "unavailable": { + "type": "long" + }, + "validate": { + "type": "long" } } }, "valid": { "properties": { - "notmodified": { + "accepted": { "type": "long" }, "count": { "type": "long" }, - "ok": { + "notmodified": { "type": "long" }, - "accepted": { - "type": "long" - } - } - }, - "unset": { - "type": "long" - }, - "request": { - "properties": { - "count": { + "ok": { "type": "long" } } } } + }, + "unset": { + "type": "long" } } }, - "server": { + "agentcfg": { "properties": { - "request": { + "elasticsearch": { "properties": { - "count": { - "type": "long" - } - } - }, - "concurrent": { - "properties": { - "wait": { + "cache": { "properties": { - "ms": { - "type": "long" + "entries": { + "properties": { + "count": { + "type": "long" + } + } + }, + "refresh": { + "properties": { + "failures": { + "type": "long" + }, + "successes": { + "type": "long" + } + } } } - } - } - }, - "response": { - "properties": { - "count": { - "type": "long" }, - "errors": { + "fetch": { "properties": { - "count": { + "es": { "type": "long" }, - "toolarge": { + "fallback": { "type": "long" }, - "validate": { + "invalid": { "type": "long" }, - "ratelimit": { + "unavailable": { "type": "long" + } + } + } + } + } + } + }, + "jaeger": { + "properties": { + "grpc": { + "properties": { + "collect": { + "properties": { + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "queue": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } + }, + "sampling": { + "properties": { + "event": { + "properties": { + "received": { + "properties": { + "count": { + "type": "long" + } + } + } + } }, - "closed": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "forbidden": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } + } + } + } + } + }, + "otlp": { + "properties": { + "grpc": { + "properties": { + "logs": { + "properties": { + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "concurrency": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + 
}, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } + }, + "metrics": { + "properties": { + "consumer": { + "properties": { + "unsupported_dropped": { + "type": "long" + } + } }, - "unauthorized": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "internal": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } + }, + "traces": { + "properties": { + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "decode": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } + } + } + }, + "http": { + "properties": { + "logs": { + "properties": { + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "method": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } } } }, - "valid": { + "metrics": { "properties": { - "ok": { - "type": "long" + "consumer": { + "properties": { + "unsupported_dropped": { + "type": "long" + } + } }, - "accepted": { - "type": "long" + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "count": { - "type": "long" + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } + } + } + }, + "traces": { + "properties": { + "request": { + "properties": { + "count": { + "type": "long" + } + } + }, + "response": { + "properties": { + "count": { + "type": "long" + }, + "errors": { + "properties": { + "count": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "unauthorized": { + "type": "long" + } + } + }, + "valid": { + "properties": { + "count": { + "type": "long" + } + } + } + } } } } @@ -506,195 +829,138 @@ } } }, - "decoder": { + "processor": { "properties": { - "deflate": { + "error": { "properties": { - "content-length": { - "type": "long" - }, - "count": { + "transformations": { "type": "long" } } }, - "gzip": { + "metric": { "properties": { - "content-length": { - "type": "long" - }, - "count": { + "transformations": { "type": "long" } } }, - "uncompressed": { + "span": { "properties": { - "content-length": { - "type": "long" - }, - "count": { + "transformations": { "type": "long" } } }, - "reader": { + "stream": { "properties": { - "size": { + "accepted": { "type": "long" 
}, - "count": { - "type": "long" + "errors": { + "properties": { + "invalid": { + "type": "long" + }, + "toolarge": { + "type": "long" + } + } } } }, - "missing-content-length": { + "transaction": { "properties": { - "count": { + "transformations": { "type": "long" } } } } - }, - "processor": { + "root": { "properties": { - "metric": { + "request": { "properties": { - "decoding": { - "properties": { - "errors": { - "type": "long" - }, - "count": { - "type": "long" - } - } - }, - "validation": { - "properties": { - "errors": { - "type": "long" - }, - "count": { - "type": "long" - } - } - }, - "transformations": { + "count": { "type": "long" } } }, - "sourcemap": { + "response": { "properties": { - "counter": { + "count": { "type": "long" }, - "decoding": { + "errors": { "properties": { - "errors": { + "closed": { "type": "long" }, "count": { "type": "long" - } - } - }, - "validation": { - "properties": { - "errors": { + }, + "decode": { "type": "long" }, - "count": { + "forbidden": { "type": "long" - } - } - } - } - }, - "transaction": { - "properties": { - "decoding": { - "properties": { - "errors": { + }, + "internal": { "type": "long" }, - "count": { + "invalidquery": { "type": "long" - } - } - }, - "validation": { - "properties": { - "errors": { + }, + "method": { "type": "long" }, - "count": { + "notfound": { "type": "long" - } - } - }, - "transformations": { - "type": "long" - }, - "transactions": { - "type": "long" - }, - "spans": { - "type": "long" - }, - "stacktraces": { - "type": "long" - }, - "frames": { - "type": "long" - } - } - }, - "error": { - "properties": { - "decoding": { - "properties": { - "errors": { + }, + "queue": { "type": "long" }, - "count": { + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "toolarge": { + "type": "long" + }, + "unauthorized": { + "type": "long" + }, + "unavailable": { + "type": "long" + }, + "validate": { "type": "long" } } }, - "validation": { + "valid": { "properties": { - "errors": { + "accepted": { "type": "long" }, "count": { "type": "long" + }, + "notmodified": { + "type": "long" + }, + "ok": { + "type": "long" } } - }, - "transformations": { - "type": "long" - }, - "errors": { - "type": "long" - }, - "stacktraces": { - "type": "long" - }, - "frames": { - "type": "long" } } }, - "span": { - "properties": { - "transformations": { - "type": "long" - } - } + "unset": { + "type": "long" } } }, @@ -702,42 +968,95 @@ "properties": { "transactions_dropped": { "type": "long" + } + } + }, + "server": { + "properties": { + "request": { + "properties": { + "count": { + "type": "long" + } + } }, - "tail": { + "response": { "properties": { - "dynamic_service_groups": { + "count": { "type": "long" }, - "storage": { + "errors": { "properties": { - "lsm_size": { + "closed": { "type": "long" }, - "value_log_size": { + "concurrency": { + "type": "long" + }, + "count": { + "type": "long" + }, + "decode": { + "type": "long" + }, + "forbidden": { + "type": "long" + }, + "internal": { + "type": "long" + }, + "invalidquery": { + "type": "long" + }, + "method": { + "type": "long" + }, + "notfound": { + "type": "long" + }, + "queue": { + "type": "long" + }, + "ratelimit": { + "type": "long" + }, + "timeout": { + "type": "long" + }, + "toolarge": { + "type": "long" + }, + "unauthorized": { + "type": "long" + }, + "unavailable": { + "type": "long" + }, + "validate": { "type": "long" } } }, - "events": { + "valid": { "properties": { - "processed": { - "type": "long" - }, - "dropped": { + "accepted": { "type": "long" }, - "stored": { + 
"count": { "type": "long" }, - "sampled": { + "notmodified": { "type": "long" }, - "head_unsampled": { + "ok": { "type": "long" } } } } + }, + "unset": { + "type": "long" } } } @@ -893,6 +1212,37 @@ } } } + }, + "output": { + "properties": { + "elasticsearch": { + "properties": { + "bulk_requests": { + "properties": { + "available": { + "type": "long" + }, + "completed": { + "type": "long" + } + } + }, + "indexers": { + "properties": { + "active": { + "type": "long" + }, + "created": { + "type": "long" + }, + "destroyed": { + "type": "long" + } + } + } + } + } + } } } }, diff --git a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunner.java b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunner.java index 5cb9c0cf9c051..2ff4863a12b6e 100644 --- a/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunner.java +++ b/x-pack/plugin/enrich/src/main/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunner.java @@ -25,11 +25,11 @@ import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; import org.elasticsearch.action.admin.indices.segments.IndexSegments; import org.elasticsearch.action.admin.indices.segments.IndexShardSegments; -import org.elasticsearch.action.admin.indices.segments.IndicesSegmentResponse; import org.elasticsearch.action.admin.indices.segments.IndicesSegmentsRequest; import org.elasticsearch.action.admin.indices.segments.ShardSegments; import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest; import org.elasticsearch.action.bulk.BulkItemResponse; +import org.elasticsearch.action.support.DefaultShardOperationFailedException; import org.elasticsearch.client.internal.Client; import org.elasticsearch.client.internal.FilterClient; import org.elasticsearch.client.internal.OriginSettingClient; @@ -572,48 +572,82 @@ private void refreshEnrichIndex(final String destinationIndexName, final int att protected void ensureSingleSegment(final String destinationIndexName, final int attempt) { enrichOriginClient().admin() .indices() - .segments(new IndicesSegmentsRequest(destinationIndexName), new DelegatingActionListener<>(listener) { - @Override - public void onResponse(IndicesSegmentResponse indicesSegmentResponse) { - IndexSegments indexSegments = indicesSegmentResponse.getIndices().get(destinationIndexName); - if (indexSegments == null) { + .segments(new IndicesSegmentsRequest(destinationIndexName), listener.delegateFailureAndWrap((l, indicesSegmentResponse) -> { + int failedShards = indicesSegmentResponse.getFailedShards(); + if (failedShards > 0) { + // Encountered a problem while querying the segments for the enrich index. Try and surface the problem in the log. + logger.warn( + "Policy [{}]: Encountered [{}] shard level failures while querying the segments for enrich index [{}]. " + + "Turn on DEBUG logging for details.", + policyName, + failedShards, + enrichIndexName + ); + if (logger.isDebugEnabled()) { + DefaultShardOperationFailedException[] shardFailures = indicesSegmentResponse.getShardFailures(); + int failureNumber = 1; + String logPrefix = "Policy [" + policyName + "]: Encountered shard failure ["; + String logSuffix = " of " + + shardFailures.length + + "] while querying segments for enrich index [" + + enrichIndexName + + "]. 
Shard ["; + for (DefaultShardOperationFailedException shardFailure : shardFailures) { + logger.debug( + logPrefix + failureNumber + logSuffix + shardFailure.index() + "][" + shardFailure.shardId() + "]", + shardFailure.getCause() + ); + failureNumber++; + } + } + } + IndexSegments indexSegments = indicesSegmentResponse.getIndices().get(destinationIndexName); + if (indexSegments == null) { + if (indicesSegmentResponse.getShardFailures().length == 0) { throw new ElasticsearchException( "Could not locate segment information for newly created index [{}]", destinationIndexName ); + } else { + DefaultShardOperationFailedException shardFailure = indicesSegmentResponse.getShardFailures()[0]; + throw new ElasticsearchException( + "Could not obtain segment information for newly created index [{}]; shard info [{}][{}]", + shardFailure.getCause(), + destinationIndexName, + shardFailure.index(), + shardFailure.shardId() + ); } - Map indexShards = indexSegments.getShards(); - assert indexShards.size() == 1 : "Expected enrich index to contain only one shard"; - ShardSegments[] shardSegments = indexShards.get(0).shards(); - assert shardSegments.length == 1 : "Expected enrich index to contain no replicas at this point"; - ShardSegments primarySegments = shardSegments[0]; - if (primarySegments.getSegments().size() > 1) { - int nextAttempt = attempt + 1; - if (nextAttempt > maxForceMergeAttempts) { - delegate.onFailure( - new ElasticsearchException( - "Force merging index [{}] attempted [{}] times but did not result in one segment.", - destinationIndexName, - attempt, - maxForceMergeAttempts - ) - ); - } else { - logger.debug( - "Policy [{}]: Force merge result contains more than one segment [{}], retrying (attempt {}/{})", - policyName, - primarySegments.getSegments().size(), - nextAttempt, - maxForceMergeAttempts - ); - forceMergeEnrichIndex(destinationIndexName, nextAttempt); - } + } + Map indexShards = indexSegments.getShards(); + assert indexShards.size() == 1 : "Expected enrich index to contain only one shard"; + ShardSegments[] shardSegments = indexShards.get(0).shards(); + assert shardSegments.length == 1 : "Expected enrich index to contain no replicas at this point"; + ShardSegments primarySegments = shardSegments[0]; + if (primarySegments.getSegments().size() > 1) { + int nextAttempt = attempt + 1; + if (nextAttempt > maxForceMergeAttempts) { + throw new ElasticsearchException( + "Force merging index [{}] attempted [{}] times but did not result in one segment.", + destinationIndexName, + attempt, + maxForceMergeAttempts + ); } else { - // Force merge down to one segment successful - setIndexReadOnly(destinationIndexName); + logger.debug( + "Policy [{}]: Force merge result contains more than one segment [{}], retrying (attempt {}/{})", + policyName, + primarySegments.getSegments().size(), + nextAttempt, + maxForceMergeAttempts + ); + forceMergeEnrichIndex(destinationIndexName, nextAttempt); } + } else { + // Force merge down to one segment successful + setIndexReadOnly(destinationIndexName); } - }); + })); } private void setIndexReadOnly(final String destinationIndexName) { diff --git a/x-pack/plugin/enrich/src/test/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunnerTests.java b/x-pack/plugin/enrich/src/test/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunnerTests.java index 8ce1e7f350ccb..7ba3b356d6015 100644 --- a/x-pack/plugin/enrich/src/test/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunnerTests.java +++ 
b/x-pack/plugin/enrich/src/test/java/org/elasticsearch/xpack/enrich/EnrichPolicyRunnerTests.java @@ -33,6 +33,7 @@ import org.elasticsearch.action.admin.indices.settings.put.TransportUpdateSettingsAction; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.support.DefaultShardOperationFailedException; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.client.internal.Client; @@ -2048,6 +2049,254 @@ protected void ensureSingleSegment(String destinationIndexName, int attempt) { ensureEnrichIndexIsReadOnly(createdEnrichIndex); } + public void testRunnerWithEmptySegmentsResponse() throws Exception { + final String sourceIndex = "source-index"; + DocWriteResponse indexRequest = client().index(new IndexRequest().index(sourceIndex).id("id").source(""" + { + "field1": "value1", + "field2": 2, + "field3": "ignored", + "field4": "ignored", + "field5": "value5" + }""", XContentType.JSON).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)).actionGet(); + assertEquals(RestStatus.CREATED, indexRequest.status()); + + assertResponse( + client().search(new SearchRequest(sourceIndex).source(SearchSourceBuilder.searchSource().query(QueryBuilders.matchAllQuery()))), + sourceSearchResponse -> { + assertThat(sourceSearchResponse.getHits().getTotalHits().value, equalTo(1L)); + Map sourceDocMap = sourceSearchResponse.getHits().getAt(0).getSourceAsMap(); + assertNotNull(sourceDocMap); + assertThat(sourceDocMap.get("field1"), is(equalTo("value1"))); + assertThat(sourceDocMap.get("field2"), is(equalTo(2))); + assertThat(sourceDocMap.get("field3"), is(equalTo("ignored"))); + assertThat(sourceDocMap.get("field4"), is(equalTo("ignored"))); + assertThat(sourceDocMap.get("field5"), is(equalTo("value5"))); + } + ); + List enrichFields = List.of("field2", "field5"); + EnrichPolicy policy = new EnrichPolicy(EnrichPolicy.MATCH_TYPE, null, List.of(sourceIndex), "field1", enrichFields); + String policyName = "test1"; + + final long createTime = randomNonNegativeLong(); + String createdEnrichIndex = ".enrich-test1-" + createTime; + final AtomicReference exception = new AtomicReference<>(); + final CountDownLatch latch = new CountDownLatch(1); + ActionListener listener = createTestListener(latch, exception::set); + ClusterService clusterService = getInstanceFromNode(ClusterService.class); + IndexNameExpressionResolver resolver = getInstanceFromNode(IndexNameExpressionResolver.class); + Task asyncTask = testTaskManager.register("enrich", "policy_execution", new TaskAwareRequest() { + @Override + public void setParentTask(TaskId taskId) {} + + @Override + public void setRequestId(long requestId) {} + + @Override + public TaskId getParentTask() { + return TaskId.EMPTY_TASK_ID; + } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new ExecuteEnrichPolicyTask(id, type, action, getDescription(), parentTaskId, headers); + } + + @Override + public String getDescription() { + return policyName; + } + }); + ExecuteEnrichPolicyTask task = ((ExecuteEnrichPolicyTask) asyncTask); + // The executor would wrap the listener in order to clean up the task in the + // task manager, but we're just testing the runner, so we make sure to clean + // up after ourselves. 
+ ActionListener wrappedListener = ActionListener.runBefore( + listener, + () -> testTaskManager.unregister(task) + ); + + // Wrap the client so that when we receive the indices segments action, we intercept the request and complete it on another thread + // with an empty segments response. + Client client = new FilterClient(client()) { + @Override + protected void doExecute( + ActionType action, + Request request, + ActionListener listener + ) { + if (action.equals(IndicesSegmentsAction.INSTANCE)) { + testThreadPool.generic().execute(() -> { + @SuppressWarnings("unchecked") + ActionListener castListener = ((ActionListener) listener); + castListener.onResponse(new IndicesSegmentResponse(new ShardSegments[0], 0, 0, 0, List.of())); + }); + } else { + super.doExecute(action, request, listener); + } + } + }; + + EnrichPolicyRunner enrichPolicyRunner = new EnrichPolicyRunner( + policyName, + policy, + task, + wrappedListener, + clusterService, + getInstanceFromNode(IndicesService.class), + client, + resolver, + createdEnrichIndex, + randomIntBetween(1, 10000), + randomIntBetween(3, 10) + ); + + logger.info("Starting policy run"); + enrichPolicyRunner.run(); + if (latch.await(1, TimeUnit.MINUTES) == false) { + fail("Timeout while waiting for runner to complete"); + } + Exception exceptionThrown = exception.get(); + if (exceptionThrown == null) { + fail("Expected exception to be thrown from segment api"); + } + + // Validate exception information + assertThat(exceptionThrown, instanceOf(ElasticsearchException.class)); + assertThat(exceptionThrown.getMessage(), containsString("Could not locate segment information for newly created index")); + } + + public void testRunnerWithShardFailuresInSegmentResponse() throws Exception { + final String sourceIndex = "source-index"; + DocWriteResponse indexRequest = client().index(new IndexRequest().index(sourceIndex).id("id").source(""" + { + "field1": "value1", + "field2": 2, + "field3": "ignored", + "field4": "ignored", + "field5": "value5" + }""", XContentType.JSON).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)).actionGet(); + assertEquals(RestStatus.CREATED, indexRequest.status()); + + assertResponse( + client().search(new SearchRequest(sourceIndex).source(SearchSourceBuilder.searchSource().query(QueryBuilders.matchAllQuery()))), + sourceSearchResponse -> { + assertThat(sourceSearchResponse.getHits().getTotalHits().value, equalTo(1L)); + Map sourceDocMap = sourceSearchResponse.getHits().getAt(0).getSourceAsMap(); + assertNotNull(sourceDocMap); + assertThat(sourceDocMap.get("field1"), is(equalTo("value1"))); + assertThat(sourceDocMap.get("field2"), is(equalTo(2))); + assertThat(sourceDocMap.get("field3"), is(equalTo("ignored"))); + assertThat(sourceDocMap.get("field4"), is(equalTo("ignored"))); + assertThat(sourceDocMap.get("field5"), is(equalTo("value5"))); + } + ); + List enrichFields = List.of("field2", "field5"); + EnrichPolicy policy = new EnrichPolicy(EnrichPolicy.MATCH_TYPE, null, List.of(sourceIndex), "field1", enrichFields); + String policyName = "test1"; + + final long createTime = randomNonNegativeLong(); + String createdEnrichIndex = ".enrich-test1-" + createTime; + final AtomicReference exception = new AtomicReference<>(); + final CountDownLatch latch = new CountDownLatch(1); + ActionListener listener = createTestListener(latch, exception::set); + ClusterService clusterService = getInstanceFromNode(ClusterService.class); + IndexNameExpressionResolver resolver = getInstanceFromNode(IndexNameExpressionResolver.class); + Task asyncTask = 
testTaskManager.register("enrich", "policy_execution", new TaskAwareRequest() { + @Override + public void setParentTask(TaskId taskId) {} + + @Override + public void setRequestId(long requestId) {} + + @Override + public TaskId getParentTask() { + return TaskId.EMPTY_TASK_ID; + } + + @Override + public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { + return new ExecuteEnrichPolicyTask(id, type, action, getDescription(), parentTaskId, headers); + } + + @Override + public String getDescription() { + return policyName; + } + }); + ExecuteEnrichPolicyTask task = ((ExecuteEnrichPolicyTask) asyncTask); + // The executor would wrap the listener in order to clean up the task in the + // task manager, but we're just testing the runner, so we make sure to clean + // up after ourselves. + ActionListener wrappedListener = ActionListener.runBefore( + listener, + () -> testTaskManager.unregister(task) + ); + + // Wrap the client so that when we receive the indices segments action, we intercept the request and complete it on another thread + // with an failed segments response. + Client client = new FilterClient(client()) { + @Override + protected void doExecute( + ActionType action, + Request request, + ActionListener listener + ) { + if (action.equals(IndicesSegmentsAction.INSTANCE)) { + testThreadPool.generic().execute(() -> { + @SuppressWarnings("unchecked") + ActionListener castListener = ((ActionListener) listener); + castListener.onResponse( + new IndicesSegmentResponse( + new ShardSegments[0], + 0, + 0, + 3, + List.of( + new DefaultShardOperationFailedException(createdEnrichIndex, 1, new ElasticsearchException("failure1")), + new DefaultShardOperationFailedException(createdEnrichIndex, 2, new ElasticsearchException("failure2")), + new DefaultShardOperationFailedException(createdEnrichIndex, 3, new ElasticsearchException("failure3")) + ) + ) + ); + }); + } else { + super.doExecute(action, request, listener); + } + } + }; + + EnrichPolicyRunner enrichPolicyRunner = new EnrichPolicyRunner( + policyName, + policy, + task, + wrappedListener, + clusterService, + getInstanceFromNode(IndicesService.class), + client, + resolver, + createdEnrichIndex, + randomIntBetween(1, 10000), + randomIntBetween(3, 10) + ); + + logger.info("Starting policy run"); + enrichPolicyRunner.run(); + if (latch.await(1, TimeUnit.MINUTES) == false) { + fail("Timeout while waiting for runner to complete"); + } + Exception exceptionThrown = exception.get(); + if (exceptionThrown == null) { + fail("Expected exception to be thrown from segment api"); + } + + // Validate exception information + assertThat(exceptionThrown, instanceOf(ElasticsearchException.class)); + assertThat(exceptionThrown.getMessage(), containsString("Could not obtain segment information for newly created index")); + assertThat(exceptionThrown.getCause(), instanceOf(ElasticsearchException.class)); + assertThat(exceptionThrown.getCause().getMessage(), containsString("failure1")); + } + public void testRunnerCancel() throws Exception { final String sourceIndex = "source-index"; DocWriteResponse indexRequest = client().index(new IndexRequest().index(sourceIndex).id("id").source(""" @@ -2495,7 +2744,7 @@ private ActionListener createTestListener( final CountDownLatch latch, final Consumer exceptionConsumer ) { - return new LatchedActionListener<>(ActionListener.wrap((r) -> logger.info("Run complete"), exceptionConsumer), latch); + return new LatchedActionListener<>(ActionListener.wrap((r) -> logger.debug("Run complete"), 
exceptionConsumer), latch); } private void validateMappingMetadata(Map mapping, String policyName, EnrichPolicy policy) { diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/analyzer/AnalyzerRules.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/analyzer/AnalyzerRules.java index ce188511fe7bc..83afb2ac38a4e 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/analyzer/AnalyzerRules.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/analyzer/AnalyzerRules.java @@ -20,8 +20,6 @@ import java.util.function.Predicate; import java.util.function.Supplier; -import static java.util.Collections.singletonList; - public final class AnalyzerRules { public abstract static class AnalyzerRule extends Rule { @@ -138,14 +136,6 @@ public static List maybeResolveAgainstList( ) .toList(); - return singletonList( - ua.withUnresolvedMessage( - "Reference [" - + ua.qualifiedName() - + "] is ambiguous (to disambiguate use quotes or qualifiers); " - + "matches any of " - + refs - ) - ); + throw new IllegalStateException("Reference [" + ua.qualifiedName() + "] is ambiguous; " + "matches any of " + refs); } } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java index 01cc716a20547..c68e77d47ed0c 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Alias.java @@ -115,6 +115,11 @@ public Nullability nullable() { return child.nullable(); } + @Override + protected TypeResolution resolveType() { + return child.resolveType(); + } + @Override public DataType dataType() { return child.dataType(); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java index 0f7d92564c8ab..a3bc7ea621d8a 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java @@ -29,6 +29,10 @@ * - nestedParent - if nested, what's the parent (which might not be the immediate one) */ public class FieldAttribute extends TypedAttribute { + // TODO: This constant should not be used if possible; use .synthetic() + // https://github.com/elastic/elasticsearch/issues/105821 + public static final String SYNTHETIC_ATTRIBUTE_NAME_PREFIX = "$$"; + static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Attribute.class, "FieldAttribute", @@ -72,12 +76,11 @@ public FieldAttribute( boolean synthetic ) { super(source, name, type, qualifier, nullability, id, synthetic); - this.path = parent != null ? parent.name() : StringUtils.EMPTY; + this.path = parent != null ? parent.fieldName() : StringUtils.EMPTY; this.parent = parent; this.field = field; } - @SuppressWarnings("unchecked") public FieldAttribute(StreamInput in) throws IOException { /* * The funny casting dance with `(StreamInput & PlanStreamInput) in` is required @@ -131,6 +134,20 @@ public String path() { return path; } + /** + * The full name of the field in the index, including all parent fields. E.g. {@code parent.subfield.this_field}. 
+ */ + public String fieldName() { + // Before 8.15, the field name was the same as the attribute's name. + // On later versions, the attribute can be renamed when creating synthetic attributes. + // TODO: We should use synthetic() to check for that case. + // https://github.com/elastic/elasticsearch/issues/105821 + if (name().startsWith(SYNTHETIC_ATTRIBUTE_NAME_PREFIX) == false) { + return name(); + } + return Strings.hasText(path) ? path + "." + field.getName() : field.getName(); + } + public String qualifiedPath() { // return only the qualifier is there's no path return qualifier() != null ? qualifier() + (Strings.hasText(path) ? "." + path : StringUtils.EMPTY) : path; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/NamedExpression.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/NamedExpression.java index e3e9a60180da7..266ad8e2bb051 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/NamedExpression.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/NamedExpression.java @@ -55,6 +55,10 @@ public boolean synthetic() { return synthetic; } + /** + * Try to return either {@code this} if it is an {@link Attribute}, or a {@link ReferenceAttribute} to it otherwise. + * Return an {@link UnresolvedAttribute} if this is unresolved. + */ public abstract Attribute toAttribute(); @Override diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java index c3593e91c537e..ab05a71b0e1c6 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/TypeResolutions.java @@ -9,6 +9,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression.TypeResolution; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import java.util.Locale; import java.util.StringJoiner; @@ -176,19 +177,60 @@ public static TypeResolution isType( ParamOrdinal paramOrd, String... acceptedTypes ) { - return predicate.test(e.dataType()) || e.dataType() == NULL - ? TypeResolution.TYPE_RESOLVED - : new TypeResolution( - format( - null, - "{}argument of [{}] must be [{}], found value [{}] type [{}]", - paramOrd == null || paramOrd == DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ", - operationName, - acceptedTypesForErrorMsg(acceptedTypes), - name(e), - e.dataType().typeName() - ) - ); + return isType(e, predicate, operationName, paramOrd, false, acceptedTypes); + } + + public static TypeResolution isTypeOrUnionType( + Expression e, + Predicate predicate, + String operationName, + ParamOrdinal paramOrd, + String... acceptedTypes + ) { + return isType(e, predicate, operationName, paramOrd, true, acceptedTypes); + } + + public static TypeResolution isType( + Expression e, + Predicate predicate, + String operationName, + ParamOrdinal paramOrd, + boolean allowUnionTypes, + String... acceptedTypes + ) { + if (predicate.test(e.dataType()) || e.dataType() == NULL) { + return TypeResolution.TYPE_RESOLVED; + } + + // TODO: Shouldn't we perform widening of small numerical types here? 
+ if (allowUnionTypes + && e instanceof FieldAttribute fa + && fa.field() instanceof InvalidMappedField imf + && imf.types().stream().allMatch(predicate)) { + return TypeResolution.TYPE_RESOLVED; + } + + return new TypeResolution( + errorStringIncompatibleTypes(operationName, paramOrd, name(e), e.dataType(), acceptedTypesForErrorMsg(acceptedTypes)) + ); + } + + private static String errorStringIncompatibleTypes( + String operationName, + ParamOrdinal paramOrd, + String argumentName, + DataType foundType, + String... acceptedTypes + ) { + return format( + null, + "{}argument of [{}] must be [{}], found value [{}] type [{}]", + paramOrd == null || paramOrd == DEFAULT ? "" : paramOrd.name().toLowerCase(Locale.ROOT) + " ", + operationName, + acceptedTypesForErrorMsg(acceptedTypes), + argumentName, + foundType.typeName() + ); } private static String acceptedTypesForErrorMsg(String... acceptedTypes) { diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedAttribute.java index 87ef37cb84d1f..fe04df8e7e1d8 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/UnresolvedAttribute.java @@ -96,6 +96,11 @@ public UnresolvedAttribute withUnresolvedMessage(String unresolvedMessage) { return new UnresolvedAttribute(source(), name(), qualifier(), id(), unresolvedMessage, resolutionMetadata()); } + @Override + protected TypeResolution resolveType() { + return new TypeResolution("unresolved attribute [" + name() + "]"); + } + @Override public DataType dataType() { throw new UnresolvedException("dataType", this); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java index 6121c9b36442b..163b1f89f2abb 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/rule/Rule.java @@ -6,8 +6,8 @@ */ package org.elasticsearch.xpack.esql.core.rule; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; import org.elasticsearch.xpack.esql.core.tree.Node; import org.elasticsearch.xpack.esql.core.util.ReflectionUtils; diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java index 9b088cfb19f6c..9b3d7950c2a01 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java @@ -17,6 +17,7 @@ import java.util.Objects; import java.util.Set; import java.util.TreeMap; +import java.util.stream.Collectors; /** * Representation of field mapped differently across indices. 
@@ -64,6 +65,10 @@ private InvalidMappedField(StreamInput in) throws IOException { this(in.readString(), in.readString(), in.readImmutableMap(StreamInput::readString, i -> i.readNamedWriteable(EsField.class))); } + public Set types() { + return typesToIndices.keySet().stream().map(DataType::fromTypeName).collect(Collectors.toSet()); + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(getName()); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java index 47246a4e190dd..4ba3658697c0d 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java @@ -354,6 +354,9 @@ public static Number parseIntegral(String string) throws InvalidArgumentExceptio } return bi; } + if (bi.compareTo(BigInteger.valueOf(Long.MIN_VALUE)) < 0) { + throw new InvalidArgumentException("Magnitude of negative number [{}] is too large", string); + } // try to downsize to int if possible (since that's the most common type) if (bi.intValue() == bi.longValue()) { // ternary operator would always promote to Long return bi.intValueExact(); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/MaxDoubleAggregator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/MaxDoubleAggregator.java index ee6555c4af67d..f0804278e5002 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/MaxDoubleAggregator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/MaxDoubleAggregator.java @@ -16,7 +16,7 @@ class MaxDoubleAggregator { public static double init() { - return Double.MIN_VALUE; + return -Double.MAX_VALUE; } public static double combine(double current, double v) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AsyncOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AsyncOperator.java index 061cefc86bed0..0fed88370a144 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AsyncOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/AsyncOperator.java @@ -21,13 +21,11 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.seqno.LocalCheckpointTracker; import org.elasticsearch.index.seqno.SequenceNumbers; -import org.elasticsearch.tasks.TaskCancelledException; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; import java.util.Map; import java.util.Objects; -import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.LongAdder; /** @@ -40,7 +38,7 @@ public abstract class AsyncOperator implements Operator { private volatile SubscribableListener blockedFuture; private final Map buffers = ConcurrentCollections.newConcurrentMap(); - private final AtomicReference failure = new AtomicReference<>(); + private final FailureCollector failureCollector = new FailureCollector(); private final DriverContext driverContext; private final int maxOutstandingRequests; @@ -77,7 +75,7 @@ public boolean needsInput() { @Override public void addInput(Page input) { - if (failure.get() != null) { + if (failureCollector.hasFailure()) { input.releaseBlocks(); 
return; } @@ -90,7 +88,7 @@ public void addInput(Page input) { onSeqNoCompleted(seqNo); }, e -> { releasePageOnAnyThread(input); - onFailure(e); + failureCollector.unwrapAndCollect(e); onSeqNoCompleted(seqNo); }); final long startNanos = System.nanoTime(); @@ -121,31 +119,12 @@ private void releasePageOnAnyThread(Page page) { protected abstract void doClose(); - private void onFailure(Exception e) { - failure.getAndUpdate(first -> { - if (first == null) { - return e; - } - // ignore subsequent TaskCancelledException exceptions as they don't provide useful info. - if (ExceptionsHelper.unwrap(e, TaskCancelledException.class) != null) { - return first; - } - if (ExceptionsHelper.unwrap(first, TaskCancelledException.class) != null) { - return e; - } - if (ExceptionsHelper.unwrapCause(first) != ExceptionsHelper.unwrapCause(e)) { - first.addSuppressed(e); - } - return first; - }); - } - private void onSeqNoCompleted(long seqNo) { checkpoint.markSeqNoAsProcessed(seqNo); if (checkpoint.getPersistedCheckpoint() < checkpoint.getProcessedCheckpoint()) { notifyIfBlocked(); } - if (closed || failure.get() != null) { + if (closed || failureCollector.hasFailure()) { discardPages(); } } @@ -164,7 +143,7 @@ private void notifyIfBlocked() { } private void checkFailure() { - Exception e = failure.get(); + Exception e = failureCollector.getFailure(); if (e != null) { discardPages(); throw ExceptionsHelper.convertToElastic(e); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/DriverRunner.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/DriverRunner.java index 5de017fbd279e..b427a36566f11 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/DriverRunner.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/DriverRunner.java @@ -7,14 +7,11 @@ package org.elasticsearch.compute.operator; -import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionListener; import org.elasticsearch.common.util.concurrent.CountDown; import org.elasticsearch.common.util.concurrent.ThreadContext; -import org.elasticsearch.tasks.TaskCancelledException; import java.util.List; -import java.util.concurrent.atomic.AtomicReference; /** * Run a set of drivers to completion. @@ -35,8 +32,8 @@ public DriverRunner(ThreadContext threadContext) { * Run all drivers to completion asynchronously. 
*/ public void runToCompletion(List drivers, ActionListener listener) { - AtomicReference failure = new AtomicReference<>(); var responseHeadersCollector = new ResponseHeadersCollector(threadContext); + var failure = new FailureCollector(); CountDown counter = new CountDown(drivers.size()); for (int i = 0; i < drivers.size(); i++) { Driver driver = drivers.get(i); @@ -48,23 +45,7 @@ public void onResponse(Void unused) { @Override public void onFailure(Exception e) { - failure.getAndUpdate(first -> { - if (first == null) { - return e; - } - if (ExceptionsHelper.unwrap(e, TaskCancelledException.class) != null) { - return first; - } else { - if (ExceptionsHelper.unwrap(first, TaskCancelledException.class) != null) { - return e; - } else { - if (first != e) { - first.addSuppressed(e); - } - return first; - } - } - }); + failure.unwrapAndCollect(e); for (Driver d : drivers) { if (driver != d) { d.cancel("Driver [" + driver.sessionId() + "] was cancelled or failed"); @@ -77,7 +58,7 @@ private void done() { responseHeadersCollector.collect(); if (counter.countDown()) { responseHeadersCollector.finish(); - Exception error = failure.get(); + Exception error = failure.getFailure(); if (error != null) { listener.onFailure(error); } else { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/FailureCollector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/FailureCollector.java new file mode 100644 index 0000000000000..99edab038af31 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/FailureCollector.java @@ -0,0 +1,112 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; +import org.elasticsearch.tasks.TaskCancelledException; +import org.elasticsearch.transport.TransportException; + +import java.util.List; +import java.util.Queue; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * {@code FailureCollector} is responsible for collecting exceptions that occur in the compute engine. + * The collected exceptions are categorized into task-cancelled and non-task-cancelled exceptions. + * To limit memory usage, this class collects only the first 10 exceptions in each category by default. + * When returning the accumulated failure to the caller, this class prefers non-task-cancelled exceptions + * over task-cancelled ones as they are more useful for diagnosing issues. 
+ */ +public final class FailureCollector { + private final Queue cancelledExceptions = ConcurrentCollections.newQueue(); + private final AtomicInteger cancelledExceptionsCount = new AtomicInteger(); + + private final Queue nonCancelledExceptions = ConcurrentCollections.newQueue(); + private final AtomicInteger nonCancelledExceptionsCount = new AtomicInteger(); + + private final int maxExceptions; + private volatile boolean hasFailure = false; + private Exception finalFailure = null; + + public FailureCollector() { + this(10); + } + + public FailureCollector(int maxExceptions) { + if (maxExceptions <= 0) { + throw new IllegalArgumentException("maxExceptions must be at least one"); + } + this.maxExceptions = maxExceptions; + } + + public void unwrapAndCollect(Exception originEx) { + final Exception e = originEx instanceof TransportException + ? (originEx.getCause() instanceof Exception cause ? cause : new ElasticsearchException(originEx.getCause())) + : originEx; + if (ExceptionsHelper.unwrap(e, TaskCancelledException.class) != null) { + if (cancelledExceptionsCount.incrementAndGet() <= maxExceptions) { + cancelledExceptions.add(e); + } + } else { + if (nonCancelledExceptionsCount.incrementAndGet() <= maxExceptions) { + nonCancelledExceptions.add(e); + } + } + hasFailure = true; + } + + /** + * @return {@code true} if any failure has been collected, {@code false} otherwise + */ + public boolean hasFailure() { + return hasFailure; + } + + /** + * Returns the accumulated failure, preferring non-task-cancelled exceptions over task-cancelled ones. + * Once this method builds the failure, incoming failures are discarded. + * + * @return the accumulated failure, or {@code null} if no failure has been collected + */ + public Exception getFailure() { + if (hasFailure == false) { + return null; + } + synchronized (this) { + if (finalFailure == null) { + finalFailure = buildFailure(); + } + return finalFailure; + } + } + + private Exception buildFailure() { + assert hasFailure; + assert Thread.holdsLock(this); + int total = 0; + Exception first = null; + for (var exceptions : List.of(nonCancelledExceptions, cancelledExceptions)) { + for (Exception e : exceptions) { + if (first == null) { + first = e; + total++; + } else if (first != e) { + first.addSuppressed(e); + total++; + } + if (total >= maxExceptions) { + return first; + } + } + } + assert first != null; + return first; + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java index adce8d8a88407..77b535949eb9d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java @@ -7,21 +7,18 @@ package org.elasticsearch.compute.operator.exchange; -import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.RefCountingListener; import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.FailureCollector; import org.elasticsearch.core.Releasable; -import org.elasticsearch.tasks.TaskCancelledException; -import 
org.elasticsearch.transport.TransportException; import java.util.List; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; /** * An {@link ExchangeSourceHandler} asynchronously fetches pages and status from multiple {@link RemoteSink}s @@ -37,7 +34,7 @@ public final class ExchangeSourceHandler { private final PendingInstances outstandingSinks; private final PendingInstances outstandingSources; - private final AtomicReference failure = new AtomicReference<>(); + private final FailureCollector failure = new FailureCollector(); public ExchangeSourceHandler(int maxBufferSize, Executor fetchExecutor) { this.buffer = new ExchangeBuffer(maxBufferSize); @@ -54,7 +51,7 @@ private class ExchangeSourceImpl implements ExchangeSource { } private void checkFailure() { - Exception e = failure.get(); + Exception e = failure.getFailure(); if (e != null) { throw ExceptionsHelper.convertToElastic(e); } @@ -172,7 +169,7 @@ void fetchPage() { while (loopControl.isRunning()) { loopControl.exiting(); // finish other sinks if one of them failed or source no longer need pages. - boolean toFinishSinks = buffer.noMoreInputs() || failure.get() != null; + boolean toFinishSinks = buffer.noMoreInputs() || failure.hasFailure(); remoteSink.fetchPageAsync(toFinishSinks, ActionListener.wrap(resp -> { Page page = resp.takePage(); if (page != null) { @@ -199,26 +196,8 @@ void fetchPage() { loopControl.exited(); } - void onSinkFailed(Exception originEx) { - final Exception e = originEx instanceof TransportException - ? (originEx.getCause() instanceof Exception cause ? cause : new ElasticsearchException(originEx.getCause())) - : originEx; - failure.getAndUpdate(first -> { - if (first == null) { - return e; - } - // ignore subsequent TaskCancelledException exceptions as they don't provide useful info. 
- if (ExceptionsHelper.unwrap(e, TaskCancelledException.class) != null) { - return first; - } - if (ExceptionsHelper.unwrap(first, TaskCancelledException.class) != null) { - return e; - } - if (ExceptionsHelper.unwrapCause(first) != ExceptionsHelper.unwrapCause(e)) { - first.addSuppressed(e); - } - return first; - }); + void onSinkFailed(Exception e) { + failure.unwrapAndCollect(e); buffer.waitForReading().onResponse(null); // resume the Driver if it is being blocked on reading onSinkComplete(); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/MaxDoubleAggregatorFunctionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/MaxDoubleAggregatorFunctionTests.java index 9d638fae4e822..877db42a81b50 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/MaxDoubleAggregatorFunctionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/MaxDoubleAggregatorFunctionTests.java @@ -22,7 +22,10 @@ public class MaxDoubleAggregatorFunctionTests extends AggregatorFunctionTestCase { @Override protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { - return new SequenceDoubleBlockSourceOperator(blockFactory, LongStream.range(0, size).mapToDouble(l -> ESTestCase.randomDouble())); + return new SequenceDoubleBlockSourceOperator( + blockFactory, + LongStream.range(0, size).mapToDouble(l -> ESTestCase.randomDoubleBetween(-Double.MAX_VALUE, Double.MAX_VALUE, true)) + ); } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java index 66bcf2a57e393..09f63e9fa45bb 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java @@ -1687,12 +1687,13 @@ public StoredFieldsSpec rowStrideStoredFieldSpec() { @Override public boolean supportsOrdinals() { - return delegate.supportsOrdinals(); + // Fields with mismatching types cannot use ordinals for uniqueness determination, but must convert the values first + return false; } @Override - public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { - return delegate.ordinals(context); + public SortedSetDocValues ordinals(LeafReaderContext context) { + throw new IllegalArgumentException("Ordinals are not supported for type conversion"); } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/FailureCollectorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/FailureCollectorTests.java new file mode 100644 index 0000000000000..d5fa0a1eaecc9 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/FailureCollectorTests.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.breaker.CircuitBreakingException; +import org.elasticsearch.tasks.TaskCancelledException; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.transport.RemoteTransportException; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.lessThan; + +public class FailureCollectorTests extends ESTestCase { + + public void testCollect() throws Exception { + int maxExceptions = between(1, 100); + FailureCollector collector = new FailureCollector(maxExceptions); + List cancelledExceptions = List.of( + new TaskCancelledException("user request"), + new TaskCancelledException("cross "), + new TaskCancelledException("on failure") + ); + List nonCancelledExceptions = List.of( + new IOException("i/o simulated"), + new IOException("disk broken"), + new CircuitBreakingException("low memory", CircuitBreaker.Durability.TRANSIENT), + new CircuitBreakingException("over limit", CircuitBreaker.Durability.TRANSIENT) + ); + List failures = Stream.concat( + IntStream.range(0, between(1, 500)).mapToObj(n -> randomFrom(cancelledExceptions)), + IntStream.range(0, between(1, 500)).mapToObj(n -> randomFrom(nonCancelledExceptions)) + ).collect(Collectors.toList()); + Randomness.shuffle(failures); + Queue queue = new ConcurrentLinkedQueue<>(failures); + Thread[] threads = new Thread[between(1, 4)]; + CyclicBarrier carrier = new CyclicBarrier(threads.length); + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread(() -> { + try { + carrier.await(10, TimeUnit.SECONDS); + } catch (Exception e) { + throw new AssertionError(e); + } + Exception ex; + while ((ex = queue.poll()) != null) { + if (randomBoolean()) { + collector.unwrapAndCollect(ex); + } else { + collector.unwrapAndCollect(new RemoteTransportException("disconnect", ex)); + } + if (randomBoolean()) { + assertTrue(collector.hasFailure()); + } + } + }); + threads[i].start(); + } + for (Thread thread : threads) { + thread.join(); + } + assertTrue(collector.hasFailure()); + Exception failure = collector.getFailure(); + assertNotNull(failure); + assertThat(failure, Matchers.in(nonCancelledExceptions)); + assertThat(failure.getSuppressed().length, lessThan(maxExceptions)); + } + + public void testEmpty() { + FailureCollector collector = new FailureCollector(5); + assertFalse(collector.hasFailure()); + assertNull(collector.getFailure()); + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index d88d7f9b9448f..3b3e12978ae04 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -10,7 +10,6 @@ import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; -import org.elasticsearch.common.Strings; import 
org.elasticsearch.common.breaker.NoopCircuitBreaker; import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.time.DateFormatters; @@ -332,15 +331,15 @@ public static ExpectedResults loadCsvSpecValues(String csv) { columnTypes = new ArrayList<>(header.length); for (String c : header) { - String[] nameWithType = Strings.split(c, ":"); - if (nameWithType == null || nameWithType.length != 2) { + String[] nameWithType = escapeTypecast(c).split(":"); + if (nameWithType.length != 2) { throw new IllegalArgumentException("Invalid CSV header " + c); } - String typeName = nameWithType[1].trim(); - if (typeName.length() == 0) { - throw new IllegalArgumentException("A type is always expected in the csv file; found " + nameWithType); + String typeName = unescapeTypecast(nameWithType[1]).trim(); + if (typeName.isEmpty()) { + throw new IllegalArgumentException("A type is always expected in the csv file; found " + Arrays.toString(nameWithType)); } - String name = nameWithType[0].trim(); + String name = unescapeTypecast(nameWithType[0]).trim(); columnNames.add(name); Type type = Type.asType(typeName); if (type == null) { @@ -398,6 +397,16 @@ public static ExpectedResults loadCsvSpecValues(String csv) { } } + private static final String TYPECAST_SPACER = "__TYPECAST__"; + + private static String escapeTypecast(String typecast) { + return typecast.replace("::", TYPECAST_SPACER); + } + + private static String unescapeTypecast(String typecast) { + return typecast.replace(TYPECAST_SPACER, "::"); + } + public enum Type { INTEGER(Integer::parseInt, Integer.class), LONG(Long::parseLong, Long.class), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 530b2bc01b3d6..f9b768d67d574 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -96,8 +96,8 @@ public class CsvTestsDataLoader { "cartesian_multipolygons.csv" ); private static final TestsDataset DISTANCES = new TestsDataset("distances", "mapping-distances.json", "distances.csv"); - private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv", "k8s-settings.json", true); + private static final TestsDataset ADDRESSES = new TestsDataset("addresses", "mapping-addresses.json", "addresses.csv", null, true); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -121,7 +121,8 @@ public class CsvTestsDataLoader { Map.entry(AIRPORT_CITY_BOUNDARIES.indexName, AIRPORT_CITY_BOUNDARIES), Map.entry(CARTESIAN_MULTIPOLYGONS.indexName, CARTESIAN_MULTIPOLYGONS), Map.entry(K8S.indexName, K8S), - Map.entry(DISTANCES.indexName, DISTANCES) + Map.entry(DISTANCES.indexName, DISTANCES), + Map.entry(ADDRESSES.indexName, ADDRESSES) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index d7e067658267f..68696ad5ac99a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -31,6 +31,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.predicate.Range; import org.elasticsearch.xpack.esql.core.index.EsIndex; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -169,6 +170,10 @@ public static Literal of(Source source, Object value) { return new Literal(source, value, DataType.fromJava(value)); } + public static ReferenceAttribute referenceAttribute(String name, DataType type) { + return new ReferenceAttribute(EMPTY, name, type); + } + public static Range rangeOf(Expression value, Expression lower, boolean includeLower, Expression upper, boolean includeUpper) { return new Range(EMPTY, value, lower, includeLower, upper, includeUpper, randomZone()); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv new file mode 100644 index 0000000000000..0eea102400d60 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv @@ -0,0 +1,4 @@ +street:keyword,number:keyword,zip_code:keyword,city.name:keyword,city.country.name:keyword,city.country.continent.name:keyword,city.country.continent.planet.name:keyword,city.country.continent.planet.galaxy:keyword +Keizersgracht,281,1016 ED,Amsterdam,Netherlands,Europe,Earth,Milky Way +Kearny St,88,CA 94108,San Francisco,United States of America,North America,Earth,Milky Way +Marunouchi,2-7-2,100-7014,Tokyo,Japan,Asia,Earth,Milky Way diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 812198c324217..38f09d2e3c56e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -26,6 +26,19 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| DISSECT city.name "%{city.country.continent.planet.name} %{?}" +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | null +United States of America | San Francisco | San +Japan | Tokyo | null +; + shadowingSelf FROM employees | KEEP first_name, last_name @@ -50,6 +63,51 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello ; +shadowingInternal +FROM employees +| KEEP first_name, last_name +| WHERE last_name == "Facello" +| EVAL name = concat(first_name, "1 ", last_name) +| DISSECT name "%{foo} %{foo}" +; + +first_name:keyword | last_name:keyword | name:keyword | foo:keyword +Georgi | Facello | Georgi1 Facello | Facello +; + +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME city AS c +| DISSECT long_city_name "Zurich, the %{city} city in Switzerland" +; + +c:keyword | 
long_city_name:keyword | city:keyword +Zürich | Zurich, the largest city in Switzerland | largest +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME city AS c +| DISSECT long_city_name "Zurich, the %{city} city in %{foo}" +; + +c:keyword | long_city_name:keyword | city:keyword | foo:keyword +Zürich | Zurich, the largest city in Switzerland | largest | Switzerland +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zurich, the largest city in Switzerland" +| RENAME long_city_name AS c +| DISSECT c "Zurich, the %{long_city_name} city in Switzerland" +; + +city:keyword | c:keyword | long_city_name:keyword +Zürich | Zurich, the largest city in Switzerland | largest +; + complexPattern ROW a = "1953-01-23T12:15:00Z - some text - 127.0.0.1;" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index d34620a9e118d..15fe6853ae491 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -436,6 +436,23 @@ ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" // end::grokWithEscape-result[] ; +grokWithDuplicateFieldNames +// tag::grokWithDuplicateFieldNames[] +FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +// end::grokWithDuplicateFieldNames[] +| SORT city.name +; + +// tag::grokWithDuplicateFieldNames-result[] +city.name:keyword | zip_code:keyword | zip_parts:keyword +Amsterdam | 1016 ED | ["1016", "ED"] +San Francisco | CA 94108 | ["CA", "94108"] +Tokyo | 100-7014 | null +// end::grokWithDuplicateFieldNames-result[] +; + basicDissect // tag::basicDissect[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index 35530cf6fdb8e..9886d6cce0ca2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -122,3 +122,53 @@ FROM employees | STATS COUNT(*), MIN(salary * 10), MAX(languages)| DROP `COUNT( MIN(salary * 10):i | MAX(languages):i 253240 | 5 ; + +// Not really shadowing, but let's keep the name consistent with the other command's tests +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| DROP last_name, last_name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +shadowingInternalWildcard +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| DROP last*name, last*name, last*, last_name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +subfields +FROM addresses +| DROP city.country.continent.planet.name, city.country.continent.name, city.country.name, number, street, zip_code, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.planet.galaxy:keyword | city.name:keyword +Milky Way | Amsterdam +Milky Way | San Francisco +Milky Way | Tokyo +; + +subfieldsWildcard +FROM addresses +| DROP *.name, number, street, zip_code, *ame +; + +city.country.continent.planet.galaxy:keyword +Milky Way +Milky Way +Milky Way +; diff 
--git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index fc8c48afdf8cc..925c08f317125 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -69,6 +69,33 @@ ROW left = "left", foo = "foo", client_ip = "172.21.0.5", env = "env", right = " left:keyword | client_ip:keyword | env:keyword | right:keyword | foo:keyword ; +shadowingSubfields#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] +required_capability: enrich_load +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") +| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text +Netherlands | Amsterdam | null +United States of America | South San Francisco | San Francisco Int'l +Japan | Tokyo | null +; + +shadowingSubfieldsLimit0#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") +| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport +| SORT city.name +| LIMIT 0 +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text +; + shadowingSelf required_capability: enrich_load ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" @@ -107,6 +134,82 @@ ROW left = "left", airport = "Zurich Airport ZRH", city = "Zürich", middle = "m left:keyword | city:keyword | middle:keyword | right:keyword | airport:text | region:text | city_boundary:geo_shape ; +shadowingInternal#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH x = airport, x = region +; + +city:keyword | x:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport = region +; + +city:keyword | airport:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit2#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport, airport = region +; + +city:keyword | airport:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit3#[skip:-8.13.99, reason:ENRICH extended in 8.14.0] +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport = region, airport +; + +city:keyword | airport:text +Zürich | Zurich Int'l +; + +shadowingWhenPushedDownPastRename +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME airport AS a +| ENRICH city_names ON city WITH airport +; + +city:keyword | a:keyword | airport:text +Zürich | ZRH | Zurich Int'l +; + +shadowingWhenPushedDownPastRename2 +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME airport AS a +| ENRICH city_names ON city WITH airport, region +; + +city:keyword | a:keyword | airport:text | region:text +Zürich | ZRH | Zurich Int'l | Bezirk Zürich +; + 
+shadowingWhenPushedDownPastRename3 +required_capability: enrich_load +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", airport = "ZRH" +| RENAME city as c +| ENRICH city_names ON c WITH city = airport +; + +c:keyword | airport:keyword | city:text +Zürich | ZRH | Zurich Int'l +; + simple required_capability: enrich_load diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 3df3b85e5e3af..61a0ccd4af0c5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -15,6 +15,19 @@ left:keyword | right:keyword | x:integer left | right | 1 ; +shadowingSubfields#[skip:-8.13.3,reason:fixed in 8.13] +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.country.continent.planet.name = to_upper(city.country.continent.planet.name) +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | EARTH +United States of America | San Francisco | EARTH +Japan | Tokyo | EARTH +; + shadowingSelf ROW left = "left", x = 10000 , right = "right" | EVAL x = x + 1 @@ -33,6 +46,55 @@ left:keyword | middle:keyword | right:keyword | x:integer | y:integer left | middle | right | 9 | 10 ; +shadowingInternal +ROW x = 10000 +| EVAL x = x + 1, x = x - 2 +; + +x:integer +9999 +; + +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = 3 +; + +z:integer | languages:integer | emp_no:integer + 10001 | 2 | 3 +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = z + 1, emp_no = emp_no + languages, a = 0, languages = -1 +; + +z:integer | emp_no:integer | a:integer | languages:integer + 10001 | 10004 | 0 | -1 +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +FROM employees +| WHERE emp_no < 10002 +| KEEP emp_no, languages +| RENAME emp_no AS z +| EVAL emp_no = z + 1 +; + +z:integer | languages:integer | emp_no:integer + 10001 | 2 | 10002 +; + + withMath row a = 1 | eval b = 2 + 3; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/floats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/floats.csv-spec index 2ee7f783b7e97..537b69547c6be 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/floats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/floats.csv-spec @@ -1,4 +1,11 @@ // Floating point types-specific tests +parseLargeMagnitudeValues +required_capability: fix_parsing_large_negative_numbers +row a = 92233720368547758090, b = -9223372036854775809; + +a:double | b:double +9.223372036854776E+19 | -9.223372036854776E+18 +; inDouble from employees | keep emp_no, height, height.float, height.half_float, height.scaled_float | where height in (2.03, 2.0299999713897705, 2.029296875, 2.0300000000000002) | sort emp_no; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 9d574eed7be6b..98c88d06caa75 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -26,6 +26,19 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| GROK city.name "%{WORD:city.country.continent.planet.name} %{WORD}" +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | null +United States of America | San Francisco | San +Japan | Tokyo | null +; + shadowingSelf FROM employees | KEEP first_name, last_name @@ -50,6 +63,51 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello ; +shadowingInternal +FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +| SORT city.name +; + +city.name:keyword | zip_code:keyword | zip_parts:keyword +Amsterdam | 1016 ED | ["1016", "ED"] +San Francisco | CA 94108 | ["CA", "94108"] +Tokyo | 100-7014 | null +; + +shadowingWhenPushedDownPastRename +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME city AS c +| GROK long_city_name "Zürich, the %{WORD:city} %{WORD:city} %{WORD:city} %{WORD:city}" +; + +c:keyword | long_city_name:keyword | city:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "city", "in", "Switzerland"] +; + +shadowingWhenPushedDownPastRename2 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME city AS c +| GROK long_city_name "Zürich, the %{WORD:city} %{WORD:foo} %{WORD:city} %{WORD:foo}" +; + +c:keyword | long_city_name:keyword | city:keyword | foo:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "in"] | ["city", "Switzerland"] +; + +shadowingWhenPushedDownPastRename3 +required_capability: fixed_pushdown_past_project +ROW city = "Zürich", long_city_name = "Zürich, the largest city in Switzerland" +| RENAME long_city_name AS c +| GROK c "Zürich, the %{WORD:long_city_name} %{WORD:long_city_name} %{WORD:long_city_name} %{WORD:long_city_name}" +; + +city:keyword | c:keyword | long_city_name:keyword +Zürich | Zürich, the largest city in Switzerland | ["largest", "city", "in", "Switzerland"] +; complexPattern ROW a = "1953-01-23T12:15:00Z 127.0.0.1 some.email@foo.com 42" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index 14a3807b8729c..6bc534a9fd918 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -539,3 +539,63 @@ c:i 1 1 ; + +shadowingInternal#[skip:-8.13.3,reason:fixed in 8.13] +FROM employees +| SORT emp_no ASC +| KEEP last_name, emp_no, last_name +| LIMIT 2 +; + +emp_no:integer | last_name:keyword + 10001 | Facello + 10002 | Simmel +; + +shadowingInternalWildcard#[skip:-8.13.3,reason:fixed in 8.13] +FROM employees +| SORT emp_no ASC +| KEEP last*name, emp_no, last*name, first_name, last*, gender, last* +| LIMIT 2 +; + +emp_no:integer | first_name:keyword | gender:keyword | last_name:keyword + 10001 | Georgi | M | Facello + 10002 | Bezalel | F | Simmel +; + 
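The grok.csv-spec additions above also show what happens when a GROK pattern reuses a capture name: the repeated captures are folded into one multivalued column (for example, "%{WORD:city} %{WORD:city} %{WORD:city} %{WORD:city}" yields city = ["largest", "city", "in", "Switzerland"]), and that column then shadows any pre-existing column of the same name. The standalone sketch below reproduces just the folding step with an ordinary regex instead of real grok syntax; the class and variable names are illustrative only and do not appear in this diff.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Minimal illustration of repeated capture names folding into a single multivalued
// column, mirroring the expected result of the shadowingWhenPushedDownPastRename
// grok.csv-spec case above. Plain regex groups stand in for grok's %{WORD:name}.
public class RepeatedCaptureSketch {

    public static void main(String[] args) {
        String input = "Zürich, the largest city in Switzerland";
        // The four captures all target the same logical column name, "city".
        List<String> captureNames = List.of("city", "city", "city", "city");
        Matcher m = Pattern.compile("Zürich, the (\\w+) (\\w+) (\\w+) (\\w+)").matcher(input);

        Map<String, List<String>> columns = new LinkedHashMap<>();
        if (m.matches()) {
            for (int group = 1; group <= m.groupCount(); group++) {
                // Captures sharing a name are appended to one multivalue instead of
                // producing several separate columns.
                columns.computeIfAbsent(captureNames.get(group - 1), k -> new ArrayList<>()).add(m.group(group));
            }
        }
        // Prints: {city=[largest, city, in, Switzerland]}
        System.out.println(columns);
    }
}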
+shadowingInternalWildcardAndExplicit#[skip:-8.13.3,reason:fixed in 8.13] +FROM employees +| SORT emp_no ASC +| KEEP last*name, emp_no, last_name, first_name, last*, languages, last_name, gender, last*name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword | languages:integer | last_name:keyword | gender:keyword + 10001 | Georgi | 2 | Facello | M + 10002 | Bezalel | 5 | Simmel | F +; + +shadowingSubfields#[skip:-8.13.3,reason:fixed in 8.13] +FROM addresses +| KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.name:keyword | city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Europe | Netherlands | Amsterdam | Earth +North America | United States of America | San Francisco | Earth +Asia | Japan | Tokyo | Earth +; + +shadowingSubfieldsWildcard#[skip:-8.13.3,reason:fixed in 8.13] +FROM addresses +| KEEP *name, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.name:keyword | city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Europe | Netherlands | Amsterdam | Earth +North America | United States of America | San Francisco | Earth +Asia | Japan | Tokyo | Earth +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json new file mode 100644 index 0000000000000..679efb3c8d38b --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json @@ -0,0 +1,44 @@ +{ + "properties" : { + "street" : { + "type": "keyword" + }, + "number" : { + "type": "keyword" + }, + "zip_code": { + "type": "keyword" + }, + "city" : { + "properties": { + "name": { + "type": "keyword" + }, + "country": { + "properties": { + "name": { + "type": "keyword" + }, + "continent": { + "properties": { + "name": { + "type": "keyword" + }, + "planet": { + "properties": { + "name": { + "type": "keyword" + }, + "galaxy": { + "type": "keyword" + } + } + } + } + } + } + } + } + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec index 1e830486cc7c7..ca4c627cae749 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec @@ -174,3 +174,42 @@ avg_worked_seconds:l | birth_date:date | emp_no:i | first_n 341158890 | 1961-10-15T00:00:00.000Z | 10060 | Breannda | M | 1.42 | 1.4199999570846558 | 1.419921875 | 1.42 | 1987-11-02T00:00:00.000Z | [false, false, false, true]| [Business Analyst, Data Scientist, Senior Team Lead] | 2 | 2 | 2 | 2 | Billingsley | 29175 | [-1.76, -0.85] | [-1, 0] | [-0.85, -1.76] | [-1, 0] | true | 29175 246355863 | null | 10042 | Magy | F | 1.44 | 1.440000057220459 | 1.4404296875 | 1.44 | 1993-03-21T00:00:00.000Z | null | [Architect, Business Analyst, Internship, Junior Developer] | 3 | 3 | 3 | 3 | Stamatiou | 30404 | [-9.28, 9.42] | [-9, 9] | [-9.28, 9.42] | [-9, 9] | true | 30404 ; + +shadowing +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| RENAME emp_no AS last_name +| LIMIT 2 +; + +last_name:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name +| 
RENAME city.name AS city.country.continent.planet.name, city.country.name AS city.country.continent.name +| SORT city.country.continent.planet.name +; + +city.country.continent.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam +United States of America | San Francisco +Japan | Tokyo +; + +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP emp_no, last_name +| RENAME emp_no AS x, last_name AS x +| LIMIT 2 +; + +x:keyword +Facello +Simmel +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index bb1cf7358ca74..da640b6306299 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -36,6 +36,24 @@ a:integer // end::multivalue-result[] ; +shadowingInternal +required_capability: unique_names +ROW a = 1, a = 2; + +a:integer + 2 +; + +shadowingInternalSubfields +required_capability: unique_names +// Fun fact: "Sissi" is an actual exoplanet name, after the character from the movie with the same name. A.k.a. HAT-P-14 b. +ROW city.country.continent.planet.name = "Earth", city.country.continent.name = "Netherlands", city.country.continent.planet.name = "Sissi" +; + +city.country.continent.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Sissi +; + unsignedLongLiteral ROW long_max = 9223372036854775807, ul_start = 9223372036854775808, ul_end = 18446744073709551615, double=18446744073709551616; @@ -70,10 +88,11 @@ a:integer | b:integer | c:null | z:integer ; evalRowWithNull2 +required_capability: unique_names row a = 1, null, b = 2, c = null, null | eval z = a+b; -a:integer | null:null | b:integer | c:null | null:null | z:integer -1 | null | 2 | null | null | 3 +a:integer | b:integer | c:null | null:null | z:integer + 1 | 2 | null | null | 3 ; evalRowWithNull3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index e4fc0580e4ba2..923b3b8dacff1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -575,6 +575,65 @@ ca:l | cx:l | l:i 1 | 1 | null ; +/////////////////////////////////////////////////////////////// +// Test edge case interaction with push down past a rename +// https://github.com/elastic/elasticsearch/issues/108008 +/////////////////////////////////////////////////////////////// + +countSameFieldWithEval +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | eval b = gender | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithDissect +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | dissect gender "%{b}" | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithGrok +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | grok gender "%{USERNAME:b}" | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | F +57 | M | M +; + +countSameFieldWithEnrich +required_capability: fixed_pushdown_past_project +required_capability: enrich_load +from employees | stats b = count(gender), c = count(gender) by gender | enrich 
languages_policy on gender with b = language_name | sort c asc +; + +c:l | gender:s | b:s +0 | null | null +33 | F | null +57 | M | null +; + +countSameFieldWithEnrichLimit0 +required_capability: fixed_pushdown_past_project +from employees | stats b = count(gender), c = count(gender) by gender | enrich languages_policy on gender with b = language_name | sort c asc | limit 0 +; + +c:l | gender:s | b:s +; +/////////////////////////////////////////////////////////////// + aggsWithoutStats from employees | stats by gender | sort gender; @@ -1819,3 +1878,49 @@ warning:Line 3:17: java.lang.ArithmeticException: / by zero w_avg:double null ; + +shadowingInternal +FROM employees +| STATS x = MAX(emp_no), x = MIN(emp_no) +; + +x:integer +10001 +; + +shadowingInternalWithGroup#[skip:-8.14.1,reason:implemented in 8.14] +FROM employees +| STATS x = MAX(emp_no), x = MIN(emp_no) BY x = gender +| SORT x ASC +; + +x:keyword +F +M +null +; + +shadowingInternalWithGroup2#[skip:-8.14.1,reason:implemented in 8.14] +FROM employees +| STATS x = MAX(emp_no), y = count(x) BY x = emp_no, x = gender +| SORT x ASC +; + +y:long | x:keyword + 33 | F + 57 | M + 0 | null +; + + +shadowingTheGroup +FROM employees +| STATS gender = MAX(emp_no), gender = MIN(emp_no) BY gender +| SORT gender ASC +; + +gender:keyword +F +M +null +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index ee8c4be385e0f..bf7e01f2eb823 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -45,8 +45,10 @@ FROM sample_data_ts_long ; singleIndexIpStats +required_capability: casting_operator + FROM sample_data -| EVAL client_ip = TO_IP(client_ip) +| EVAL client_ip = client_ip::ip | STATS count=count(*) BY client_ip | SORT count DESC, client_ip ASC | KEEP count, client_ip @@ -60,8 +62,10 @@ count:long | client_ip:ip ; singleIndexIpStringStats +required_capability: casting_operator + FROM sample_data_str -| EVAL client_ip = TO_IP(client_ip) +| EVAL client_ip = client_ip::ip | STATS count=count(*) BY client_ip | SORT count DESC, client_ip ASC | KEEP count, client_ip @@ -74,12 +78,29 @@ count:long | client_ip:ip 1 | 172.21.2.162 ; +singleIndexIpStringStatsInline +required_capability: casting_operator + +FROM sample_data_str +| STATS count=count(*) BY client_ip::ip +| STATS mc=count(count) BY count +| SORT mc DESC, count ASC +| KEEP mc, count +; + +mc:l | count:l +3 | 1 +1 | 4 +; + multiIndexIpString required_capability: union_types required_capability: metadata_fields +required_capability: casting_operator +required_capability: union_types_remove_fields FROM sample_data, sample_data_str METADATA _index -| EVAL client_ip = TO_IP(client_ip) +| EVAL client_ip = client_ip::ip | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC ; @@ -104,9 +125,11 @@ sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 multiIndexIpStringRename required_capability: union_types required_capability: metadata_fields +required_capability: casting_operator +required_capability: union_types_remove_fields FROM sample_data, sample_data_str METADATA _index -| EVAL host_ip = TO_IP(client_ip) +| EVAL host_ip = client_ip::ip | KEEP _index, @timestamp, host_ip, event_duration, message | SORT _index ASC, @timestamp DESC ; @@ -131,6 +154,7 @@ sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 
3450233 multiIndexIpStringRenameToString required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data, sample_data_str METADATA _index | EVAL host_ip = TO_STRING(TO_IP(client_ip)) @@ -158,6 +182,7 @@ sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 multiIndexWhereIpString required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data, sample_data_str METADATA _index | WHERE STARTS_WITH(TO_STRING(client_ip), "172.21.2") @@ -175,6 +200,7 @@ sample_data_str | 2023-10-23T12:15:03.360Z | 3450233 | Connected multiIndexWhereIpStringLike required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data, sample_data_str METADATA _index | WHERE TO_STRING(client_ip) LIKE "172.21.2.*" @@ -189,11 +215,42 @@ sample_data_str | 2023-10-23T12:27:28.948Z | 2764889 | Connected sample_data_str | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 ; +multiIndexSortIpString +required_capability: union_types +required_capability: casting_operator +required_capability: union_types_remove_fields + +FROM sample_data, sample_data_str +| SORT client_ip::ip +| LIMIT 1 +; + +@timestamp:date | client_ip:null | event_duration:long | message:keyword +2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +; + +multiIndexSortIpStringEval +required_capability: union_types +required_capability: casting_operator +required_capability: union_types_remove_fields + +FROM sample_data, sample_data_str +| SORT client_ip::ip, @timestamp ASC +| EVAL client_ip_as_ip = client_ip::ip +| LIMIT 1 +; + +@timestamp:date | client_ip:null | event_duration:long | message:keyword | client_ip_as_ip:ip +2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected | 172.21.0.5 +; + multiIndexIpStringStats required_capability: union_types +required_capability: casting_operator +required_capability: union_types_remove_fields FROM sample_data, sample_data_str -| EVAL client_ip = TO_IP(client_ip) +| EVAL client_ip = client_ip::ip | STATS count=count(*) BY client_ip | SORT count DESC, client_ip ASC | KEEP count, client_ip @@ -208,9 +265,11 @@ count:long | client_ip:ip multiIndexIpStringRenameStats required_capability: union_types +required_capability: casting_operator +required_capability: union_types_remove_fields FROM sample_data, sample_data_str -| EVAL host_ip = TO_IP(client_ip) +| EVAL host_ip = client_ip::ip | STATS count=count(*) BY host_ip | SORT count DESC, host_ip ASC | KEEP count, host_ip @@ -225,6 +284,7 @@ count:long | host_ip:ip multiIndexIpStringRenameToStringStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data, sample_data_str | EVAL host_ip = TO_STRING(TO_IP(client_ip)) @@ -240,6 +300,24 @@ count:long | host_ip:keyword 2 | 172.21.2.162 ; +multiIndexIpStringStatsDrop +required_capability: union_types +required_capability: union_types_agg_cast +required_capability: casting_operator + +FROM sample_data, sample_data_str +| STATS count=count(*) BY client_ip::ip +| KEEP count +| SORT count DESC +; + +count:long +8 +2 +2 +2 +; + multiIndexIpStringStatsInline required_capability: union_types required_capability: union_types_inline_fix @@ -257,8 +335,42 @@ count:long | client_ip:ip 2 | 172.21.2.162 ; +multiIndexIpStringStatsInline2 +required_capability: union_types +required_capability: union_types_agg_cast +required_capability: casting_operator + +FROM 
sample_data, sample_data_str +| STATS count=count(*) BY client_ip::ip +| SORT count DESC, `client_ip::ip` ASC +; + +count:long | client_ip::ip:ip +8 | 172.21.3.15 +2 | 172.21.0.5 +2 | 172.21.2.113 +2 | 172.21.2.162 +; + +multiIndexIpStringStatsInline3 +required_capability: union_types +required_capability: union_types_agg_cast +required_capability: casting_operator + +FROM sample_data, sample_data_str +| STATS count=count(*) BY client_ip::ip +| STATS mc=count(count) BY count +| SORT mc DESC, count ASC +; + +mc:l | count:l +3 | 2 +1 | 8 +; + multiIndexWhereIpStringStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data, sample_data_str | WHERE STARTS_WITH(TO_STRING(client_ip), "172.21.2") @@ -275,6 +387,7 @@ count:long | message:keyword multiIndexTsLong required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long METADATA _index | EVAL @timestamp = TO_DATETIME(@timestamp) @@ -302,6 +415,7 @@ sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 multiIndexTsLongRename required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long METADATA _index | EVAL ts = TO_DATETIME(@timestamp) @@ -329,6 +443,7 @@ sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 multiIndexTsLongRenameToString required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long METADATA _index | EVAL ts = TO_STRING(TO_DATETIME(@timestamp)) @@ -356,6 +471,7 @@ sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 multiIndexWhereTsLong required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long METADATA _index | WHERE TO_LONG(@timestamp) < 1698068014937 @@ -372,6 +488,7 @@ sample_data_ts_long | 172.21.2.162 | 3450233 | Connected to 10. 
multiIndexTsLongStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long | EVAL @timestamp = DATE_TRUNC(1 hour, TO_DATETIME(@timestamp)) @@ -385,8 +502,80 @@ count:long | @timestamp:date 4 | 2023-10-23T12:00:00.000Z ; +multiIndexTsLongStatsDrop +required_capability: union_types +required_capability: union_types_agg_cast +required_capability: casting_operator + +FROM sample_data, sample_data_ts_long +| STATS count=count(*) BY @timestamp::datetime +| KEEP count +; + +count:long +2 +2 +2 +2 +2 +2 +2 +; + +multiIndexTsLongStatsInline2 +required_capability: union_types +required_capability: union_types_agg_cast +required_capability: casting_operator + +FROM sample_data, sample_data_ts_long +| STATS count=count(*) BY @timestamp::datetime +| SORT count DESC, `@timestamp::datetime` DESC +; + +count:long | @timestamp::datetime:datetime +2 | 2023-10-23T13:55:01.543Z +2 | 2023-10-23T13:53:55.832Z +2 | 2023-10-23T13:52:55.015Z +2 | 2023-10-23T13:51:54.732Z +2 | 2023-10-23T13:33:34.937Z +2 | 2023-10-23T12:27:28.948Z +2 | 2023-10-23T12:15:03.360Z +; + +multiIndexTsLongStatsInline3 +required_capability: union_types +required_capability: union_types_agg_cast +required_capability: casting_operator + +FROM sample_data, sample_data_ts_long +| STATS count=count(*) BY @timestamp::datetime +| STATS mc=count(count) BY count +| SORT mc DESC, count ASC +; + +mc:l | count:l +7 | 2 +; + +multiIndexTsLongStatsStats +required_capability: union_types +required_capability: union_types_agg_cast +required_capability: union_types_remove_fields + +FROM sample_data, sample_data_ts_long +| EVAL ts = TO_STRING(@timestamp) +| STATS count = COUNT(*) BY ts +| STATS mc = COUNT(count) BY count +| SORT mc DESC, count ASC +; + +mc:l | count:l +14 | 1 +; + multiIndexTsLongRenameStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long | EVAL hour = DATE_TRUNC(1 hour, TO_DATETIME(@timestamp)) @@ -402,6 +591,7 @@ count:long | hour:date multiIndexTsLongRenameToDatetimeToStringStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long | EVAL hour = LEFT(TO_STRING(TO_DATETIME(@timestamp)), 13) @@ -417,6 +607,7 @@ count:long | hour:keyword multiIndexTsLongRenameToStringStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long | EVAL mess = LEFT(TO_STRING(@timestamp), 7) @@ -435,6 +626,7 @@ count:long | mess:keyword multiIndexTsLongStatsInline required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long | STATS count=COUNT(*), max=MAX(TO_DATETIME(@timestamp)) @@ -459,6 +651,7 @@ count:long multiIndexWhereTsLongStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data, sample_data_ts_long | WHERE TO_LONG(@timestamp) < 1698068014937 @@ -475,6 +668,7 @@ count:long | message:keyword multiIndexIpStringTsLong required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data* METADATA _index | EVAL @timestamp = TO_DATETIME(@timestamp), client_ip = TO_IP(client_ip) @@ -543,6 +737,7 @@ sample_data_ts_long | 8268153 | Connection error multiIndexIpStringTsLongRename required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data* METADATA _index | 
EVAL ts = TO_DATETIME(@timestamp), host_ip = TO_IP(client_ip) @@ -611,6 +806,7 @@ sample_data_ts_long | 8268153 | Connection error multiIndexIpStringTsLongRenameToString required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data* METADATA _index | EVAL ts = TO_STRING(TO_DATETIME(@timestamp)), host_ip = TO_STRING(TO_IP(client_ip)) @@ -645,6 +841,7 @@ sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 multiIndexWhereIpStringTsLong required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data* METADATA _index | WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) == "172.21.2.162" @@ -660,6 +857,7 @@ sample_data_ts_long | 3450233 | Connected to 10.1.0.3 multiIndexWhereIpStringTsLongStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data* | WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) == "172.21.2.162" @@ -675,6 +873,7 @@ count:long | message:keyword multiIndexWhereIpStringLikeTsLong required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data* METADATA _index | WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) LIKE "172.21.2.16?" @@ -690,6 +889,7 @@ sample_data_ts_long | 3450233 | Connected to 10.1.0.3 multiIndexWhereIpStringLikeTsLongStats required_capability: union_types +required_capability: union_types_remove_fields FROM sample_data* | WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) LIKE "172.21.2.16?" @@ -705,6 +905,7 @@ count:long | message:keyword multiIndexMultiColumnTypesRename required_capability: union_types required_capability: metadata_fields +required_capability: union_types_remove_fields FROM sample_data* METADATA _index | WHERE event_duration > 8000000 @@ -717,3 +918,276 @@ null | null | 8268153 | Connection error | samp null | null | 8268153 | Connection error | sample_data_str | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15 null | null | 8268153 | Connection error | sample_data_ts_long | 2023-10-23T13:52:55.015Z | 1698069175015 | 1698069175015 | 172.21.3.15 | 172.21.3.15 ; + +multiIndexMultiColumnTypesRenameAndKeep +required_capability: union_types +required_capability: metadata_fields +required_capability: union_types_remove_fields + +FROM sample_data* METADATA _index +| WHERE event_duration > 8000000 +| EVAL ts = TO_DATETIME(@timestamp), ts_str = TO_STRING(@timestamp), ts_l = TO_LONG(@timestamp), ip = TO_IP(client_ip), ip_str = TO_STRING(client_ip) +| KEEP _index, ts, ts_str, ts_l, ip, ip_str, event_duration +| SORT _index ASC, ts DESC +; + +_index:keyword | ts:date | ts_str:keyword | ts_l:long | ip:ip | ip_str:k | event_duration:long +sample_data | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15 | 8268153 +sample_data_str | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15 | 8268153 +sample_data_ts_long | 2023-10-23T13:52:55.015Z | 1698069175015 | 1698069175015 | 172.21.3.15 | 172.21.3.15 | 8268153 +; + +multiIndexMultiColumnTypesRenameAndDrop +required_capability: union_types +required_capability: metadata_fields +required_capability: union_types_remove_fields + +FROM sample_data* METADATA _index +| WHERE event_duration > 8000000 +| EVAL ts = TO_DATETIME(@timestamp), 
ts_str = TO_STRING(@timestamp), ts_l = TO_LONG(@timestamp), ip = TO_IP(client_ip), ip_str = TO_STRING(client_ip) +| DROP @timestamp, client_ip, message +| SORT _index ASC, ts DESC +; + +event_duration:long | _index:keyword | ts:date | ts_str:keyword | ts_l:long | ip:ip | ip_str:k +8268153 | sample_data | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15 +8268153 | sample_data_str | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15 +8268153 | sample_data_ts_long | 2023-10-23T13:52:55.015Z | 1698069175015 | 1698069175015 | 172.21.3.15 | 172.21.3.15 +; + + +multiIndexIndirectUseOfUnionTypesInSort +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +FROM sample_data, sample_data_ts_long +| SORT client_ip ASC +| LIMIT 1 +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword + null | 172.21.0.5 | 1232382 | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInEval +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +FROM sample_data, sample_data_ts_long +| EVAL foo = event_duration > 1232381 +| SORT client_ip ASC +| LIMIT 1 +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword | foo:boolean + null | 172.21.0.5 | 1232382 | Disconnected | true +; + +multiIndexIndirectUseOfUnionTypesInRename +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +required_capability: union_types_fix_rename_resolution +FROM sample_data, sample_data_ts_long +| RENAME message AS event_message +| SORT client_ip ASC +| LIMIT 1 +; + +@timestamp:null | client_ip:ip | event_duration:long | event_message:keyword + null | 172.21.0.5 | 1232382 | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInKeep +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +FROM sample_data, sample_data_ts_long +| KEEP client_ip, event_duration, message +| SORT client_ip ASC +| LIMIT 1 +; + +client_ip:ip | event_duration:long | message:keyword +172.21.0.5 | 1232382 | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInWildcardKeep +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +required_capability: union_types_fix_rename_resolution +FROM sample_data, sample_data_ts_long +| KEEP * +| SORT client_ip ASC +| LIMIT 1 +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword + null | 172.21.0.5 | 1232382 | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInWildcardKeep2 +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. 
+required_capability: union_types +required_capability: union_types_fix_rename_resolution +FROM sample_data, sample_data_ts_long +| KEEP *e* +| SORT client_ip ASC +| LIMIT 1 +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword + null | 172.21.0.5 | 1232382 | Disconnected +; + + +multiIndexUseOfUnionTypesInKeep +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +required_capability: union_types_fix_rename_resolution +FROM sample_data, sample_data_ts_long +| KEEP @timestamp +| LIMIT 1 +; + +@timestamp:null +null +; + +multiIndexUseOfUnionTypesInDrop +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +required_capability: union_types_fix_rename_resolution +FROM sample_data, sample_data_ts_long +| DROP @timestamp +| SORT client_ip ASC +| LIMIT 1 +; + +client_ip:ip | event_duration:long | message:keyword +172.21.0.5 | 1232382 | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInWildcardDrop +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +required_capability: union_types_fix_rename_resolution +FROM sample_data, sample_data_ts_long +| DROP *time* +| SORT client_ip ASC +| LIMIT 1 +; + +client_ip:ip | event_duration:long | message:keyword +172.21.0.5 | 1232382 | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInWhere +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +FROM sample_data, sample_data_ts_long +| WHERE message == "Disconnected" +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword + null | 172.21.0.5 | 1232382 | Disconnected + null | 172.21.0.5 | 1232382 | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInDissect +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +FROM sample_data, sample_data_ts_long +| DISSECT message "%{foo}" +| SORT client_ip ASC +| LIMIT 1 +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword | foo:keyword + null | 172.21.0.5 | 1232382 | Disconnected | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInGrok +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +FROM sample_data, sample_data_ts_long +| GROK message "%{WORD:foo}" +| SORT client_ip ASC +| LIMIT 1 +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword | foo:keyword + null | 172.21.0.5 | 1232382 | Disconnected | Disconnected +; + +multiIndexIndirectUseOfUnionTypesInEnrich +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. 
+required_capability: union_types +required_capability: enrich_load +FROM sample_data, sample_data_ts_long +| EVAL client_ip = client_ip::keyword +| ENRICH clientip_policy ON client_ip WITH env +| SORT client_ip ASC +| LIMIT 1 +; + +@timestamp:null | event_duration:long | message:keyword | client_ip:keyword | env:keyword + null | 1232382 | Disconnected | 172.21.0.5 | Development +; + +multiIndexIndirectUseOfUnionTypesInStats +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +FROM sample_data, sample_data_ts_long +| STATS foo = max(event_duration) BY client_ip +| SORT client_ip ASC +; + +foo:long | client_ip:ip + 1232382 | 172.21.0.5 + 2764889 | 172.21.2.113 + 3450233 | 172.21.2.162 + 8268153 | 172.21.3.15 +; + +multiIndexIndirectUseOfUnionTypesInLookup +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +required_capability: lookup_v4 +FROM sample_data, sample_data_ts_long +| SORT client_ip ASC +| LIMIT 1 +| EVAL int = (event_duration - 1232380)::integer +| LOOKUP int_number_names ON int +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword | int:integer | name:keyword + null | 172.21.0.5 | 1232382 | Disconnected | 2 | two +; + +multiIndexIndirectUseOfUnionTypesInMvExpand +// TODO: `union_types` is required only because this makes the test skip in the csv tests; better solution: +// make the csv tests work with multiple indices. +required_capability: union_types +FROM sample_data, sample_data_ts_long +| EVAL foo = MV_APPEND(message, message) +| SORT client_ip ASC +| LIMIT 1 +| MV_EXPAND foo +; + +@timestamp:null | client_ip:ip | event_duration:long | message:keyword | foo:keyword + null | 172.21.0.5 | 1232382 | Disconnected | Disconnected + null | 172.21.0.5 | 1232382 | Disconnected | Disconnected +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java index 9778756176574..cde4f10ef556c 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java @@ -59,6 +59,7 @@ import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.in; import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.not; @@ -325,7 +326,7 @@ private void assertCancelled(ActionFuture response) throws Ex */ assertThat( cancelException.getMessage(), - either(equalTo("test cancel")).or(equalTo("task cancelled")).or(equalTo("request cancelled test cancel")) + in(List.of("test cancel", "task cancelled", "request cancelled test cancel", "parent task was cancelled [test cancel]")) ); assertBusy( () -> assertThat( diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 07362311d37a5..98ed8bcb9910c 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -106,7 +106,52 @@ public enum Cap { /** * Support for WEIGHTED_AVG function. */ - AGG_WEIGHTED_AVG; + AGG_WEIGHTED_AVG, + + /** + * Fix for union-types when aggregating over an inline conversion with casting operator. Done in #110476. + */ + UNION_TYPES_AGG_CAST, + + /** + * Fix for union-types when aggregating over an inline conversion with conversion function. Done in #110652. + */ + UNION_TYPES_INLINE_FIX, + + /** + * Fix for union-types when sorting a type-casted field. We changed how we remove synthetic union-types fields. + */ + UNION_TYPES_REMOVE_FIELDS, + + /** + * Fix for union-types when renaming unrelated columns. + * https://github.com/elastic/elasticsearch/issues/111452 + */ + UNION_TYPES_FIX_RENAME_RESOLUTION, + + /** + * Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles. + * see Parsing large numbers is inconsistent #104323 + */ + FIX_PARSING_LARGE_NEGATIVE_NUMBERS, + + /** + * Fix the status code returned when trying to run count_distinct on the _source type (which is not supported). + * see count_distinct(_source) returns a 500 response + */ + FIX_COUNT_DISTINCT_SOURCE_ERROR, + + /** + * Fix for non-unique attribute names in ROW and logical plans. + * https://github.com/elastic/elasticsearch/issues/110541 + */ + UNIQUE_NAMES, + + /** + * Make attributes of GROK/DISSECT adjustable and fix a shadowing bug when pushing them down past PROJECT. + * https://github.com/elastic/elasticsearch/issues/108008 + */ + FIXED_PUSHDOWN_PAST_PROJECT; private final boolean snapshotOnly; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 4fcd37faa311a..ded3e75174ae1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.compute.data.Block; +import org.elasticsearch.logging.Logger; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.Column; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; @@ -21,7 +22,6 @@ import org.elasticsearch.xpack.esql.core.common.Failure; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; @@ -42,6 +42,7 @@ import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.core.rule.Rule; import org.elasticsearch.xpack.esql.core.rule.RuleExecutor; import org.elasticsearch.xpack.esql.core.session.Configuration; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -63,6 +64,7 @@ import 
org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.DateTimeArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Drop; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -141,7 +143,7 @@ public class Analyzer extends ParameterizedRuleExecutor("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnresolveUnionTypes()); + var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnionTypesCleanup()); rules = List.of(init, resolution, finish); } @@ -217,13 +219,13 @@ private static List mappingAsAttributes(Source source, Map list, Source source, String parentName, Map mapping) { + private static void mappingAsAttributes(List list, Source source, FieldAttribute parent, Map mapping) { for (Map.Entry entry : mapping.entrySet()) { String name = entry.getKey(); EsField t = entry.getValue(); if (t != null) { - name = parentName == null ? name : parentName + "." + name; + name = parent == null ? name : parent.fieldName() + "." + name; var fieldProperties = t.getProperties(); var type = t.getDataType().widenSmallNumeric(); // due to a bug also copy the field since the Attribute hierarchy extracts the data type @@ -232,19 +234,16 @@ private static void mappingAsAttributes(List list, Source source, Str t = new EsField(t.getName(), type, t.getProperties(), t.isAggregatable(), t.isAlias()); } + FieldAttribute attribute = t instanceof UnsupportedEsField uef + ? new UnsupportedAttribute(source, name, uef) + : new FieldAttribute(source, parent, name, t); // primitive branch if (EsqlDataTypes.isPrimitive(type)) { - Attribute attribute; - if (t instanceof UnsupportedEsField uef) { - attribute = new UnsupportedAttribute(source, name, uef); - } else { - attribute = new FieldAttribute(source, null, name, t); - } list.add(attribute); } // allow compound object even if they are unknown (but not NESTED) if (type != NESTED && fieldProperties.isEmpty() == false) { - mappingAsAttributes(list, source, name, fieldProperties); + mappingAsAttributes(list, source, attribute, fieldProperties); } } } @@ -384,7 +383,7 @@ private LocalRelation tableMapAsRelation(Source source, Map mapT } } - private static class ResolveRefs extends BaseAnalyzerRule { + public static class ResolveRefs extends BaseAnalyzerRule { @Override protected LogicalPlan doRule(LogicalPlan plan) { if (plan.childrenResolved() == false) { @@ -437,6 +436,7 @@ private LogicalPlan resolveAggregate(Aggregate a, List childrenOutput // e.g. STATS a ... 
GROUP BY a = x + 1 Holder changed = new Holder<>(false); List groupings = a.groupings(); + List aggregates = a.aggregates(); // first resolve groupings since the aggs might refer to them // trying to globally resolve unresolved attributes will lead to some being marked as unresolvable if (Resolvables.resolved(groupings) == false) { @@ -456,16 +456,16 @@ private LogicalPlan resolveAggregate(Aggregate a, List childrenOutput } if (a.expressionsResolved() == false) { - AttributeMap resolved = new AttributeMap<>(); + ArrayList resolved = new ArrayList<>(); for (Expression e : groupings) { Attribute attr = Expressions.attribute(e); if (attr != null && attr.resolved()) { - resolved.put(attr, attr); + resolved.add(attr); } } - List resolvedList = NamedExpressions.mergeOutputAttributes(new ArrayList<>(resolved.keySet()), childrenOutput); - List newAggregates = new ArrayList<>(); + List resolvedList = NamedExpressions.mergeOutputAttributes(resolved, childrenOutput); + List newAggregates = new ArrayList<>(); for (NamedExpression aggregate : a.aggregates()) { var agg = (NamedExpression) aggregate.transformUp(UnresolvedAttribute.class, ua -> { Expression ne = ua; @@ -576,20 +576,28 @@ private LogicalPlan resolveLookup(Lookup l, List childrenOutput) { } private Attribute maybeResolveAttribute(UnresolvedAttribute ua, List childrenOutput) { + return maybeResolveAttribute(ua, childrenOutput, log); + } + + private static Attribute maybeResolveAttribute(UnresolvedAttribute ua, List childrenOutput, Logger logger) { if (ua.customMessage()) { return ua; } - return resolveAttribute(ua, childrenOutput); + return resolveAttribute(ua, childrenOutput, logger); } private Attribute resolveAttribute(UnresolvedAttribute ua, List childrenOutput) { + return resolveAttribute(ua, childrenOutput, log); + } + + private static Attribute resolveAttribute(UnresolvedAttribute ua, List childrenOutput, Logger logger) { Attribute resolved = ua; var named = resolveAgainstList(ua, childrenOutput); // if resolved, return it; otherwise keep it in place to be resolved later if (named.size() == 1) { resolved = named.get(0); - if (log.isTraceEnabled() && resolved.resolved()) { - log.trace("Resolved {} to {}", ua, resolved); + if (logger != null && logger.isTraceEnabled() && resolved.resolved()) { + logger.trace("Resolved {} to {}", ua, resolved); } } else { if (named.size() > 0) { @@ -725,6 +733,12 @@ private LogicalPlan resolveDrop(Drop drop, List childOutput) { } private LogicalPlan resolveRename(Rename rename, List childrenOutput) { + List projections = projectionsForRename(rename, childrenOutput, log); + + return new EsqlProject(rename.source(), rename.child(), projections); + } + + public static List projectionsForRename(Rename rename, List childrenOutput, Logger logger) { List projections = new ArrayList<>(childrenOutput); int renamingsCount = rename.renamings().size(); @@ -737,7 +751,7 @@ private LogicalPlan resolveRename(Rename rename, List childrenOutput) // remove attributes overwritten by a renaming: `| keep a, b, c | rename a as b` projections.removeIf(x -> x.name().equals(alias.name())); - var resolved = maybeResolveAttribute(ua, childrenOutput); + var resolved = maybeResolveAttribute(ua, childrenOutput, logger); if (resolved instanceof UnsupportedAttribute || resolved.resolved()) { var realiased = (NamedExpression) alias.replaceChildren(List.of(resolved)); projections.replaceAll(x -> x.equals(resolved) ? 
realiased : x); @@ -780,7 +794,7 @@ private LogicalPlan resolveRename(Rename rename, List childrenOutput) // add unresolved renamings to later trip the Verifier. projections.addAll(unresolved); - return new EsqlProject(rename.source(), rename.child(), projections); + return projections; } private LogicalPlan resolveEnrich(Enrich enrich, List childrenOutput) { @@ -795,9 +809,18 @@ private LogicalPlan resolveEnrich(Enrich enrich, List childrenOutput) String matchType = enrich.policy().getType(); DataType[] allowed = allowedEnrichTypes(matchType); if (Arrays.asList(allowed).contains(dataType) == false) { - String suffix = "only " + Arrays.toString(allowed) + " allowed for type [" + matchType + "]"; + String suffix = "only [" + + Arrays.stream(allowed).map(DataType::typeName).collect(Collectors.joining(", ")) + + "] allowed for type [" + + matchType + + "]"; resolved = ua.withUnresolvedMessage( - "Unsupported type [" + resolved.dataType() + "] for enrich matching field [" + ua.name() + "]; " + suffix + "Unsupported type [" + + resolved.dataType().typeName() + + "] for enrich matching field [" + + ua.name() + + "]; " + + suffix ); } } @@ -1050,32 +1073,43 @@ public static Expression castStringLiteral(Expression from, DataType target) { target, e.getMessage() ); - return new UnsupportedAttribute( - from.source(), - String.valueOf(from.fold()), - new UnsupportedEsField(String.valueOf(from.fold()), from.dataType().typeName()), - message - ); + return new UnresolvedAttribute(from.source(), String.valueOf(from.fold()), null, message); } } } /** * The EsqlIndexResolver will create InvalidMappedField instances for fields that are ambiguous (i.e. have multiple mappings). - * During ResolveRefs we do not convert these to UnresolvedAttribute instances, as we want to first determine if they can + * During {@link ResolveRefs} we do not convert these to UnresolvedAttribute instances, as we want to first determine if they can * instead be handled by conversion functions within the query. This rule looks for matching conversion functions and converts * those fields into MultiTypeEsField, which encapsulates the knowledge of how to convert these into a single type. * This knowledge will be used later in generating the FieldExtractExec with built-in type conversion. * Any fields which could not be resolved by conversion functions will be converted to UnresolvedAttribute instances in a later rule - * (See UnresolveUnionTypes below). + * (See {@link UnionTypesCleanup} below). */ - private static class ResolveUnionTypes extends BaseAnalyzerRule { + private static class ResolveUnionTypes extends Rule { record TypeResolutionKey(String fieldName, DataType fieldType) {} + private List unionFieldAttributes; + @Override - protected LogicalPlan doRule(LogicalPlan plan) { - List unionFieldAttributes = new ArrayList<>(); + public LogicalPlan apply(LogicalPlan plan) { + unionFieldAttributes = new ArrayList<>(); + // Collect field attributes from previous runs + plan.forEachUp(EsRelation.class, rel -> { + for (Attribute attr : rel.output()) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField) { + unionFieldAttributes.add(fa); + } + } + }); + + return plan.transformUp(LogicalPlan.class, p -> p.childrenResolved() == false ? 
p : doRule(p)); + } + + private LogicalPlan doRule(LogicalPlan plan) { + int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size(); // See if the eval function has an unresolved MultiTypeEsField field // Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge) plan = plan.transformExpressionsOnly( @@ -1083,50 +1117,37 @@ protected LogicalPlan doRule(LogicalPlan plan) { convert -> resolveConvertFunction(convert, unionFieldAttributes) ); // If no union fields were generated, return the plan as is - if (unionFieldAttributes.isEmpty()) { + if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) { return plan; } - // Otherwise drop the converted attributes after the alias function, as they are only needed for this function, and - // the original version of the attribute should still be seen as unconverted. - plan = dropConvertedAttributes(plan, unionFieldAttributes); - // And add generated fields to EsRelation, so these new attributes will appear in the OutputExec of the Fragment // and thereby get used in FieldExtractExec plan = plan.transformDown(EsRelation.class, esr -> { - List output = esr.output(); List missing = new ArrayList<>(); for (FieldAttribute fa : unionFieldAttributes) { - if (output.stream().noneMatch(a -> a.id().equals(fa.id()))) { + // Using outputSet().contains looks by NameId, resp. uses semanticEquals. + if (esr.outputSet().contains(fa) == false) { missing.add(fa); } } + if (missing.isEmpty() == false) { - output.addAll(missing); - return new EsRelation(esr.source(), esr.index(), output, esr.indexMode(), esr.frozen()); + List newOutput = new ArrayList<>(esr.output()); + newOutput.addAll(missing); + return new EsRelation(esr.source(), esr.index(), newOutput, esr.indexMode(), esr.frozen()); } return esr; }); return plan; } - private LogicalPlan dropConvertedAttributes(LogicalPlan plan, List unionFieldAttributes) { - List projections = new ArrayList<>(plan.output()); - for (var e : unionFieldAttributes) { - projections.removeIf(p -> p.id().equals(e.id())); - } - if (projections.size() != plan.output().size()) { - return new EsqlProject(plan.source(), plan, projections); - } - return plan; - } - private Expression resolveConvertFunction(AbstractConvertFunction convert, List unionFieldAttributes) { if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { HashMap typeResolutions = new HashMap<>(); Set supportedTypes = convert.supportedTypes(); - imf.getTypesToIndices().keySet().forEach(typeName -> { - DataType type = DataType.fromTypeName(typeName); + imf.types().forEach(type -> { + // TODO: Shouldn't we perform widening of small numerical types here? if (supportedTypes.contains(type)) { TypeResolutionKey key = new TypeResolutionKey(fa.name(), type); var concreteConvert = typeSpecificConvert(convert, fa.source(), type, imf); @@ -1149,7 +1170,13 @@ private Expression createIfDoesNotAlreadyExist( MultiTypeEsField resolvedField, List unionFieldAttributes ) { - var unionFieldAttribute = new FieldAttribute(fa.source(), fa.name(), resolvedField); // Generates new ID for the field + // Generate new ID for the field and suffix it with the data type to maintain unique attribute names. 
+ String unionTypedFieldName = LogicalPlanOptimizer.rawTemporaryName( + fa.name(), + "converted_to", + resolvedField.getDataType().typeName() + ); + FieldAttribute unionFieldAttribute = new FieldAttribute(fa.source(), fa.parent(), unionTypedFieldName, resolvedField); int existingIndex = unionFieldAttributes.indexOf(unionFieldAttribute); if (existingIndex >= 0) { // Do not generate multiple name/type combinations with different IDs @@ -1182,32 +1209,57 @@ private Expression typeSpecificConvert(AbstractConvertFunction convert, Source s } /** - * If there was no AbstractConvertFunction that resolved multi-type fields in the ResolveUnionTypes rules, - * then there could still be some FieldAttributes that contain unresolved MultiTypeEsFields. - * These need to be converted back to actual UnresolvedAttribute in order for validation to generate appropriate failures. + * {@link ResolveUnionTypes} creates new, synthetic attributes for union types: + * If there was no {@code AbstractConvertFunction} that resolved multi-type fields in the {@link ResolveUnionTypes} rule, + * then there could still be some {@code FieldAttribute}s that contain unresolved {@link MultiTypeEsField}s. + * These need to be converted back to actual {@code UnresolvedAttribute} in order for validation to generate appropriate failures. + *

+ * Finally, if {@code client_ip} is present in 2 indices, once with type {@code ip} and once with type {@code keyword}, + * using {@code EVAL x = to_ip(client_ip)} will create a single attribute @{code $$client_ip$converted_to$ip}. + * This should not spill into the query output, so we drop such attributes at the end. */ - private static class UnresolveUnionTypes extends AnalyzerRules.AnalyzerRule { - @Override - protected boolean skipResolved() { - return false; - } + private static class UnionTypesCleanup extends Rule { + public LogicalPlan apply(LogicalPlan plan) { + LogicalPlan planWithCheckedUnionTypes = plan.transformUp( + LogicalPlan.class, + p -> p.transformExpressionsOnly(FieldAttribute.class, UnionTypesCleanup::checkUnresolved) + ); - @Override - protected LogicalPlan rule(LogicalPlan plan) { - if (plan instanceof EsRelation esRelation) { - // Leave esRelation as InvalidMappedField so that UNSUPPORTED fields can still pass through - return esRelation; - } - return plan.transformExpressionsOnly(FieldAttribute.class, UnresolveUnionTypes::checkUnresolved); + // To drop synthetic attributes at the end, we need to compute the plan's output. + // This is only legal to do if the plan is resolved. + return planWithCheckedUnionTypes.resolved() + ? planWithoutSyntheticAttributes(planWithCheckedUnionTypes) + : planWithCheckedUnionTypes; } - private static Attribute checkUnresolved(FieldAttribute fa) { - var field = fa.field(); - if (field instanceof InvalidMappedField imf) { + static Attribute checkUnresolved(FieldAttribute fa) { + if (fa.field() instanceof InvalidMappedField imf) { String unresolvedMessage = "Cannot use field [" + fa.name() + "] due to ambiguities being " + imf.errorMessage(); - return new UnresolvedAttribute(fa.source(), fa.name(), fa.qualifier(), fa.id(), unresolvedMessage, null); + String types = imf.getTypesToIndices().keySet().stream().collect(Collectors.joining(",")); + return new UnsupportedAttribute( + fa.source(), + fa.name(), + new UnsupportedEsField(imf.getName(), types), + unresolvedMessage, + fa.id() + ); } return fa; } + + private static LogicalPlan planWithoutSyntheticAttributes(LogicalPlan plan) { + List output = plan.output(); + List newOutput = new ArrayList<>(output.size()); + + for (Attribute attr : output) { + // TODO: this should really use .synthetic() + // https://github.com/elastic/elasticsearch/issues/105821 + if (attr.name().startsWith(FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX) == false) { + newOutput.add(attr); + } + } + + return newOutput.size() == output.size() ? plan : new Project(Source.EMPTY, plan, newOutput); + } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 514a53b0933e9..dc94353712c6d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -100,16 +100,23 @@ else if (p.resolved()) { } e.forEachUp(ae -> { - // we're only interested in the children + // Special handling for Project and unsupported/union types: disallow renaming them but pass them through otherwise. 
+ if (p instanceof Project) { + if (ae instanceof Alias as && as.child() instanceof UnsupportedAttribute ua) { + failures.add(fail(ae, ua.unresolvedMessage())); + } + if (ae instanceof UnsupportedAttribute) { + return; + } + } + + // Do not fail multiple times in case the children are already unresolved. if (ae.childrenResolved() == false) { return; } if (ae instanceof Unresolvable u) { - // special handling for Project and unsupported types - if (p instanceof Project == false || u instanceof UnsupportedAttribute == false) { - failures.add(fail(ae, u.unresolvedMessage())); - } + failures.add(fail(ae, u.unresolvedMessage())); } if (ae.typeResolved().unresolved()) { failures.add(fail(ae, ae.typeResolved().message())); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java index 87c558fe5bd1e..2425fa24b17c2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java @@ -31,7 +31,6 @@ import org.elasticsearch.compute.data.BlockStreamInput; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.ElementType; -import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.IntVector; import org.elasticsearch.compute.data.LocalCircuitBreaker; import org.elasticsearch.compute.data.OrdinalBytesRefBlock; @@ -43,6 +42,7 @@ import org.elasticsearch.compute.operator.OutputOperator; import org.elasticsearch.core.AbstractRefCounted; import org.elasticsearch.core.RefCounted; +import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.MappedFieldType; @@ -247,30 +247,53 @@ private void doLookup( ActionListener listener ) { Block inputBlock = inputPage.getBlock(0); - final IntBlock selectedPositions; - final OrdinalBytesRefBlock ordinalsBytesRefBlock; - if (inputBlock instanceof BytesRefBlock bytesRefBlock && (ordinalsBytesRefBlock = bytesRefBlock.asOrdinals()) != null) { - inputBlock = ordinalsBytesRefBlock.getDictionaryVector().asBlock(); - selectedPositions = ordinalsBytesRefBlock.getOrdinalsBlock(); - selectedPositions.mustIncRef(); - } else { - selectedPositions = IntVector.range(0, inputBlock.getPositionCount(), blockFactory).asBlock(); + if (inputBlock.areAllValuesNull()) { + listener.onResponse(createNullResponse(inputPage.getPositionCount(), extractFields)); + return; } - LocalCircuitBreaker localBreaker = null; + final List releasables = new ArrayList<>(6); + boolean started = false; try { - if (inputBlock.areAllValuesNull()) { - listener.onResponse(createNullResponse(inputPage.getPositionCount(), extractFields)); - return; - } - ShardSearchRequest shardSearchRequest = new ShardSearchRequest(shardId, 0, AliasFilter.EMPTY); - SearchContext searchContext = searchService.createSearchContext(shardSearchRequest, SearchService.NO_TIMEOUT); - listener = ActionListener.runBefore(listener, searchContext::close); - localBreaker = new LocalCircuitBreaker( + final ShardSearchRequest shardSearchRequest = new ShardSearchRequest(shardId, 0, AliasFilter.EMPTY); + final SearchContext searchContext = searchService.createSearchContext(shardSearchRequest, SearchService.NO_TIMEOUT); + releasables.add(searchContext); + final LocalCircuitBreaker localBreaker = 
new LocalCircuitBreaker( blockFactory.breaker(), localBreakerSettings.overReservedBytes(), localBreakerSettings.maxOverReservedBytes() ); - DriverContext driverContext = new DriverContext(bigArrays, blockFactory.newChildFactory(localBreaker)); + releasables.add(localBreaker); + final DriverContext driverContext = new DriverContext(bigArrays, blockFactory.newChildFactory(localBreaker)); + final ElementType[] mergingTypes = new ElementType[extractFields.size()]; + for (int i = 0; i < extractFields.size(); i++) { + mergingTypes[i] = PlannerUtils.toElementType(extractFields.get(i).dataType()); + } + final int[] mergingChannels = IntStream.range(0, extractFields.size()).map(i -> i + 2).toArray(); + final MergePositionsOperator mergePositionsOperator; + final OrdinalBytesRefBlock ordinalsBytesRefBlock; + if (inputBlock instanceof BytesRefBlock bytesRefBlock && (ordinalsBytesRefBlock = bytesRefBlock.asOrdinals()) != null) { + inputBlock = ordinalsBytesRefBlock.getDictionaryVector().asBlock(); + var selectedPositions = ordinalsBytesRefBlock.getOrdinalsBlock(); + mergePositionsOperator = new MergePositionsOperator( + 1, + mergingChannels, + mergingTypes, + selectedPositions, + driverContext.blockFactory() + ); + + } else { + try (var selectedPositions = IntVector.range(0, inputBlock.getPositionCount(), blockFactory).asBlock()) { + mergePositionsOperator = new MergePositionsOperator( + 1, + mergingChannels, + mergingTypes, + selectedPositions, + driverContext.blockFactory() + ); + } + } + releasables.add(mergePositionsOperator); SearchExecutionContext searchExecutionContext = searchContext.getSearchExecutionContext(); MappedFieldType fieldType = searchExecutionContext.getFieldType(matchField); var queryList = switch (matchType) { @@ -284,57 +307,13 @@ private void doLookup( queryList, searchExecutionContext.getIndexReader() ); - List intermediateOperators = new ArrayList<>(extractFields.size() + 2); - final ElementType[] mergingTypes = new ElementType[extractFields.size()]; - // load the fields - List fields = new ArrayList<>(extractFields.size()); - for (int i = 0; i < extractFields.size(); i++) { - NamedExpression extractField = extractFields.get(i); - final ElementType elementType = PlannerUtils.toElementType(extractField.dataType()); - mergingTypes[i] = elementType; - EsPhysicalOperationProviders.ShardContext ctx = new EsPhysicalOperationProviders.DefaultShardContext( - 0, - searchContext.getSearchExecutionContext(), - searchContext.request().getAliasFilter() - ); - BlockLoader loader = ctx.blockLoader( - extractField instanceof Alias a ? 
((NamedExpression) a.child()).name() : extractField.name(), - extractField.dataType() == DataType.UNSUPPORTED, - MappedFieldType.FieldExtractPreference.NONE - ); - fields.add( - new ValuesSourceReaderOperator.FieldInfo( - extractField.name(), - PlannerUtils.toElementType(extractField.dataType()), - shardIdx -> { - if (shardIdx != 0) { - throw new IllegalStateException("only one shard"); - } - return loader; - } - ) - ); - } - intermediateOperators.add( - new ValuesSourceReaderOperator( - driverContext.blockFactory(), - fields, - List.of( - new ValuesSourceReaderOperator.ShardContext( - searchContext.searcher().getIndexReader(), - searchContext::newSourceLoader - ) - ), - 0 - ) - ); - // merging field-values by position - final int[] mergingChannels = IntStream.range(0, extractFields.size()).map(i -> i + 2).toArray(); - intermediateOperators.add( - new MergePositionsOperator(1, mergingChannels, mergingTypes, selectedPositions, driverContext.blockFactory()) - ); + releasables.add(queryOperator); + var extractFieldsOperator = extractFieldsOperator(searchContext, driverContext, extractFields); + releasables.add(extractFieldsOperator); + AtomicReference result = new AtomicReference<>(); OutputOperator outputOperator = new OutputOperator(List.of(), Function.identity(), result::set); + releasables.add(outputOperator); Driver driver = new Driver( "enrich-lookup:" + sessionId, System.currentTimeMillis(), @@ -350,18 +329,16 @@ private void doLookup( inputPage.getPositionCount() ), queryOperator, - intermediateOperators, + List.of(extractFieldsOperator, mergePositionsOperator), outputOperator, Driver.DEFAULT_STATUS_INTERVAL, - localBreaker + Releasables.wrap(searchContext, localBreaker) ); task.addListener(() -> { String reason = Objects.requireNonNullElse(task.getReasonCancelled(), "task was cancelled"); driver.cancel(reason); }); - var threadContext = transportService.getThreadPool().getThreadContext(); - localBreaker = null; Driver.start(threadContext, executor, driver, Driver.DEFAULT_MAX_ITERATIONS, listener.map(ignored -> { Page out = result.get(); if (out == null) { @@ -369,11 +346,52 @@ private void doLookup( } return out; })); + started = true; } catch (Exception e) { listener.onFailure(e); } finally { - Releasables.close(selectedPositions, localBreaker); + if (started == false) { + Releasables.close(releasables); + } + } + } + + private static Operator extractFieldsOperator( + SearchContext searchContext, + DriverContext driverContext, + List extractFields + ) { + EsPhysicalOperationProviders.ShardContext shardContext = new EsPhysicalOperationProviders.DefaultShardContext( + 0, + searchContext.getSearchExecutionContext(), + searchContext.request().getAliasFilter() + ); + List fields = new ArrayList<>(extractFields.size()); + for (NamedExpression extractField : extractFields) { + BlockLoader loader = shardContext.blockLoader( + extractField instanceof Alias a ? 
((NamedExpression) a.child()).name() : extractField.name(), + extractField.dataType() == DataType.UNSUPPORTED, + MappedFieldType.FieldExtractPreference.NONE + ); + fields.add( + new ValuesSourceReaderOperator.FieldInfo( + extractField.name(), + PlannerUtils.toElementType(extractField.dataType()), + shardIdx -> { + if (shardIdx != 0) { + throw new IllegalStateException("only one shard"); + } + return loader; + } + ) + ); } + return new ValuesSourceReaderOperator( + driverContext.blockFactory(), + fields, + List.of(new ValuesSourceReaderOperator.ShardContext(searchContext.searcher().getIndexReader(), searchContext::newSourceLoader)), + 0 + ); } private Page createNullResponse(int positionCount, List extractFields) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java index d0c8adfd3c858..624ea9a030208 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/NamedExpressions.java @@ -33,7 +33,8 @@ public static List mergeOutputAttributes( /** * Merges output expressions of a command given the new attributes plus the existing inputs that are emitted as outputs. * As a general rule, child output will come first in the list, followed by the new fields. - * In case of name collisions, only last entry is preserved (previous expressions with the same name are discarded) + * In case of name collisions, only the last entry is preserved (previous expressions with the same name are discarded) + * and the new attributes have precedence over the child output. * @param fields the fields added by the command * @param childOutput the command input that has to be propagated as output * @return diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnresolvedFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnresolvedFunction.java index ab3475635ddbd..5c898631b28fb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnresolvedFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnresolvedFunction.java @@ -132,6 +132,11 @@ public boolean analyzed() { return analyzed; } + @Override + protected TypeResolution resolveType() { + return new TypeResolution("unresolved function [" + name + "]"); + } + @Override public DataType dataType() { throw new UnresolvedException("dataType", this); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnsupportedAttribute.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnsupportedAttribute.java index 22c4aa9c6bf07..a553361f60a18 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnsupportedAttribute.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/UnsupportedAttribute.java @@ -104,6 +104,13 @@ public UnsupportedEsField field() { return (UnsupportedEsField) super.field(); } + @Override + public String fieldName() { + // The super fieldName uses parents to compute the path; this class ignores parents, so we need to rely on the name instead. 
+ // Using field().getName() would be wrong: for subfields like parent.subfield that would return only the last part, subfield. + return name(); + } + @Override protected NodeInfo info() { return NodeInfo.create(this, UnsupportedAttribute::new, name(), field(), hasCustomMessage ? message : null, id()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java index 5e61f69758a47..7686d10a03d9e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/CountDistinct.java @@ -116,10 +116,10 @@ protected TypeResolution resolveType() { boolean resolved = resolution.resolved(); resolution = isType( field(), - dt -> resolved && dt != DataType.UNSIGNED_LONG, + dt -> resolved && dt != DataType.UNSIGNED_LONG && dt != DataType.SOURCE, sourceText(), DEFAULT, - "any exact type except unsigned_long or counter types" + "any exact type except unsigned_long, _source, or counter types" ); if (resolution.unresolved() || precision == null) { return resolution; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ToPartial.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ToPartial.java index f94c8e0508cd7..c1da400185944 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ToPartial.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/ToPartial.java @@ -65,12 +65,7 @@ public class ToPartial extends AggregateFunction implements ToAggregator { private final Expression function; - public ToPartial(Source source, AggregateFunction function) { - super(source, function.field(), List.of(function)); - this.function = function; - } - - private ToPartial(Source source, Expression field, Expression function) { + public ToPartial(Source source, Expression field, Expression function) { super(source, field, List.of(function)); this.function = function; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java index 0fed02f89fd92..2e83d048a2f68 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java @@ -37,7 +37,7 @@ import java.util.Set; import java.util.function.Function; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isTypeOrUnionType; /** * Base class for functions that converts a field into a function-specific type. 
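The name-collision rule documented above for NamedExpressions#mergeOutputAttributes (child output comes first, new attributes win on collisions) can be shown with a name-only sketch. This is plain Java operating on strings, not the actual implementation, and it ignores duplicates within the new fields themselves:

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    class MergeOutputSketch {
        static List<String> mergeOutputNames(List<String> newFields, List<String> childOutput) {
            Set<String> shadowed = new HashSet<>(newFields);
            List<String> merged = new ArrayList<>();
            for (String name : childOutput) {
                if (shadowed.contains(name) == false) {
                    merged.add(name);       // child output comes first...
                }
            }
            merged.addAll(newFields);       // ...followed by the new attributes, which take precedence on collisions
            return merged;
        }

        public static void main(String[] args) {
            // e.g. a command that generates [b, d] on top of a child producing [a, b, c]
            System.out.println(mergeOutputNames(List.of("b", "d"), List.of("a", "b", "c"))); // [a, c, b, d]
        }
    }
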
@@ -77,14 +77,14 @@ protected final TypeResolution resolveType() { if (childrenResolved() == false) { return new TypeResolution("Unresolved children"); } - return isType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(supportedTypes())); + return isTypeOrUnionType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(supportedTypes())); } public Set supportedTypes() { return factories().keySet(); } - public static String supportedTypesNames(Set types) { + private static String supportedTypesNames(Set types) { List supportedTypesNames = new ArrayList<>(types.size()); HashSet supportTypes = new HashSet<>(types); if (supportTypes.containsAll(NUMERIC_TYPES)) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java index ba5e8316a666c..05554a0756a9d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java @@ -140,7 +140,8 @@ else if (plan instanceof Project project) { Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); for (NamedExpression projection : projections) { - if (projection instanceof FieldAttribute f && stats.exists(f.qualifiedName()) == false) { + // Do not use the attribute name, this can deviate from the field name for union types. + if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false) { DataType dt = f.dataType(); Alias nullAlias = nullLiteral.get(f.dataType()); // save the first field as null (per datatype) @@ -170,7 +171,8 @@ else if (plan instanceof Project project) { || plan instanceof TopN) { plan = plan.transformExpressionsOnlyUp( FieldAttribute.class, - f -> stats.exists(f.qualifiedName()) ? f : Literal.of(f, null) + // Do not use the attribute name, this can deviate from the field name for union types. + f -> stats.exists(f.fieldName()) ? 
f : Literal.of(f, null) ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java index ca4b5d17deed3..439289c879c27 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizer.java @@ -15,6 +15,9 @@ import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.NameId; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -22,6 +25,7 @@ import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.core.rule.ParameterizedRuleExecutor; +import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.optimizer.rules.AddDefaultTopN; import org.elasticsearch.xpack.esql.optimizer.rules.BooleanFunctionEqualsElimination; import org.elasticsearch.xpack.esql.optimizer.rules.BooleanSimplification; @@ -67,6 +71,7 @@ import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSpatialSurrogates; import org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates; import org.elasticsearch.xpack.esql.optimizer.rules.TranslateMetricsAggregate; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; @@ -74,8 +79,11 @@ import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Set; import static java.util.Arrays.asList; @@ -89,6 +97,34 @@ public LogicalPlanOptimizer(LogicalOptimizerContext optimizerContext) { super(optimizerContext); } + public static String temporaryName(Expression inner, Expression outer, int suffix) { + String in = toString(inner); + String out = toString(outer); + return rawTemporaryName(in, out, String.valueOf(suffix)); + } + + public static String locallyUniqueTemporaryName(String inner, String outer) { + return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + new NameId(); + } + + public static String rawTemporaryName(String inner, String outer, String suffix) { + return FieldAttribute.SYNTHETIC_ATTRIBUTE_NAME_PREFIX + inner + "$" + outer + "$" + suffix; + } + + static String toString(Expression ex) { + return ex instanceof AggregateFunction af ? af.functionName() : extractString(ex); + } + + static String extractString(Expression ex) { + return ex instanceof NamedExpression ne ? ne.name() : limitToString(ex.sourceText()).replace(' ', '_'); + } + + static int TO_STRING_LIMIT = 16; + + static String limitToString(String string) { + return string.length() > TO_STRING_LIMIT ? 
string.substring(0, TO_STRING_LIMIT - 1) + ">" : string; + } + public LogicalPlan optimize(LogicalPlan verified) { var optimized = execute(verified); @@ -189,35 +225,26 @@ public static LogicalPlan skipPlan(UnaryPlan plan, LocalSupplier supplier) { /** * Pushes LogicalPlans which generate new attributes (Eval, Grok/Dissect, Enrich), past OrderBys and Projections. - * Although it seems arbitrary whether the OrderBy or the Eval is executed first, this transformation ensures that OrderBys only - * separated by an eval can be combined by PushDownAndCombineOrderBy. - * - * E.g.: - * - * ... | sort a | eval x = b + 1 | sort x - * - * becomes - * - * ... | eval x = b + 1 | sort a | sort x - * - * Ordering the Evals before the OrderBys has the advantage that it's always possible to order the plans like this. + * Although it seems arbitrary whether the OrderBy or the generating plan is executed first, this transformation ensures that OrderBys + * only separated by e.g. an Eval can be combined by {@link PushDownAndCombineOrderBy}. + *
<p>
+ * E.g. {@code ... | sort a | eval x = b + 1 | sort x} becomes {@code ... | eval x = b + 1 | sort a | sort x} + *
* <p>
+ * Ordering the generating plans before the OrderBys has the advantage that it's always possible to order the plans like this. * E.g., in the example above it would not be possible to put the eval after the two orderBys. - * - * In case one of the Eval's fields would shadow the orderBy's attributes, we rename the attribute first. - * - * E.g. - * - * ... | sort a | eval a = b + 1 | ... - * - * becomes - * - * ... | eval $$a = a | eval a = b + 1 | sort $$a | drop $$a + *
<p>
+ * In case one of the generating plan's attributes would shadow the OrderBy's attributes, we alias the generated attribute first. + *
<p>
+ * E.g. {@code ... | sort a | eval a = b + 1 | ...} becomes {@code ... | eval $$a = a | eval a = b + 1 | sort $$a | drop $$a ...} + *
<p>
+ * In case the generating plan's attributes would shadow the Project's attributes, we rename the generated attributes in place. + *
<p>
+ * E.g. {@code ... | rename a as z | eval a = b + 1 | ...} becomes {@code ... eval $$a = b + 1 | rename a as z, $$a as a ...} */ - public static LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(UnaryPlan generatingPlan, List generatedAttributes) { + public static > LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(Plan generatingPlan) { LogicalPlan child = generatingPlan.child(); - if (child instanceof OrderBy orderBy) { - Set evalFieldNames = new LinkedHashSet<>(Expressions.names(generatedAttributes)); + Set evalFieldNames = new LinkedHashSet<>(Expressions.names(generatingPlan.generatedAttributes())); // Look for attributes in the OrderBy's expressions and create aliases with temporary names for them. AttributeReplacement nonShadowedOrders = renameAttributesInExpressions(evalFieldNames, orderBy.order()); @@ -238,9 +265,66 @@ public static LogicalPlan pushGeneratingPlanPastProjectAndOrderBy(UnaryPlan gene } return orderBy.replaceChild(generatingPlan.replaceChild(orderBy.child())); - } else if (child instanceof Project) { - var projectWithEvalChild = pushDownPastProject(generatingPlan); - return projectWithEvalChild.withProjections(mergeOutputExpressions(generatedAttributes, projectWithEvalChild.projections())); + } else if (child instanceof Project project) { + // We need to account for attribute shadowing: a rename might rely on a name generated in an Eval/Grok/Dissect/Enrich. + // E.g. in: + // + // Eval[[2 * x{f}#1 AS y]] + // \_Project[[x{f}#1, y{f}#2, y{f}#2 AS z]] + // + // Just moving the Eval down breaks z because we shadow y{f}#2. + // Instead, we use a different alias in the Eval, eventually renaming back to y: + // + // Project[[x{f}#1, y{f}#2 as z, $$y{r}#3 as y]] + // \_Eval[[2 * x{f}#1 as $$y]] + + List generatedAttributes = generatingPlan.generatedAttributes(); + + @SuppressWarnings("unchecked") + Plan generatingPlanWithResolvedExpressions = (Plan) resolveRenamesFromProject(generatingPlan, project); + + Set namesReferencedInRenames = new HashSet<>(); + for (NamedExpression ne : project.projections()) { + if (ne instanceof Alias as) { + namesReferencedInRenames.addAll(as.child().references().names()); + } + } + Map renameGeneratedAttributeTo = newNamesForConflictingAttributes( + generatingPlan.generatedAttributes(), + namesReferencedInRenames + ); + List newNames = generatedAttributes.stream() + .map(attr -> renameGeneratedAttributeTo.getOrDefault(attr.name(), attr.name())) + .toList(); + Plan generatingPlanWithRenamedAttributes = generatingPlanWithResolvedExpressions.withGeneratedNames(newNames); + + // Put the project at the top, but include the generated attributes. + // Any generated attributes that had to be renamed need to be re-renamed to their original names. 
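A name-only sketch of the conflict detection described in the comments above: only generated names that are referenced by one of the Project's renames get a temporary replacement, everything else keeps its name and is re-aliased back afterwards. Plain Java, with a counter suffix as a stand-in for the NameId-based locallyUniqueTemporaryName:

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    class ConflictRenameSketch {
        // Only generated names that appear among the names referenced by the Project's renames need a temporary replacement.
        static Map<String, String> newNamesForConflicts(List<String> generatedNames, Set<String> reservedNames) {
            Map<String, String> renameTo = new HashMap<>();
            for (String name : generatedNames) {
                if (reservedNames.contains(name) && renameTo.containsKey(name) == false) {
                    // the real rule uses a locally unique temporary name derived from a fresh NameId
                    renameTo.put(name, "$$" + name + "$temp_name$" + renameTo.size());
                }
            }
            return renameTo;
        }

        public static void main(String[] args) {
            // The Project contains "y AS z", so a generated attribute named y conflicts; x does not.
            System.out.println(newNamesForConflicts(List.of("x", "y"), Set.of("y"))); // {y=$$y$temp_name$0}
        }
    }
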
+ List generatedAttributesRenamedToOriginal = new ArrayList<>(generatedAttributes.size()); + List renamedGeneratedAttributes = generatingPlanWithRenamedAttributes.generatedAttributes(); + for (int i = 0; i < generatedAttributes.size(); i++) { + Attribute originalAttribute = generatedAttributes.get(i); + Attribute renamedAttribute = renamedGeneratedAttributes.get(i); + if (originalAttribute.name().equals(renamedAttribute.name())) { + generatedAttributesRenamedToOriginal.add(renamedAttribute); + } else { + generatedAttributesRenamedToOriginal.add( + new Alias( + originalAttribute.source(), + originalAttribute.name(), + originalAttribute.qualifier(), + renamedAttribute, + originalAttribute.id(), + originalAttribute.synthetic() + ) + ); + } + } + + Project projectWithGeneratingChild = project.replaceChild(generatingPlanWithRenamedAttributes.replaceChild(project.child())); + return projectWithGeneratingChild.withProjections( + mergeOutputExpressions(generatedAttributesRenamedToOriginal, projectWithGeneratingChild.projections()) + ); } return generatingPlan; @@ -264,8 +348,9 @@ private static AttributeReplacement renameAttributesInExpressions( rewrittenExpressions.add(expr.transformUp(Attribute.class, attr -> { if (attributeNamesToRename.contains(attr.name())) { Alias renamedAttribute = aliasesForReplacedAttributes.computeIfAbsent(attr, a -> { - String tempName = SubstituteSurrogates.rawTemporaryName(a.name(), "temp_name", a.id().toString()); + String tempName = locallyUniqueTemporaryName(a.name(), "temp_name"); // TODO: this should be synthetic + // blocked on https://github.com/elastic/elasticsearch/issues/98703 return new Alias(a.source(), tempName, null, a, null, false); }); return renamedAttribute.toAttribute(); @@ -278,16 +363,28 @@ private static AttributeReplacement renameAttributesInExpressions( return new AttributeReplacement(rewrittenExpressions, aliasesForReplacedAttributes); } + private static Map newNamesForConflictingAttributes( + List potentiallyConflictingAttributes, + Set reservedNames + ) { + if (reservedNames.isEmpty()) { + return Map.of(); + } + + Map renameAttributeTo = new HashMap<>(); + for (Attribute attr : potentiallyConflictingAttributes) { + String name = attr.name(); + if (reservedNames.contains(name)) { + renameAttributeTo.putIfAbsent(name, locallyUniqueTemporaryName(name, "temp_name")); + } + } + + return renameAttributeTo; + } + public static Project pushDownPastProject(UnaryPlan parent) { if (parent.child() instanceof Project project) { - AttributeMap.Builder aliasBuilder = AttributeMap.builder(); - project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child())); - var aliases = aliasBuilder.build(); - - var expressionsWithResolvedAliases = (UnaryPlan) parent.transformExpressionsOnly( - ReferenceAttribute.class, - r -> aliases.resolve(r, r) - ); + UnaryPlan expressionsWithResolvedAliases = resolveRenamesFromProject(parent, project); return project.replaceChild(expressionsWithResolvedAliases.replaceChild(project.child())); } else { @@ -295,6 +392,14 @@ public static Project pushDownPastProject(UnaryPlan parent) { } } + private static UnaryPlan resolveRenamesFromProject(UnaryPlan plan, Project project) { + AttributeMap.Builder aliasBuilder = AttributeMap.builder(); + project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child())); + var aliases = aliasBuilder.build(); + + return (UnaryPlan) plan.transformExpressionsOnly(ReferenceAttribute.class, r -> aliases.resolve(r, r)); + } + public abstract static class 
ParameterizedOptimizerRule extends ParameterizedRule< SubPlan, LogicalPlan, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java index 4c5d9efb449f7..fe1a66737b17b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java @@ -8,16 +8,17 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.xpack.esql.core.common.Failures; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.QueryPlan; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Enrich; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; -import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.MvExpand; -import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.Row; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; @@ -36,6 +37,9 @@ import org.elasticsearch.xpack.esql.plan.physical.RowExec; import org.elasticsearch.xpack.esql.plan.physical.ShowExec; +import java.util.HashSet; +import java.util.Set; + import static org.elasticsearch.xpack.esql.core.common.Failure.fail; class OptimizerRules { @@ -49,9 +53,24 @@ void checkPlan(P p, Failures failures) { AttributeSet input = p.inputSet(); AttributeSet generated = generates(p); AttributeSet missing = refs.subtract(input).subtract(generated); - if (missing.size() > 0) { + if (missing.isEmpty() == false) { failures.add(fail(p, "Plan [{}] optimized incorrectly due to missing references {}", p.nodeString(), missing)); } + + Set outputAttributeNames = new HashSet<>(); + Set outputAttributeIds = new HashSet<>(); + for (Attribute outputAttr : p.output()) { + if (outputAttributeNames.add(outputAttr.name()) == false || outputAttributeIds.add(outputAttr.id()) == false) { + failures.add( + fail( + p, + "Plan [{}] optimized incorrectly due to duplicate output attribute {}", + p.nodeString(), + outputAttr.toString() + ) + ); + } + } } protected AttributeSet references(P p) { @@ -83,18 +102,12 @@ protected AttributeSet generates(LogicalPlan logicalPlan) { || logicalPlan instanceof Aggregate) { return logicalPlan.outputSet(); } - if (logicalPlan instanceof Eval eval) { - return new AttributeSet(Expressions.asAttributes(eval.fields())); - } - if (logicalPlan instanceof RegexExtract extract) { - return new AttributeSet(extract.extractedFields()); + if (logicalPlan instanceof GeneratingPlan generating) { + return new AttributeSet(generating.generatedAttributes()); } if (logicalPlan instanceof MvExpand mvExpand) { return new AttributeSet(mvExpand.expanded()); } - if (logicalPlan instanceof Enrich enrich) { - return new AttributeSet(Expressions.asAttributes(enrich.enrichFields())); - } return AttributeSet.EMPTY; } diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java index f6a0154108f2d..7e102a36828a9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEnrich.java @@ -12,11 +12,9 @@ import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Enrich; -import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; - public final class PushDownEnrich extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(Enrich en) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(en, asAttributes(en.enrichFields())); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(en); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java index b936e5569c950..e9b42be8dd397 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownEval.java @@ -12,11 +12,9 @@ import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Eval; -import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; - public final class PushDownEval extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(Eval eval) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(eval, asAttributes(eval.fields())); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(eval); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java index f247d0a631b29..43e13a582276b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/PushDownRegexExtract.java @@ -15,6 +15,6 @@ public final class PushDownRegexExtract extends OptimizerRules.OptimizerRule { @Override protected LogicalPlan rule(RegexExtract re) { - return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(re, re.extractedFields()); + return LogicalPlanOptimizer.pushGeneratingPlanPastProjectAndOrderBy(re); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java index 476da7476f7fb..e9900d96710a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceOrderByExpressionWithEval.java @@ -19,7 +19,7 @@ import java.util.ArrayList; import java.util.List; -import static org.elasticsearch.xpack.esql.optimizer.rules.SubstituteSurrogates.rawTemporaryName; +import static 
org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer.rawTemporaryName; public final class ReplaceOrderByExpressionWithEval extends OptimizerRules.OptimizerRule { private static int counter = 0; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java index 012d6e307df6c..dbe518770c78d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsAggExpressionWithEval.java @@ -18,6 +18,7 @@ import org.elasticsearch.xpack.esql.core.util.CollectionUtils; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; @@ -150,6 +151,6 @@ protected LogicalPlan rule(Aggregate aggregate) { } static String syntheticName(Expression expression, Expression af, int counter) { - return SubstituteSurrogates.temporaryName(expression, af, counter); + return LogicalPlanOptimizer.temporaryName(expression, af, counter); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java index 99b0c8047f2ba..099e76010488e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/ReplaceStatsNestedExpressionWithEval.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; @@ -141,6 +142,6 @@ protected LogicalPlan rule(Aggregate aggregate) { } static String syntheticName(Expression expression, AggregateFunction af, int counter) { - return SubstituteSurrogates.temporaryName(expression, af, counter); + return LogicalPlanOptimizer.temporaryName(expression, af, counter); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java index fa4049b0e5a3a..b119d01715dce 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/SubstituteSurrogates.java @@ -20,6 +20,7 @@ import org.elasticsearch.xpack.esql.expression.SurrogateExpression; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.expression.function.aggregate.Rate; +import 
org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.Project; @@ -80,7 +81,7 @@ protected LogicalPlan rule(Aggregate aggregate) { var attr = aggFuncToAttr.get(af); // the agg doesn't exist in the Aggregate, create an alias for it and save its attribute if (attr == null) { - var temporaryName = temporaryName(af, agg, counter[0]++); + var temporaryName = LogicalPlanOptimizer.temporaryName(af, agg, counter[0]++); // create a synthetic alias (so it doesn't clash with a user defined name) var newAlias = new Alias(agg.source(), temporaryName, null, af, null, true); attr = newAlias.toAttribute(); @@ -133,28 +134,4 @@ protected LogicalPlan rule(Aggregate aggregate) { return plan; } - - public static String temporaryName(Expression inner, Expression outer, int suffix) { - String in = toString(inner); - String out = toString(outer); - return rawTemporaryName(in, out, String.valueOf(suffix)); - } - - public static String rawTemporaryName(String inner, String outer, String suffix) { - return "$$" + inner + "$" + outer + "$" + suffix; - } - - static int TO_STRING_LIMIT = 16; - - static String toString(Expression ex) { - return ex instanceof AggregateFunction af ? af.functionName() : extractString(ex); - } - - static String extractString(Expression ex) { - return ex instanceof NamedExpression ne ? ne.name() : limitToString(ex.sourceText()).replace(' ', '_'); - } - - static String limitToString(String string) { - return string.length() > 16 ? string.substring(0, TO_STRING_LIMIT - 1) + ">" : string; - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/TranslateMetricsAggregate.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/TranslateMetricsAggregate.java index 88486bcb864dc..ca08dd57189cc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/TranslateMetricsAggregate.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/TranslateMetricsAggregate.java @@ -150,7 +150,7 @@ LogicalPlan translate(Aggregate metrics) { if (changed.get()) { secondPassAggs.add(new Alias(alias.source(), alias.name(), null, outerAgg, agg.id())); } else { - var toPartial = new Alias(agg.source(), alias.name(), new ToPartial(agg.source(), af)); + var toPartial = new Alias(agg.source(), alias.name(), new ToPartial(agg.source(), af.field(), af)); var fromPartial = new FromPartial(agg.source(), toPartial.toAttribute(), af); firstPassAggs.add(toPartial); secondPassAggs.add(new Alias(alias.source(), alias.name(), null, fromPartial, alias.id())); @@ -218,7 +218,7 @@ private static Aggregate toStandardAggregate(Aggregate metrics) { final LogicalPlan child = metrics.child().transformDown(EsRelation.class, r -> { var attributes = new ArrayList<>(new AttributeSet(metrics.inputSet())); attributes.removeIf(a -> a.name().equals(MetadataAttribute.TSID_FIELD)); - if (attributes.stream().noneMatch(a -> a.name().equals(MetadataAttribute.TIMESTAMP_FIELD)) == false) { + if (attributes.stream().noneMatch(a -> a.name().equals(MetadataAttribute.TIMESTAMP_FIELD))) { attributes.removeIf(a -> a.name().equals(MetadataAttribute.TIMESTAMP_FIELD)); } return new EsRelation(r.source(), r.index(), new ArrayList<>(attributes), IndexMode.STANDARD); diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java index 9769d286b484d..88279b65d2007 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java @@ -89,9 +89,22 @@ public abstract class ExpressionBuilder extends IdentifierBuilder { private int expressionDepth = 0; /** - * Maximum depth for nested expressions + * Maximum depth for nested expressions. + * Avoids StackOverflowErrors at parse time with very convoluted expressions, + * eg. EVAL x = sin(sin(sin(sin(sin(sin(sin(sin(sin(....sin(x)....) + * ANTLR parser is recursive, so the only way to prevent a StackOverflow is to detect how + * deep we are in the expression parsing and abort the query execution after a threshold + * + * This value is defined empirically, but the actual stack limit is highly + * dependent on the JVM and on the JIT. + * + * A value of 500 proved to be right below the stack limit, but it still triggered + * some CI failures (once every ~2000 iterations). see https://github.com/elastic/elasticsearch/issues/109846 + * Even though we didn't manage to reproduce the problem in real conditions, we decided + * to reduce the max allowed depth to 400 (that is still a pretty reasonable limit for real use cases) and be more safe. + * */ - public static final int MAX_EXPRESSION_DEPTH = 500; + public static final int MAX_EXPRESSION_DEPTH = 400; protected final QueryParams params; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java index fee51c40a2525..85103109abed1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java @@ -26,7 +26,6 @@ import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar; import org.elasticsearch.xpack.esql.core.parser.ParserUtils; @@ -74,6 +73,7 @@ import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.source; import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.typedParsing; import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.visitList; +import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputExpressions; import static org.elasticsearch.xpack.esql.plan.logical.Enrich.Mode; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToInt; @@ -146,12 +146,30 @@ public PlanFactory visitEvalCommand(EsqlBaseParser.EvalCommandContext ctx) { @Override public PlanFactory visitGrokCommand(EsqlBaseParser.GrokCommandContext ctx) { return p -> { + Source source = source(ctx); String pattern = visitString(ctx.string()).fold().toString(); - Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), Grok.pattern(source(ctx), pattern)); + Grok.Parser grokParser = Grok.pattern(source, pattern); + 
validateGrokPattern(source, grokParser, pattern); + Grok result = new Grok(source(ctx), p, expression(ctx.primaryExpression()), grokParser); return result; }; } + private void validateGrokPattern(Source source, Grok.Parser grokParser, String pattern) { + Map definedAttributes = new HashMap<>(); + for (Attribute field : grokParser.extractedFields()) { + String name = field.name(); + DataType type = field.dataType(); + DataType prev = definedAttributes.put(name, type); + if (prev != null) { + throw new ParsingException( + source, + "Invalid GROK pattern [" + pattern + "]: the attribute [" + name + "] is defined multiple times with different types" + ); + } + } + } + @Override public PlanFactory visitDissectCommand(EsqlBaseParser.DissectCommandContext ctx) { return p -> { @@ -175,21 +193,20 @@ public PlanFactory visitDissectCommand(EsqlBaseParser.DissectCommandContext ctx) try { DissectParser parser = new DissectParser(pattern, appendSeparator); + Set referenceKeys = parser.referenceKeys(); - if (referenceKeys.size() > 0) { + if (referenceKeys.isEmpty() == false) { throw new ParsingException( src, "Reference keys not supported in dissect patterns: [%{*{}}]", referenceKeys.iterator().next() ); } - List keys = new ArrayList<>(); - for (var x : parser.outputKeys()) { - if (x.isEmpty() == false) { - keys.add(new ReferenceAttribute(src, x, DataType.KEYWORD)); - } - } - return new Dissect(src, p, expression(ctx.primaryExpression()), new Dissect.Parser(pattern, appendSeparator, parser), keys); + + Dissect.Parser esqlDissectParser = new Dissect.Parser(pattern, appendSeparator, parser); + List keys = esqlDissectParser.keyAttributes(src); + + return new Dissect(src, p, expression(ctx.primaryExpression()), esqlDissectParser, keys); } catch (DissectException e) { throw new ParsingException(src, "Invalid pattern for dissect: [{}]", pattern); } @@ -217,8 +234,9 @@ public Map visitCommandOptions(EsqlBaseParser.CommandOptionsCont } @Override + @SuppressWarnings("unchecked") public LogicalPlan visitRowCommand(EsqlBaseParser.RowCommandContext ctx) { - return new Row(source(ctx), visitFields(ctx.fields())); + return new Row(source(ctx), (List) (List) mergeOutputExpressions(visitFields(ctx.fields()), List.of())); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java new file mode 100644 index 0000000000000..0253ac8dafd84 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/GeneratingPlan.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plan; + +import org.elasticsearch.xpack.esql.core.expression.Attribute; + +import java.util.List; + +/** + * A plan that creates new {@link Attribute}s and appends them to the child + * {@link org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan}'s attributes. + * Attributes are appended on the right hand side of the child's input. In case of name conflicts, the rightmost attribute with + * a given name shadows any attributes left of it + * (c.f. {@link org.elasticsearch.xpack.esql.expression.NamedExpressions#mergeOutputAttributes(List, List)}). 
+ */ +public interface GeneratingPlan> { + List generatedAttributes(); + + /** + * Create a new instance of this node with new output {@link Attribute}s using the given names. + * If an output attribute already has the desired name, we continue using it; otherwise, we + * create a new attribute with a new {@link org.elasticsearch.xpack.esql.core.expression.NameId}. + */ + // TODO: the generated attributes should probably become synthetic once renamed + // blocked on https://github.com/elastic/elasticsearch/issues/98703 + PlanType withGeneratedNames(List newNames); + + default void checkNumberOfNewNames(List newNames) { + if (newNames.size() != generatedAttributes().size()) { + throw new IllegalArgumentException( + "Number of new names is [" + newNames.size() + "] but there are [" + generatedAttributes().size() + "] existing names." + ); + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java index 1307d1870bba4..58167381ea9e8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Dissect.java @@ -10,11 +10,14 @@ import org.elasticsearch.dissect.DissectParser; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -23,6 +26,17 @@ public class Dissect extends RegexExtract { public record Parser(String pattern, String appendSeparator, DissectParser parser) { + public List keyAttributes(Source src) { + List keys = new ArrayList<>(); + for (var x : parser.outputKeys()) { + if (x.isEmpty() == false) { + keys.add(new ReferenceAttribute(src, x, DataType.KEYWORD)); + } + } + + return keys; + } + // Override hashCode and equals since the parser is considered equal if its pattern and // appendSeparator are equal ( and DissectParser uses reference equality ) @Override @@ -54,6 +68,11 @@ protected NodeInfo info() { return NodeInfo.create(this, Dissect::new, child(), input, parser, extractedFields); } + @Override + public Dissect withGeneratedNames(List newNames) { + return new Dissect(source(), child(), input, parser, renameExtractedFields(newNames)); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java index f418ab5da1c9d..5a3b5b5d1875f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java @@ -10,27 +10,34 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; +import org.elasticsearch.xpack.esql.core.expression.Alias; import 
org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; +import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public class Enrich extends UnaryPlan { +public class Enrich extends UnaryPlan implements GeneratingPlan { private final Expression policyName; private final NamedExpression matchField; private final EnrichPolicy policy; private final Map concreteIndices; // cluster -> enrich indices + // This could be simplified by just always using an Alias. private final List enrichFields; private List output; @@ -128,6 +135,32 @@ public List output() { return output; } + @Override + public List generatedAttributes() { + return asAttributes(enrichFields); + } + + @Override + public Enrich withGeneratedNames(List newNames) { + checkNumberOfNewNames(newNames); + + List newEnrichFields = new ArrayList<>(enrichFields.size()); + for (int i = 0; i < enrichFields.size(); i++) { + NamedExpression enrichField = enrichFields.get(i); + String newName = newNames.get(i); + if (enrichField.name().equals(newName)) { + newEnrichFields.add(enrichField); + } else if (enrichField instanceof ReferenceAttribute ra) { + newEnrichFields.add(new Alias(ra.source(), newName, ra.qualifier(), ra, new NameId(), ra.synthetic())); + } else if (enrichField instanceof Alias a) { + newEnrichFields.add(new Alias(a.source(), newName, a.qualifier(), a.child(), new NameId(), a.synthetic())); + } else { + throw new IllegalArgumentException("Enrich field must be Alias or ReferenceAttribute"); + } + } + return new Enrich(source(), child(), mode(), policyName(), matchField(), policy(), concreteIndices(), newEnrichFields); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java index 08916c14e91bf..726b35c90f4d6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java @@ -99,7 +99,7 @@ public boolean expressionsResolved() { @Override public int hashCode() { - return Objects.hash(index, indexMode, frozen); + return Objects.hash(index, indexMode, frozen, attrs); } @Override @@ -113,7 +113,10 @@ public boolean equals(Object obj) { } EsRelation other = (EsRelation) obj; - return Objects.equals(index, other.index) && indexMode == other.indexMode() && frozen == other.frozen; + return Objects.equals(index, other.index) + && indexMode == other.indexMode() + && frozen == other.frozen + && Objects.equals(attrs, 
other.attrs); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java index bfe11c3d33d87..108122d4b163c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Eval.java @@ -10,17 +10,23 @@ import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.AttributeMap; +import org.elasticsearch.xpack.esql.core.expression.NameId; +import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Objects; +import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public class Eval extends UnaryPlan { +public class Eval extends UnaryPlan implements GeneratingPlan { private final List fields; private List lazyOutput; @@ -43,6 +49,50 @@ public List output() { return lazyOutput; } + @Override + public List generatedAttributes() { + return asAttributes(fields); + } + + @Override + public Eval withGeneratedNames(List newNames) { + checkNumberOfNewNames(newNames); + + return new Eval(source(), child(), renameAliases(fields, newNames)); + } + + private List renameAliases(List originalAttributes, List newNames) { + AttributeMap.Builder aliasReplacedByBuilder = AttributeMap.builder(); + List newFields = new ArrayList<>(originalAttributes.size()); + for (int i = 0; i < originalAttributes.size(); i++) { + Alias field = originalAttributes.get(i); + String newName = newNames.get(i); + if (field.name().equals(newName)) { + newFields.add(field); + } else { + Alias newField = new Alias(field.source(), newName, field.qualifier(), field.child(), new NameId(), field.synthetic()); + newFields.add(newField); + aliasReplacedByBuilder.put(field.toAttribute(), newField.toAttribute()); + } + } + AttributeMap aliasReplacedBy = aliasReplacedByBuilder.build(); + + // We need to also update any references to the old attributes in the new attributes; e.g. 
+ // EVAL x = 1, y = x + 1 + // renaming x, y to x1, y1 + // so far became + // EVAL x1 = 1, y1 = x + 1 + // - but x doesn't exist anymore, so replace it by x1 to obtain + // EVAL x1 = 1, y1 = x1 + 1 + + List newFieldsWithUpdatedRefs = new ArrayList<>(originalAttributes.size()); + for (Alias newField : newFields) { + newFieldsWithUpdatedRefs.add((Alias) newField.transformUp(ReferenceAttribute.class, r -> aliasReplacedBy.resolve(r, r))); + } + + return newFieldsWithUpdatedRefs; + } + @Override public boolean expressionsResolved() { return Resolvables.resolved(fields); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java index 0c1c400f3ab4d..3bd870e326157 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Grok.java @@ -32,7 +32,7 @@ public class Grok extends RegexExtract { public record Parser(String pattern, org.elasticsearch.grok.Grok grok) { - private List extractedFields() { + public List extractedFields() { return grok.captureConfig() .stream() .sorted(Comparator.comparing(GrokCaptureConfig::name)) @@ -105,6 +105,11 @@ public List output() { return NamedExpressions.mergeOutputAttributes(extractedFields, child().output()); } + @Override + public Grok withGeneratedNames(List newNames) { + return new Grok(source(), child(), input, parser, renameExtractedFields(newNames)); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java index 5bf45fc0f61ad..7c1d457c18e55 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/RegexExtract.java @@ -9,16 +9,19 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; +import java.util.ArrayList; import java.util.List; import java.util.Objects; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public abstract class RegexExtract extends UnaryPlan { +public abstract class RegexExtract extends UnaryPlan implements GeneratingPlan { protected final Expression input; protected final List extractedFields; @@ -42,10 +45,36 @@ public Expression input() { return input; } + /** + * Upon parsing, these are named according to the {@link Dissect} or {@link Grok} pattern, but can be renamed without changing the + * pattern. 
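+ * For example (field names purely illustrative): a pattern extracting keys {@code a} and {@code b} initially yields
+ * attributes named {@code a} and {@code b}; an optimizer rule may later rename them via
+ * {@link GeneratingPlan#withGeneratedNames}, e.g. to avoid shadowing, while the pattern keeps using its original keys.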
+ */ public List extractedFields() { return extractedFields; } + @Override + public List generatedAttributes() { + return extractedFields; + } + + List renameExtractedFields(List newNames) { + checkNumberOfNewNames(newNames); + + List renamedExtractedFields = new ArrayList<>(extractedFields.size()); + for (int i = 0; i < newNames.size(); i++) { + Attribute extractedField = extractedFields.get(i); + String newName = newNames.get(i); + if (extractedField.name().equals(newName)) { + renamedExtractedFields.add(extractedField); + } else { + renamedExtractedFields.add(extractedFields.get(i).withName(newNames.get(i)).withId(new NameId())); + } + } + + return renamedExtractedFields; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java index 7d99c566aa0c7..9a296dc58eb43 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java @@ -7,7 +7,11 @@ package org.elasticsearch.xpack.esql.plan.logical; +import org.elasticsearch.xpack.esql.analysis.Analyzer.ResolveRefs; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -30,6 +34,14 @@ public List renamings() { return renamings; } + @Override + public List output() { + // Normally shouldn't reach here, as Rename only exists before resolution. 
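+ // If we do get here (e.g. when printing a not-yet-resolved plan), compute the output the same way the analyzer
+ // would: apply the renamings on top of the child's output via ResolveRefs.projectionsForRename.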
+ List projectionsAfterResolution = ResolveRefs.projectionsForRename(this, this.child().output(), null); + + return Expressions.asAttributes(projectionsAfterResolution); + } + @Override public boolean expressionsResolved() { for (var alias : renamings) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java index 91433e42033c5..009e65cda9251 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AggregateMapper.java @@ -264,7 +264,7 @@ private static DataType toDataType(ElementType elementType) { case INT -> DataType.INTEGER; case LONG -> DataType.LONG; case DOUBLE -> DataType.DOUBLE; - default -> throw new EsqlIllegalArgumentException("unsupported agg type: " + elementType); + case FLOAT, NULL, DOC, COMPOSITE, UNKNOWN -> throw new EsqlIllegalArgumentException("unsupported agg type: " + elementType); }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 9e1e1a50fe8f0..0c1928c7c9845 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -116,7 +116,8 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi DataType dataType = attr.dataType(); MappedFieldType.FieldExtractPreference fieldExtractPreference = PlannerUtils.extractPreference(docValuesAttrs.contains(attr)); ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference); - String fieldName = attr.name(); + // Do not use the field attribute name, this can deviate from the field name for union types. + String fieldName = attr instanceof FieldAttribute fa ? fa.fieldName() : attr.name(); boolean isUnsupported = dataType == DataType.UNSUPPORTED; IntFunction loader = s -> getBlockLoaderFor(s, fieldName, isUnsupported, fieldExtractPreference, unionTypes); fields.add(new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, loader)); @@ -233,8 +234,11 @@ public final Operator.OperatorFactory ordinalGroupingOperatorFactory( // The grouping-by values are ready, let's group on them directly. // Costin: why are they ready and not already exposed in the layout? boolean isUnsupported = attrSource.dataType() == DataType.UNSUPPORTED; + var unionTypes = findUnionTypes(attrSource); + // Do not use the field attribute name, this can deviate from the field name for union types. + String fieldName = attrSource instanceof FieldAttribute fa ? 
fa.fieldName() : attrSource.name(); return new OrdinalsGroupingOperator.OrdinalsGroupingOperatorFactory( - shardIdx -> shardContexts.get(shardIdx).blockLoader(attrSource.name(), isUnsupported, NONE), + shardIdx -> getBlockLoaderFor(shardIdx, fieldName, isUnsupported, NONE, unionTypes), vsShardContexts, groupElementType, docChannel, @@ -434,12 +438,13 @@ public StoredFieldsSpec rowStrideStoredFieldSpec() { @Override public boolean supportsOrdinals() { - return delegate.supportsOrdinals(); + // Fields with mismatching types cannot use ordinals for uniqueness determination, but must convert the values first + return false; } @Override - public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { - return delegate.ordinals(context); + public SortedSetDocValues ordinals(LeafReaderContext context) { + throw new IllegalArgumentException("Ordinals are not supported for type conversion"); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index ddf5fa6eaf8a3..28855abfff73c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -58,6 +58,8 @@ import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.enrich.EnrichLookupOperator; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; @@ -417,12 +419,14 @@ private PhysicalOperation planDissect(DissectExec dissect, LocalExecutionPlanner Layout.Builder layoutBuilder = source.layout.builder(); layoutBuilder.append(dissect.extractedFields()); final Expression expr = dissect.inputExpression(); - String[] attributeNames = Expressions.names(dissect.extractedFields()).toArray(new String[0]); + // Names in the pattern and layout can differ. + // Attributes need to be rename-able to avoid problems with shadowing - see GeneratingPlan resp. PushDownRegexExtract. + String[] patternNames = Expressions.names(dissect.parser().keyAttributes(Source.EMPTY)).toArray(new String[0]); Layout layout = layoutBuilder.build(); source = source.with( new StringExtractOperator.StringExtractOperatorFactory( - attributeNames, + patternNames, EvalMapper.toEvaluator(expr, layout), () -> (input) -> dissect.parser().parser().parse(input) ), @@ -439,11 +443,15 @@ private PhysicalOperation planGrok(GrokExec grok, LocalExecutionPlannerContext c Map fieldToPos = new HashMap<>(extractedFields.size()); Map fieldToType = new HashMap<>(extractedFields.size()); ElementType[] types = new ElementType[extractedFields.size()]; + List extractedFieldsFromPattern = grok.pattern().extractedFields(); for (int i = 0; i < extractedFields.size(); i++) { - Attribute extractedField = extractedFields.get(i); - ElementType type = PlannerUtils.toElementType(extractedField.dataType()); - fieldToPos.put(extractedField.name(), i); - fieldToType.put(extractedField.name(), type); + DataType extractedFieldType = extractedFields.get(i).dataType(); + // Names in pattern and layout can differ. 
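+ // (For instance, the layout may contain a temporary name like $$y$temp_name$10 while the Grok pattern still captures y.)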
+ // Attributes need to be rename-able to avoid problems with shadowing - see GeneratingPlan resp. PushDownRegexExtract. + String patternName = extractedFieldsFromPattern.get(i).name(); + ElementType type = PlannerUtils.toElementType(extractedFieldType); + fieldToPos.put(patternName, i); + fieldToType.put(patternName, type); types[i] = type; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeListener.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeListener.java new file mode 100644 index 0000000000000..01d50d505f7f2 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeListener.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.RefCountingListener; +import org.elasticsearch.compute.operator.DriverProfile; +import org.elasticsearch.compute.operator.FailureCollector; +import org.elasticsearch.compute.operator.ResponseHeadersCollector; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.transport.TransportService; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * A variant of {@link RefCountingListener} with the following differences: + * 1. Automatically cancels sub tasks on failure. + * 2. Collects driver profiles from sub tasks. + * 3. Collects response headers from sub tasks, specifically warnings emitted during compute + * 4. Collects failures and returns the most appropriate exception to the caller. + */ +final class ComputeListener implements Releasable { + private static final Logger LOGGER = LogManager.getLogger(ComputeService.class); + + private final RefCountingListener refs; + private final FailureCollector failureCollector = new FailureCollector(); + private final AtomicBoolean cancelled = new AtomicBoolean(); + private final CancellableTask task; + private final TransportService transportService; + private final List collectedProfiles; + private final ResponseHeadersCollector responseHeaders; + + ComputeListener(TransportService transportService, CancellableTask task, ActionListener delegate) { + this.transportService = transportService; + this.task = task; + this.responseHeaders = new ResponseHeadersCollector(transportService.getThreadPool().getThreadContext()); + this.collectedProfiles = Collections.synchronizedList(new ArrayList<>()); + this.refs = new RefCountingListener(1, ActionListener.wrap(ignored -> { + responseHeaders.finish(); + var result = new ComputeResponse(collectedProfiles.isEmpty() ? 
List.of() : collectedProfiles.stream().toList()); + delegate.onResponse(result); + }, e -> delegate.onFailure(failureCollector.getFailure()))); + } + + /** + * Acquires a new listener that doesn't collect result + */ + ActionListener acquireAvoid() { + return refs.acquire().delegateResponse((l, e) -> { + failureCollector.unwrapAndCollect(e); + try { + if (cancelled.compareAndSet(false, true)) { + LOGGER.debug("cancelling ESQL task {} on failure", task); + transportService.getTaskManager().cancelTaskAndDescendants(task, "cancelled on failure", false, ActionListener.noop()); + } + } finally { + l.onFailure(e); + } + }); + } + + /** + * Acquires a new listener that collects compute result. This listener will also collects warnings emitted during compute + */ + ActionListener acquireCompute() { + return acquireAvoid().map(resp -> { + responseHeaders.collect(); + var profiles = resp.getProfiles(); + if (profiles != null && profiles.isEmpty() == false) { + collectedProfiles.addAll(profiles); + } + return null; + }); + } + + @Override + public void close() { + refs.close(); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index 4ebc4af258134..b26fbc7ac4804 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -29,7 +29,6 @@ import org.elasticsearch.compute.operator.Driver; import org.elasticsearch.compute.operator.DriverProfile; import org.elasticsearch.compute.operator.DriverTaskRunner; -import org.elasticsearch.compute.operator.ResponseHeadersCollector; import org.elasticsearch.compute.operator.exchange.ExchangeService; import org.elasticsearch.compute.operator.exchange.ExchangeSink; import org.elasticsearch.compute.operator.exchange.ExchangeSinkHandler; @@ -81,7 +80,6 @@ import java.util.Set; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Supplier; import static org.elasticsearch.xpack.esql.plugin.EsqlPlugin.ESQL_WORKER_THREAD_POOL_NAME; @@ -172,13 +170,16 @@ public void execute( null, null ); - runCompute( - rootTask, - computeContext, - coordinatorPlan, - listener.map(driverProfiles -> new Result(collectedPages, driverProfiles)) - ); - return; + try ( + var computeListener = new ComputeListener( + transportService, + rootTask, + listener.map(r -> new Result(collectedPages, r.getProfiles())) + ) + ) { + runCompute(rootTask, computeContext, coordinatorPlan, computeListener.acquireCompute()); + return; + } } else { if (clusterToConcreteIndices.values().stream().allMatch(v -> v.indices().length == 0)) { var error = "expected concrete indices with data node plan but got empty; data node plan " + dataNodePlan; @@ -191,31 +192,25 @@ public void execute( .groupIndices(SearchRequest.DEFAULT_INDICES_OPTIONS, PlannerUtils.planOriginalIndices(physicalPlan)); var localOriginalIndices = clusterToOriginalIndices.remove(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); var localConcreteIndices = clusterToConcreteIndices.remove(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); - final var responseHeadersCollector = new ResponseHeadersCollector(transportService.getThreadPool().getThreadContext()); - listener = ActionListener.runBefore(listener, responseHeadersCollector::finish); - final AtomicBoolean cancelled = new AtomicBoolean(); - final List 
collectedProfiles = configuration.profile() ? Collections.synchronizedList(new ArrayList<>()) : List.of(); final var exchangeSource = new ExchangeSourceHandler( queryPragmas.exchangeBufferSize(), transportService.getThreadPool().executor(ThreadPool.Names.SEARCH) ); try ( Releasable ignored = exchangeSource.addEmptySink(); - RefCountingListener refs = new RefCountingListener(listener.map(unused -> new Result(collectedPages, collectedProfiles))) + var computeListener = new ComputeListener( + transportService, + rootTask, + listener.map(r -> new Result(collectedPages, r.getProfiles())) + ) ) { // run compute on the coordinator - exchangeSource.addCompletionListener(refs.acquire()); + exchangeSource.addCompletionListener(computeListener.acquireAvoid()); runCompute( rootTask, new ComputeContext(sessionId, RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY, List.of(), configuration, exchangeSource, null), coordinatorPlan, - cancelOnFailure(rootTask, cancelled, refs.acquire()).map(driverProfiles -> { - responseHeadersCollector.collect(); - if (configuration.profile()) { - collectedProfiles.addAll(driverProfiles); - } - return null; - }) + computeListener.acquireCompute() ); // starts computes on data nodes on the main cluster if (localConcreteIndices != null && localConcreteIndices.indices().length > 0) { @@ -228,17 +223,10 @@ public void execute( Set.of(localConcreteIndices.indices()), localOriginalIndices.indices(), exchangeSource, - ActionListener.releaseAfter(refs.acquire(), exchangeSource.addEmptySink()), - () -> cancelOnFailure(rootTask, cancelled, refs.acquire()).map(response -> { - responseHeadersCollector.collect(); - if (configuration.profile()) { - collectedProfiles.addAll(response.getProfiles()); - } - return null; - }) + computeListener ); } - // starts computes on remote cluster + // starts computes on remote clusters startComputeOnRemoteClusters( sessionId, rootTask, @@ -246,13 +234,7 @@ public void execute( dataNodePlan, exchangeSource, getRemoteClusters(clusterToConcreteIndices, clusterToOriginalIndices), - () -> cancelOnFailure(rootTask, cancelled, refs.acquire()).map(response -> { - responseHeadersCollector.collect(); - if (configuration.profile()) { - collectedProfiles.addAll(response.getProfiles()); - } - return null; - }) + computeListener ); } } @@ -288,8 +270,7 @@ private void startComputeOnDataNodes( Set concreteIndices, String[] originalIndices, ExchangeSourceHandler exchangeSource, - ActionListener parentListener, - Supplier> dataNodeListenerSupplier + ComputeListener computeListener ) { var planWithReducer = configuration.pragmas().nodeLevelReduction() == false ? dataNodePlan @@ -303,12 +284,12 @@ private void startComputeOnDataNodes( // Since it's used only for @timestamp, it is relatively safe to assume it's not needed // but it would be better to have a proper impl. QueryBuilder requestFilter = PlannerUtils.requestFilter(planWithReducer, x -> true); + var lookupListener = ActionListener.releaseAfter(computeListener.acquireAvoid(), exchangeSource.addEmptySink()); lookupDataNodes(parentTask, clusterAlias, requestFilter, concreteIndices, originalIndices, ActionListener.wrap(dataNodes -> { - try (RefCountingRunnable refs = new RefCountingRunnable(() -> parentListener.onResponse(null))) { + try (RefCountingListener refs = new RefCountingListener(lookupListener)) { // For each target node, first open a remote exchange on the remote node, then link the exchange source to // the new remote exchange sink, and initialize the computation on the target node via data-node-request. 
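+ // Each per-node response goes through computeListener.acquireCompute(), which collects that node's driver
+ // profiles and response headers (warnings) and cancels the whole ESQL task tree if any node fails.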
for (DataNode node : dataNodes) { - var dataNodeListener = ActionListener.releaseAfter(dataNodeListenerSupplier.get(), refs.acquire()); var queryPragmas = configuration.pragmas(); ExchangeService.openExchange( transportService, @@ -316,9 +297,10 @@ private void startComputeOnDataNodes( sessionId, queryPragmas.exchangeBufferSize(), esqlExecutor, - dataNodeListener.delegateFailureAndWrap((delegate, unused) -> { + refs.acquire().delegateFailureAndWrap((l, unused) -> { var remoteSink = exchangeService.newRemoteSink(parentTask, sessionId, transportService, node.connection); exchangeSource.addRemoteSink(remoteSink, queryPragmas.concurrentExchangeClients()); + var dataNodeListener = ActionListener.runBefore(computeListener.acquireCompute(), () -> l.onResponse(null)); transportService.sendChildRequest( node.connection, DATA_ACTION_NAME, @@ -332,13 +314,13 @@ private void startComputeOnDataNodes( ), parentTask, TransportRequestOptions.EMPTY, - new ActionListenerResponseHandler<>(delegate, ComputeResponse::new, esqlExecutor) + new ActionListenerResponseHandler<>(dataNodeListener, ComputeResponse::new, esqlExecutor) ); }) ); } } - }, parentListener::onFailure)); + }, lookupListener::onFailure)); } private void startComputeOnRemoteClusters( @@ -348,19 +330,19 @@ private void startComputeOnRemoteClusters( PhysicalPlan plan, ExchangeSourceHandler exchangeSource, List clusters, - Supplier> listener + ComputeListener computeListener ) { - try (RefCountingRunnable refs = new RefCountingRunnable(exchangeSource.addEmptySink()::close)) { + var queryPragmas = configuration.pragmas(); + var linkExchangeListeners = ActionListener.releaseAfter(computeListener.acquireAvoid(), exchangeSource.addEmptySink()); + try (RefCountingListener refs = new RefCountingListener(linkExchangeListeners)) { for (RemoteCluster cluster : clusters) { - var targetNodeListener = ActionListener.releaseAfter(listener.get(), refs.acquire()); - var queryPragmas = configuration.pragmas(); ExchangeService.openExchange( transportService, cluster.connection, sessionId, queryPragmas.exchangeBufferSize(), esqlExecutor, - targetNodeListener.delegateFailureAndWrap((l, unused) -> { + refs.acquire().delegateFailureAndWrap((l, unused) -> { var remoteSink = exchangeService.newRemoteSink(rootTask, sessionId, transportService, cluster.connection); exchangeSource.addRemoteSink(remoteSink, queryPragmas.concurrentExchangeClients()); var clusterRequest = new ClusterComputeRequest( @@ -371,13 +353,14 @@ private void startComputeOnRemoteClusters( cluster.concreteIndices, cluster.originalIndices ); + var clusterListener = ActionListener.runBefore(computeListener.acquireCompute(), () -> l.onResponse(null)); transportService.sendChildRequest( cluster.connection, CLUSTER_ACTION_NAME, clusterRequest, rootTask, TransportRequestOptions.EMPTY, - new ActionListenerResponseHandler<>(l, ComputeResponse::new, esqlExecutor) + new ActionListenerResponseHandler<>(clusterListener, ComputeResponse::new, esqlExecutor) ); }) ); @@ -385,17 +368,7 @@ private void startComputeOnRemoteClusters( } } - private ActionListener cancelOnFailure(CancellableTask task, AtomicBoolean cancelled, ActionListener listener) { - return listener.delegateResponse((l, e) -> { - l.onFailure(e); - if (cancelled.compareAndSet(false, true)) { - LOGGER.debug("cancelling ESQL task {} on failure", task); - transportService.getTaskManager().cancelTaskAndDescendants(task, "cancelled", false, ActionListener.noop()); - } - }); - } - - void runCompute(CancellableTask task, ComputeContext context, PhysicalPlan 
plan, ActionListener> listener) { + void runCompute(CancellableTask task, ComputeContext context, PhysicalPlan plan, ActionListener listener) { listener = ActionListener.runBefore(listener, () -> Releasables.close(context.searchContexts)); List contexts = new ArrayList<>(context.searchContexts.size()); for (int i = 0; i < context.searchContexts.size(); i++) { @@ -445,9 +418,10 @@ void runCompute(CancellableTask task, ComputeContext context, PhysicalPlan plan, } ActionListener listenerCollectingStatus = listener.map(ignored -> { if (context.configuration.profile()) { - return drivers.stream().map(Driver::profile).toList(); + return new ComputeResponse(drivers.stream().map(Driver::profile).toList()); + } else { + return new ComputeResponse(List.of()); } - return null; }); listenerCollectingStatus = ActionListener.releaseAfter(listenerCollectingStatus, () -> Releasables.close(drivers)); driverRunner.executeDrivers( @@ -612,8 +586,7 @@ private class DataNodeRequestExecutor { private final DataNodeRequest request; private final CancellableTask parentTask; private final ExchangeSinkHandler exchangeSink; - private final ActionListener listener; - private final List driverProfiles; + private final ComputeListener computeListener; private final int maxConcurrentShards; private final ExchangeSink blockingSink; // block until we have completed on all shards or the coordinator has enough data @@ -622,14 +595,12 @@ private class DataNodeRequestExecutor { CancellableTask parentTask, ExchangeSinkHandler exchangeSink, int maxConcurrentShards, - List driverProfiles, - ActionListener listener + ComputeListener computeListener ) { this.request = request; this.parentTask = parentTask; this.exchangeSink = exchangeSink; - this.listener = listener; - this.driverProfiles = driverProfiles; + this.computeListener = computeListener; this.maxConcurrentShards = maxConcurrentShards; this.blockingSink = exchangeSink.createExchangeSink(); } @@ -647,40 +618,46 @@ private void runBatch(int startBatchIndex) { final var sessionId = request.sessionId(); final int endBatchIndex = Math.min(startBatchIndex + maxConcurrentShards, request.shardIds().size()); List shardIds = request.shardIds().subList(startBatchIndex, endBatchIndex); + ActionListener batchListener = new ActionListener<>() { + final ActionListener ref = computeListener.acquireCompute(); + + @Override + public void onResponse(ComputeResponse result) { + try { + onBatchCompleted(endBatchIndex); + } finally { + ref.onResponse(result); + } + } + + @Override + public void onFailure(Exception e) { + try { + exchangeService.finishSinkHandler(request.sessionId(), e); + } finally { + ref.onFailure(e); + } + } + }; acquireSearchContexts(clusterAlias, shardIds, configuration, request.aliasFilters(), ActionListener.wrap(searchContexts -> { assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.SEARCH, ESQL_WORKER_THREAD_POOL_NAME); var computeContext = new ComputeContext(sessionId, clusterAlias, searchContexts, configuration, null, exchangeSink); - runCompute( - parentTask, - computeContext, - request.plan(), - ActionListener.wrap(profiles -> onBatchCompleted(endBatchIndex, profiles), this::onFailure) - ); - }, this::onFailure)); + runCompute(parentTask, computeContext, request.plan(), batchListener); + }, batchListener::onFailure)); } - private void onBatchCompleted(int lastBatchIndex, List batchProfiles) { - if (request.configuration().profile()) { - driverProfiles.addAll(batchProfiles); - } + private void onBatchCompleted(int lastBatchIndex) { if (lastBatchIndex < 
request.shardIds().size() && exchangeSink.isFinished() == false) { runBatch(lastBatchIndex); } else { - blockingSink.finish(); // don't return until all pages are fetched + var completionListener = computeListener.acquireAvoid(); exchangeSink.addCompletionListener( - ContextPreservingActionListener.wrapPreservingContext( - ActionListener.runBefore(listener, () -> exchangeService.finishSinkHandler(request.sessionId(), null)), - transportService.getThreadPool().getThreadContext() - ) + ActionListener.runAfter(completionListener, () -> exchangeService.finishSinkHandler(request.sessionId(), null)) ); + blockingSink.finish(); } } - - private void onFailure(Exception e) { - exchangeService.finishSinkHandler(request.sessionId(), e); - listener.onFailure(e); - } } private void runComputeOnDataNode( @@ -688,17 +665,10 @@ private void runComputeOnDataNode( String externalId, PhysicalPlan reducePlan, DataNodeRequest request, - ActionListener listener + ComputeListener computeListener ) { - final List collectedProfiles = request.configuration().profile() - ? Collections.synchronizedList(new ArrayList<>()) - : List.of(); - final var responseHeadersCollector = new ResponseHeadersCollector(transportService.getThreadPool().getThreadContext()); - final RefCountingListener listenerRefs = new RefCountingListener( - ActionListener.runBefore(listener.map(unused -> new ComputeResponse(collectedProfiles)), responseHeadersCollector::finish) - ); + var parentListener = computeListener.acquireAvoid(); try { - final AtomicBoolean cancelled = new AtomicBoolean(); // run compute with target shards var internalSink = exchangeService.createSinkHandler(request.sessionId(), request.pragmas().exchangeBufferSize()); DataNodeRequestExecutor dataNodeRequestExecutor = new DataNodeRequestExecutor( @@ -706,17 +676,16 @@ private void runComputeOnDataNode( task, internalSink, request.configuration().pragmas().maxConcurrentShardsPerNode(), - collectedProfiles, - ActionListener.runBefore(cancelOnFailure(task, cancelled, listenerRefs.acquire()), responseHeadersCollector::collect) + computeListener ); dataNodeRequestExecutor.start(); // run the node-level reduction var externalSink = exchangeService.getSinkHandler(externalId); task.addListener(() -> exchangeService.finishSinkHandler(externalId, new TaskCancelledException(task.getReasonCancelled()))); var exchangeSource = new ExchangeSourceHandler(1, esqlExecutor); - exchangeSource.addCompletionListener(listenerRefs.acquire()); + exchangeSource.addCompletionListener(computeListener.acquireAvoid()); exchangeSource.addRemoteSink(internalSink::fetchPageAsync, 1); - ActionListener reductionListener = cancelOnFailure(task, cancelled, listenerRefs.acquire()); + ActionListener reductionListener = computeListener.acquireCompute(); runCompute( task, new ComputeContext( @@ -728,26 +697,22 @@ private void runComputeOnDataNode( externalSink ), reducePlan, - ActionListener.wrap(driverProfiles -> { - responseHeadersCollector.collect(); - if (request.configuration().profile()) { - collectedProfiles.addAll(driverProfiles); - } + ActionListener.wrap(resp -> { // don't return until all pages are fetched - externalSink.addCompletionListener( - ActionListener.runBefore(reductionListener, () -> exchangeService.finishSinkHandler(externalId, null)) - ); + externalSink.addCompletionListener(ActionListener.running(() -> { + exchangeService.finishSinkHandler(externalId, null); + reductionListener.onResponse(resp); + })); }, e -> { exchangeService.finishSinkHandler(externalId, e); reductionListener.onFailure(e); 
}) ); + parentListener.onResponse(null); } catch (Exception e) { exchangeService.finishSinkHandler(externalId, e); exchangeService.finishSinkHandler(request.sessionId(), e); - listenerRefs.acquire().onFailure(e); - } finally { - listenerRefs.close(); + parentListener.onFailure(e); } } @@ -784,7 +749,9 @@ public void messageReceived(DataNodeRequest request, TransportChannel channel, T request.aliasFilters(), request.plan() ); - runComputeOnDataNode((CancellableTask) task, sessionId, reducePlan, request, listener); + try (var computeListener = new ComputeListener(transportService, (CancellableTask) task, listener)) { + runComputeOnDataNode((CancellableTask) task, sessionId, reducePlan, request, computeListener); + } } } @@ -798,16 +765,18 @@ public void messageReceived(ClusterComputeRequest request, TransportChannel chan listener.onFailure(new IllegalStateException("expected exchange sink for a remote compute; got " + request.plan())); return; } - runComputeOnRemoteCluster( - request.clusterAlias(), - request.sessionId(), - (CancellableTask) task, - request.configuration(), - (ExchangeSinkExec) request.plan(), - Set.of(request.indices()), - request.originalIndices(), - listener - ); + try (var computeListener = new ComputeListener(transportService, (CancellableTask) task, listener)) { + runComputeOnRemoteCluster( + request.clusterAlias(), + request.sessionId(), + (CancellableTask) task, + request.configuration(), + (ExchangeSinkExec) request.plan(), + Set.of(request.indices()), + request.originalIndices(), + computeListener + ); + } } } @@ -828,28 +797,20 @@ void runComputeOnRemoteCluster( ExchangeSinkExec plan, Set concreteIndices, String[] originalIndices, - ActionListener listener + ComputeListener computeListener ) { final var exchangeSink = exchangeService.getSinkHandler(globalSessionId); parentTask.addListener( () -> exchangeService.finishSinkHandler(globalSessionId, new TaskCancelledException(parentTask.getReasonCancelled())) ); - ThreadPool threadPool = transportService.getThreadPool(); - final var responseHeadersCollector = new ResponseHeadersCollector(threadPool.getThreadContext()); - listener = ActionListener.runBefore(listener, responseHeadersCollector::finish); - final AtomicBoolean cancelled = new AtomicBoolean(); - final List collectedProfiles = configuration.profile() ? 
Collections.synchronizedList(new ArrayList<>()) : List.of(); final String localSessionId = clusterAlias + ":" + globalSessionId; var exchangeSource = new ExchangeSourceHandler( configuration.pragmas().exchangeBufferSize(), transportService.getThreadPool().executor(ThreadPool.Names.SEARCH) ); - try ( - Releasable ignored = exchangeSource.addEmptySink(); - RefCountingListener refs = new RefCountingListener(listener.map(unused -> new ComputeResponse(collectedProfiles))) - ) { - exchangeSink.addCompletionListener(refs.acquire()); - exchangeSource.addCompletionListener(refs.acquire()); + try (Releasable ignored = exchangeSource.addEmptySink()) { + exchangeSink.addCompletionListener(computeListener.acquireAvoid()); + exchangeSource.addCompletionListener(computeListener.acquireAvoid()); PhysicalPlan coordinatorPlan = new ExchangeSinkExec( plan.source(), plan.output(), @@ -860,13 +821,7 @@ void runComputeOnRemoteCluster( parentTask, new ComputeContext(localSessionId, clusterAlias, List.of(), configuration, exchangeSource, exchangeSink), coordinatorPlan, - cancelOnFailure(parentTask, cancelled, refs.acquire()).map(driverProfiles -> { - responseHeadersCollector.collect(); - if (configuration.profile()) { - collectedProfiles.addAll(driverProfiles); - } - return null; - }) + computeListener.acquireCompute() ); startComputeOnDataNodes( localSessionId, @@ -877,14 +832,7 @@ void runComputeOnRemoteCluster( concreteIndices, originalIndices, exchangeSource, - ActionListener.releaseAfter(refs.acquire(), exchangeSource.addEmptySink()), - () -> cancelOnFailure(parentTask, cancelled, refs.acquire()).map(r -> { - responseHeadersCollector.collect(); - if (configuration.profile()) { - collectedProfiles.addAll(r.getProfiles()); - } - return null; - }) + computeListener ); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 1f2ec0c236ecf..edf05a3d1f281 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -631,30 +631,6 @@ public void testRenameReuseAlias() { """, "_meta_field", "e", "gender", "job", "job.raw", "languages", "last_name", "long_noidx", "salary"); } - public void testRenameUnsupportedField() { - assertProjectionWithMapping(""" - from test - | rename unsupported as u - | keep int, u, float - """, "mapping-multi-field-variation.json", "int", "u", "float"); - } - - public void testRenameUnsupportedFieldChained() { - assertProjectionWithMapping(""" - from test - | rename unsupported as u1, u1 as u2 - | keep int, u2, float - """, "mapping-multi-field-variation.json", "int", "u2", "float"); - } - - public void testRenameUnsupportedAndResolved() { - assertProjectionWithMapping(""" - from test - | rename unsupported as u, float as f - | keep int, u, f - """, "mapping-multi-field-variation.json", "int", "u", "f"); - } - public void testRenameUnsupportedSubFieldAndResolved() { assertProjectionWithMapping(""" from test @@ -1539,7 +1515,7 @@ public void testEnrichWrongMatchFieldType() { | enrich languages on x | keep first_name, language_name, id """)); - assertThat(e.getMessage(), containsString("Unsupported type [BOOLEAN] for enrich matching field [x]; only [KEYWORD,")); + assertThat(e.getMessage(), containsString("Unsupported type [boolean] for enrich matching field [x]; only [keyword, ")); e = 
expectThrows(VerificationException.class, () -> analyze(""" FROM airports @@ -1547,7 +1523,7 @@ public void testEnrichWrongMatchFieldType() { | ENRICH city_boundaries ON x | KEEP abbrev, airport, region """, "airports", "mapping-airports.json")); - assertThat(e.getMessage(), containsString("Unsupported type [KEYWORD] for enrich matching field [x]; only [GEO_POINT,")); + assertThat(e.getMessage(), containsString("Unsupported type [keyword] for enrich matching field [x]; only [geo_point, ")); } public void testValidEnrich() { @@ -1830,7 +1806,7 @@ public void testUnsupportedTypesInStats() { Found 8 problems line 2:12: argument of [avg(x)] must be [numeric except unsigned_long or counter types],\ found value [x] type [unsigned_long] - line 2:20: argument of [count_distinct(x)] must be [any exact type except unsigned_long or counter types],\ + line 2:20: argument of [count_distinct(x)] must be [any exact type except unsigned_long, _source, or counter types],\ found value [x] type [unsigned_long] line 2:39: argument of [max(x)] must be [datetime or numeric except unsigned_long or counter types],\ found value [max(x)] type [unsigned_long] diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index ad08130c5b0d9..988f66fa2ef79 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -10,14 +10,23 @@ import org.elasticsearch.Build; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.core.index.EsIndex; +import org.elasticsearch.xpack.esql.core.index.IndexResolution; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; +import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.elasticsearch.xpack.esql.parser.QueryParam; import org.elasticsearch.xpack.esql.parser.QueryParams; import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; +import java.util.Set; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping; @@ -46,6 +55,186 @@ public void testIncompatibleTypesInMathOperation() { ); } + public void testUnsupportedAndMultiTypedFields() { + final String unsupported = "unsupported"; + final String multiTyped = "multi_typed"; + + EsField unsupportedField = new UnsupportedEsField(unsupported, "flattened"); + // Use linked maps/sets to fix the order in the error message. + LinkedHashSet ipIndices = new LinkedHashSet<>(); + ipIndices.add("test1"); + ipIndices.add("test2"); + LinkedHashMap> typesToIndices = new LinkedHashMap<>(); + typesToIndices.put("ip", ipIndices); + typesToIndices.put("keyword", Set.of("test3")); + EsField multiTypedField = new InvalidMappedField(multiTyped, typesToIndices); + + // Also add an unsupported/multityped field under the names `int` and `double` so we can use `LOOKUP int_number_names ...` and + // `LOOKUP double_number_names` without renaming the fields first. 
+ IndexResolution indexWithUnsupportedAndMultiTypedField = IndexResolution.valid( + new EsIndex( + "test*", + Map.of(unsupported, unsupportedField, multiTyped, multiTypedField, "int", unsupportedField, "double", multiTypedField) + ) + ); + Analyzer analyzer = AnalyzerTestUtils.analyzer(indexWithUnsupportedAndMultiTypedField); + + assertEquals( + "1:22: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | dissect unsupported \"%{foo}\"", analyzer) + ); + assertEquals( + "1:22: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | dissect multi_typed \"%{foo}\"", analyzer) + ); + + assertEquals( + "1:19: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | grok unsupported \"%{WORD:foo}\"", analyzer) + ); + assertEquals( + "1:19: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | grok multi_typed \"%{WORD:foo}\"", analyzer) + ); + + assertEquals( + "1:36: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | enrich client_cidr on unsupported", analyzer) + ); + assertEquals( + "1:36: Unsupported type [unsupported] for enrich matching field [multi_typed];" + + " only [keyword, text, ip, long, integer, float, double, datetime] allowed for type [range]", + error("from test* | enrich client_cidr on multi_typed", analyzer) + ); + + assertEquals( + "1:23: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | eval x = unsupported", analyzer) + ); + assertEquals( + "1:23: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | eval x = multi_typed", analyzer) + ); + + assertEquals( + "1:32: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | eval x = to_lower(unsupported)", analyzer) + ); + assertEquals( + "1:32: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | eval x = to_lower(multi_typed)", analyzer) + ); + + assertEquals( + "1:32: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | stats count(1) by unsupported", analyzer) + ); + assertEquals( + "1:32: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | stats count(1) by multi_typed", analyzer) + ); + if (Build.current().isSnapshot()) { + assertEquals( + "1:38: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | inlinestats count(1) by unsupported", analyzer) + ); + assertEquals( + "1:38: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | inlinestats count(1) by multi_typed", analyzer) + ); + } + + assertEquals( + "1:27: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | stats values(unsupported)", analyzer) + ); + assertEquals( + "1:27: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + 
error("from test* | stats values(multi_typed)", analyzer) + ); + if (Build.current().isSnapshot()) { + assertEquals( + "1:33: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | inlinestats values(unsupported)", analyzer) + ); + assertEquals( + "1:33: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | inlinestats values(multi_typed)", analyzer) + ); + } + + assertEquals( + "1:27: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | stats values(unsupported)", analyzer) + ); + assertEquals( + "1:27: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | stats values(multi_typed)", analyzer) + ); + + if (Build.current().isSnapshot()) { + // LOOKUP with unsupported type + assertEquals( + "1:41: column type mismatch, table column was [integer] and original column was [unsupported]", + error("from test* | lookup int_number_names on int", analyzer) + ); + // LOOKUP with multi-typed field + assertEquals( + "1:44: column type mismatch, table column was [double] and original column was [unsupported]", + error("from test* | lookup double_number_names on double", analyzer) + ); + } + + assertEquals( + "1:24: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | mv_expand unsupported", analyzer) + ); + assertEquals( + "1:24: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | mv_expand multi_typed", analyzer) + ); + + assertEquals( + "1:21: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | rename unsupported as x", analyzer) + ); + assertEquals( + "1:21: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | rename multi_typed as x", analyzer) + ); + + assertEquals( + "1:19: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | sort unsupported asc", analyzer) + ); + assertEquals( + "1:19: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | sort multi_typed desc", analyzer) + ); + + assertEquals( + "1:20: Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | where unsupported is null", analyzer) + ); + assertEquals( + "1:20: Cannot use field [multi_typed] due to ambiguities being mapped as [2] incompatible types:" + + " [ip] in [test1, test2], [keyword] in [test3]", + error("from test* | where multi_typed is not null", analyzer) + ); + } + public void testRoundFunctionInvalidInputs() { assertEquals( "1:31: first argument of [round(b, 3)] must be [numeric], found value [b] type [keyword]", diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 6a9e7a4000734..669de17891583 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.compute.aggregation.QuantileStates; import org.elasticsearch.core.Tuple; +import org.elasticsearch.dissect.DissectParser; import org.elasticsearch.index.IndexMode; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.EsqlTestUtils; @@ -21,6 +22,7 @@ import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; import org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; +import org.elasticsearch.xpack.esql.core.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; @@ -42,6 +44,7 @@ import org.elasticsearch.xpack.esql.core.expression.predicate.regex.WildcardPattern; import org.elasticsearch.xpack.esql.core.index.EsIndex; import org.elasticsearch.xpack.esql.core.index.IndexResolution; +import org.elasticsearch.xpack.esql.core.optimizer.OptimizerRules; import org.elasticsearch.xpack.esql.core.plan.logical.Filter; import org.elasticsearch.xpack.esql.core.plan.logical.Limit; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; @@ -71,6 +74,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.Values; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateExtract; @@ -114,9 +118,13 @@ import org.elasticsearch.xpack.esql.optimizer.rules.LiteralsOnTheRight; import org.elasticsearch.xpack.esql.optimizer.rules.PushDownAndCombineFilters; import org.elasticsearch.xpack.esql.optimizer.rules.PushDownAndCombineLimits; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownEnrich; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownEval; +import org.elasticsearch.xpack.esql.optimizer.rules.PushDownRegexExtract; import org.elasticsearch.xpack.esql.optimizer.rules.SplitInWithFoldableValue; import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.elasticsearch.xpack.esql.parser.ParsingException; +import org.elasticsearch.xpack.esql.plan.GeneratingPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Dissect; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -140,6 +148,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.BiFunction; import java.util.function.Function; import static java.util.Arrays.asList; @@ -157,6 +166,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.getFieldAttribute; import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.EsqlTestUtils.localSource; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.referenceAttribute; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.elasticsearch.xpack.esql.analysis.Analyzer.NO_FIELDS; import static 
org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyze; @@ -188,6 +198,7 @@ import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; @@ -1021,7 +1032,7 @@ public void testPushDownDissectPastProject() { var keep = as(plan, Project.class); var dissect = as(keep.child(), Dissect.class); - assertThat(dissect.extractedFields(), contains(new ReferenceAttribute(Source.EMPTY, "y", DataType.KEYWORD))); + assertThat(dissect.extractedFields(), contains(referenceAttribute("y", DataType.KEYWORD))); } public void testPushDownGrokPastProject() { @@ -1034,7 +1045,7 @@ public void testPushDownGrokPastProject() { var keep = as(plan, Project.class); var grok = as(keep.child(), Grok.class); - assertThat(grok.extractedFields(), contains(new ReferenceAttribute(Source.EMPTY, "y", DataType.KEYWORD))); + assertThat(grok.extractedFields(), contains(referenceAttribute("y", DataType.KEYWORD))); } public void testPushDownFilterPastProjectUsingEval() { @@ -4254,6 +4265,210 @@ public void testPushdownWithOverwrittenName() { } } + record PushdownShadowingGeneratingPlanTestCase( + BiFunction applyLogicalPlan, + OptimizerRules.OptimizerRule rule + ) {}; + + static PushdownShadowingGeneratingPlanTestCase[] PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES = { + // | EVAL y = to_integer(x), y = y + 1 + new PushdownShadowingGeneratingPlanTestCase((plan, attr) -> { + Alias y1 = new Alias(EMPTY, "y", new ToInteger(EMPTY, attr)); + Alias y2 = new Alias(EMPTY, "y", new Add(EMPTY, y1.toAttribute(), new Literal(EMPTY, 1, INTEGER))); + return new Eval(EMPTY, plan, List.of(y1, y2)); + }, new PushDownEval()), + // | DISSECT x "%{y} %{y}" + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Dissect( + EMPTY, + plan, + attr, + new Dissect.Parser("%{y} %{y}", ",", new DissectParser("%{y} %{y}", ",")), + List.of(new ReferenceAttribute(EMPTY, "y", KEYWORD), new ReferenceAttribute(EMPTY, "y", KEYWORD)) + ), + new PushDownRegexExtract() + ), + // | GROK x "%{WORD:y} %{WORD:y}" + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Grok(EMPTY, plan, attr, Grok.pattern(EMPTY, "%{WORD:y} %{WORD:y}")), + new PushDownRegexExtract() + ), + // | ENRICH some_policy ON x WITH y = some_enrich_idx_field, y = some_other_enrich_idx_field + new PushdownShadowingGeneratingPlanTestCase( + (plan, attr) -> new Enrich( + EMPTY, + plan, + Enrich.Mode.ANY, + new Literal(EMPTY, "some_policy", KEYWORD), + attr, + null, + Map.of(), + List.of( + new Alias(EMPTY, "y", new ReferenceAttribute(EMPTY, "some_enrich_idx_field", KEYWORD)), + new Alias(EMPTY, "y", new ReferenceAttribute(EMPTY, "some_other_enrich_idx_field", KEYWORD)) + ) + ), + new PushDownEnrich() + ) }; + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Project[[y{r}#3, x{r}#2]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * We can freely push down the Eval without renaming, but need to update the Project's references. + * + * Project[[x{r}#2, y{r}#6 AS y]] + * \_Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. 
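+ * (Attribute ids such as {r}#2 above are illustrative; the assertions below check attribute names and semantic
+ * equality rather than concrete ids.)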
+ */ + public void testPushShadowingGeneratingPlanPastProject() { + Alias x = new Alias(EMPTY, "x", new Literal(EMPTY, "1", KEYWORD)); + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + LogicalPlan initialRow = new Row(EMPTY, List.of(x, y)); + LogicalPlan initialProject = new Project(EMPTY, initialRow, List.of(y.toAttribute(), x.toAttribute())); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, x.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + assertEquals(newGeneratedExprs, initialGeneratedExprs); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGenerated = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "y")); + assertThat(projections, everyItem(instanceOf(ReferenceAttribute.class))); + ReferenceAttribute yShadowed = as(projections.get(1), ReferenceAttribute.class); + assertTrue(yShadowed.semanticEquals(rightmostGenerated)); + } + } + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#4 + 1[INTEGER] AS y]] + * \_Project[[x{r}#2, y{r}#3, y{r}#3 AS z]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * To push down the Eval, we must not shadow the reference y{r}#3, so we rename. + * + * Project[[x{r}#2, y{r}#3 AS z, $$y$temp_name$10{r}#12 AS y]] + * Eval[[TO_INTEGER(x{r}#2) AS $$y$temp_name$10, $$y$temp_name$10{r}#11 + 1[INTEGER] AS $$y$temp_name$10]] + * \_Row[[1[INTEGER] AS x, 2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. 
+ */ + public void testPushShadowingGeneratingPlanPastRenamingProject() { + Alias x = new Alias(EMPTY, "x", new Literal(EMPTY, "1", KEYWORD)); + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + LogicalPlan initialRow = new Row(EMPTY, List.of(x, y)); + LogicalPlan initialProject = new Project( + EMPTY, + initialRow, + List.of(x.toAttribute(), y.toAttribute(), new Alias(EMPTY, "z", y.toAttribute())) + ); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, x.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + List newNames = Expressions.names(newGeneratedExprs); + assertThat(newNames.size(), equalTo(initialGeneratedExprs.size())); + assertThat(newNames, everyItem(startsWith("$$y$temp_name$"))); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGeneratedWithNewName = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "z", "y")); + assertThat(projections.get(0), instanceOf(ReferenceAttribute.class)); + Alias zAlias = as(projections.get(1), Alias.class); + ReferenceAttribute yRenamed = as(zAlias.child(), ReferenceAttribute.class); + assertEquals(yRenamed.name(), "y"); + Alias yAlias = as(projections.get(2), Alias.class); + ReferenceAttribute yTempRenamed = as(yAlias.child(), ReferenceAttribute.class); + assertTrue(yTempRenamed.semanticEquals(rightmostGeneratedWithNewName)); + } + } + + /** + * Consider + * + * Eval[[TO_INTEGER(x{r}#2) AS y, y{r}#3 + 1[INTEGER] AS y]] + * \_Project[[y{r}#1, y{r}#1 AS x]] + * \_Row[[2[INTEGER] AS y]] + * + * To push down the Eval, we must not shadow the reference y{r}#1, so we rename. + * Additionally, the rename "y AS x" needs to be propagated into the Eval. + * + * Project[[y{r}#1 AS x, $$y$temp_name$10{r}#12 AS y]] + * Eval[[TO_INTEGER(y{r}#1) AS $$y$temp_name$10, $$y$temp_name$10{r}#11 + 1[INTEGER] AS $$y$temp_name$10]] + * \_Row[[2[INTEGER] AS y]] + * + * And similarly for dissect, grok and enrich. + */ + public void testPushShadowingGeneratingPlanPastRenamingProjectWithResolution() { + Alias y = new Alias(EMPTY, "y", new Literal(EMPTY, "2", KEYWORD)); + Alias yAliased = new Alias(EMPTY, "x", y.toAttribute()); + LogicalPlan initialRow = new Row(EMPTY, List.of(y)); + LogicalPlan initialProject = new Project(EMPTY, initialRow, List.of(y.toAttribute(), yAliased)); + + for (PushdownShadowingGeneratingPlanTestCase testCase : PUSHDOWN_SHADOWING_GENERATING_PLAN_TEST_CASES) { + LogicalPlan initialPlan = testCase.applyLogicalPlan.apply(initialProject, yAliased.toAttribute()); + @SuppressWarnings("unchecked") + List initialGeneratedExprs = ((GeneratingPlan) initialPlan).generatedAttributes(); + LogicalPlan optimizedPlan = testCase.rule.apply(initialPlan); + + // This ensures that our generating plan doesn't use invalid references, resp. 
that any rename from the Project has + // been propagated into the generating plan. + Failures inconsistencies = LogicalVerifier.INSTANCE.verify(optimizedPlan); + assertFalse(inconsistencies.hasFailures()); + + Project project = as(optimizedPlan, Project.class); + LogicalPlan pushedDownGeneratingPlan = project.child(); + + List projections = project.projections(); + @SuppressWarnings("unchecked") + List newGeneratedExprs = ((GeneratingPlan) pushedDownGeneratingPlan).generatedAttributes(); + List newNames = Expressions.names(newGeneratedExprs); + assertThat(newNames.size(), equalTo(initialGeneratedExprs.size())); + assertThat(newNames, everyItem(startsWith("$$y$temp_name$"))); + // The rightmost generated attribute makes it into the final output as "y". + Attribute rightmostGeneratedWithNewName = newGeneratedExprs.get(newGeneratedExprs.size() - 1); + + assertThat(Expressions.names(projections), contains("x", "y")); + Alias yRenamed = as(projections.get(0), Alias.class); + assertTrue(yRenamed.child().semanticEquals(y.toAttribute())); + Alias yTempRenamed = as(projections.get(1), Alias.class); + ReferenceAttribute yTemp = as(yTempRenamed.child(), ReferenceAttribute.class); + assertTrue(yTemp.semanticEquals(rightmostGeneratedWithNewName)); + } + } + /** * Expects * Project[[min{r}#4, languages{f}#11]] @@ -5477,6 +5692,58 @@ METRICS k8s avg(round(1.05 * rate(network.total_bytes_in))) BY bucket(@timestamp assertThat(Expressions.attribute(values.field()).name(), equalTo("cluster")); } + public void testMetricsWithoutRate() { + assumeTrue("requires snapshot builds", Build.current().isSnapshot()); + List queries = List.of(""" + METRICS k8s count(to_long(network.total_bytes_in)) BY bucket(@timestamp, 1 minute) + | LIMIT 10 + """, """ + METRICS k8s | STATS count(to_long(network.total_bytes_in)) BY bucket(@timestamp, 1 minute) + | LIMIT 10 + """, """ + FROM k8s | STATS count(to_long(network.total_bytes_in)) BY bucket(@timestamp, 1 minute) + | LIMIT 10 + """); + List plans = new ArrayList<>(); + for (String query : queries) { + var plan = logicalOptimizer.optimize(metricsAnalyzer.analyze(parser.createStatement(query))); + plans.add(plan); + } + for (LogicalPlan plan : plans) { + Limit limit = as(plan, Limit.class); + Aggregate aggregate = as(limit.child(), Aggregate.class); + assertThat(aggregate.aggregateType(), equalTo(Aggregate.AggregateType.STANDARD)); + assertThat(aggregate.aggregates(), hasSize(2)); + assertThat(aggregate.groupings(), hasSize(1)); + Eval eval = as(aggregate.child(), Eval.class); + assertThat(eval.fields(), hasSize(2)); + assertThat(Alias.unwrap(eval.fields().get(0)), instanceOf(Bucket.class)); + assertThat(Alias.unwrap(eval.fields().get(1)), instanceOf(ToLong.class)); + EsRelation relation = as(eval.child(), EsRelation.class); + assertThat(relation.indexMode(), equalTo(IndexMode.STANDARD)); + } + // TODO: Unmute this part + // https://github.com/elastic/elasticsearch/issues/110827 + // for (int i = 1; i < plans.size(); i++) { + // assertThat(plans.get(i), equalTo(plans.get(0))); + // } + } + + public void testRateInStats() { + assumeTrue("requires snapshot builds", Build.current().isSnapshot()); + var query = """ + METRICS k8s | STATS max(rate(network.total_bytes_in)) BY bucket(@timestamp, 1 minute) + | LIMIT 10 + """; + VerificationException error = expectThrows( + VerificationException.class, + () -> logicalOptimizer.optimize(metricsAnalyzer.analyze(parser.createStatement(query))) + ); + assertThat(error.getMessage(), equalTo(""" + Found 1 problem + line 1:25: the rate 
aggregate[rate(network.total_bytes_in)] can only be used within the metrics command""")); + } + private Literal nullOf(DataType dataType) { return new Literal(Source.EMPTY, null, dataType); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java index 545f3efe8ca79..63204b4dd797d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java @@ -10,7 +10,6 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.core.expression.Literal; -import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -55,10 +54,6 @@ static UnresolvedAttribute attribute(String name) { return new UnresolvedAttribute(EMPTY, name); } - static ReferenceAttribute referenceAttribute(String name, DataType type) { - return new ReferenceAttribute(EMPTY, name, type); - } - static Literal integer(int i) { return new Literal(EMPTY, i, DataType.INTEGER); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index 2e2ca4feafa41..a195fb8180bf3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -53,6 +53,7 @@ import java.util.function.Function; import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.referenceAttribute; import static org.elasticsearch.xpack.esql.core.expression.Literal.FALSE; import static org.elasticsearch.xpack.esql.core.expression.Literal.TRUE; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY; @@ -103,6 +104,17 @@ public void testRowCommandHugeInt() { ); } + public void testRowCommandHugeNegativeInt() { + assertEquals( + new Row(EMPTY, List.of(new Alias(EMPTY, "c", literalDouble(-92233720368547758080d)))), + statement("row c = -92233720368547758080") + ); + assertEquals( + new Row(EMPTY, List.of(new Alias(EMPTY, "c", literalDouble(-18446744073709551616d)))), + statement("row c = -18446744073709551616") + ); + } + public void testRowCommandDouble() { assertEquals(new Row(EMPTY, List.of(new Alias(EMPTY, "c", literalDouble(1.0)))), statement("row c = 1.0")); } @@ -758,15 +770,27 @@ public void testDissectPattern() { public void testGrokPattern() { LogicalPlan cmd = processingCommand("grok a \"%{WORD:foo}\""); assertEquals(Grok.class, cmd.getClass()); - Grok dissect = (Grok) cmd; - assertEquals("%{WORD:foo}", dissect.parser().pattern()); - assertEquals(List.of(referenceAttribute("foo", KEYWORD)), dissect.extractedFields()); + Grok grok = (Grok) cmd; + assertEquals("%{WORD:foo}", grok.parser().pattern()); + assertEquals(List.of(referenceAttribute("foo", KEYWORD)), grok.extractedFields()); ParsingException pe = expectThrows(ParsingException.class, () -> statement("row a = \"foo bar\" | grok a \"%{_invalid_:x}\"")); 
assertThat( pe.getMessage(), containsString("Invalid pattern [%{_invalid_:x}] for grok: Unable to find pattern [_invalid_] in Grok's pattern dictionary") ); + + cmd = processingCommand("grok a \"%{WORD:foo} %{WORD:foo}\""); + assertEquals(Grok.class, cmd.getClass()); + grok = (Grok) cmd; + assertEquals("%{WORD:foo} %{WORD:foo}", grok.parser().pattern()); + assertEquals(List.of(referenceAttribute("foo", KEYWORD)), grok.extractedFields()); + + expectError( + "row a = \"foo bar\" | GROK a \"%{NUMBER:foo} %{WORD:foo}\"", + "line 1:22: Invalid GROK pattern [%{NUMBER:foo} %{WORD:foo}]:" + + " the attribute [foo] is defined multiple times with different types" + ); } public void testLikeRLike() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ComputeListenerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ComputeListenerTests.java new file mode 100644 index 0000000000000..c93f3b9e0e350 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ComputeListenerTests.java @@ -0,0 +1,246 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRunnable; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.cluster.node.VersionInformation; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.breaker.CircuitBreakingException; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; +import org.elasticsearch.compute.operator.DriverProfile; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.tasks.CancellableTask; +import org.elasticsearch.tasks.TaskCancellationService; +import org.elasticsearch.tasks.TaskCancelledException; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.TransportVersionUtils; +import org.elasticsearch.test.transport.MockTransportService; +import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; +import org.junit.After; +import org.junit.Before; +import org.mockito.Mockito; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.elasticsearch.test.tasks.MockTaskManager.SPY_TASK_MANAGER_SETTING; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.lessThan; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; + +public class ComputeListenerTests extends ESTestCase { + private ThreadPool threadPool; + private TransportService transportService; + + @Before + public void setUpTransportService() { + threadPool = new 
TestThreadPool(getTestName()); + transportService = MockTransportService.createNewService( + Settings.builder().put(SPY_TASK_MANAGER_SETTING.getKey(), true).build(), + VersionInformation.CURRENT, + TransportVersionUtils.randomVersion(), + threadPool + ); + transportService.start(); + TaskCancellationService cancellationService = new TaskCancellationService(transportService); + transportService.getTaskManager().setTaskCancellationService(cancellationService); + Mockito.clearInvocations(transportService.getTaskManager()); + } + + @After + public void shutdownTransportService() { + transportService.close(); + terminate(threadPool); + } + + private CancellableTask newTask() { + return new CancellableTask( + randomIntBetween(1, 100), + "test-type", + "test-action", + "test-description", + TaskId.EMPTY_TASK_ID, + Map.of() + ); + } + + private ComputeResponse randomResponse() { + int numProfiles = randomIntBetween(0, 2); + List profiles = new ArrayList<>(numProfiles); + for (int i = 0; i < numProfiles; i++) { + profiles.add(new DriverProfile(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), List.of())); + } + return new ComputeResponse(profiles); + } + + public void testEmpty() { + PlainActionFuture results = new PlainActionFuture<>(); + try (ComputeListener ignored = new ComputeListener(transportService, newTask(), results)) { + assertFalse(results.isDone()); + } + assertTrue(results.isDone()); + assertThat(results.actionGet(10, TimeUnit.SECONDS).getProfiles(), empty()); + } + + public void testCollectComputeResults() { + PlainActionFuture future = new PlainActionFuture<>(); + List allProfiles = new ArrayList<>(); + try (ComputeListener computeListener = new ComputeListener(transportService, newTask(), future)) { + int tasks = randomIntBetween(1, 100); + for (int t = 0; t < tasks; t++) { + if (randomBoolean()) { + ActionListener subListener = computeListener.acquireAvoid(); + threadPool.schedule( + ActionRunnable.wrap(subListener, l -> l.onResponse(null)), + TimeValue.timeValueNanos(between(0, 100)), + threadPool.generic() + ); + } else { + ComputeResponse resp = randomResponse(); + allProfiles.addAll(resp.getProfiles()); + ActionListener subListener = computeListener.acquireCompute(); + threadPool.schedule( + ActionRunnable.wrap(subListener, l -> l.onResponse(resp)), + TimeValue.timeValueNanos(between(0, 100)), + threadPool.generic() + ); + } + } + } + ComputeResponse result = future.actionGet(10, TimeUnit.SECONDS); + assertThat( + result.getProfiles().stream().collect(Collectors.toMap(p -> p, p -> 1, Integer::sum)), + equalTo(allProfiles.stream().collect(Collectors.toMap(p -> p, p -> 1, Integer::sum))) + ); + Mockito.verifyNoInteractions(transportService.getTaskManager()); + } + + public void testCancelOnFailure() throws Exception { + Queue rootCauseExceptions = ConcurrentCollections.newQueue(); + IntStream.range(0, between(1, 100)) + .forEach( + n -> rootCauseExceptions.add(new CircuitBreakingException("breaking exception " + n, CircuitBreaker.Durability.TRANSIENT)) + ); + int successTasks = between(1, 50); + int failedTasks = between(1, 100); + PlainActionFuture rootListener = new PlainActionFuture<>(); + CancellableTask rootTask = newTask(); + try (ComputeListener computeListener = new ComputeListener(transportService, rootTask, rootListener)) { + for (int i = 0; i < successTasks; i++) { + ActionListener subListener = computeListener.acquireCompute(); + threadPool.schedule( + ActionRunnable.wrap(subListener, l -> l.onResponse(randomResponse())), + 
TimeValue.timeValueNanos(between(0, 100)), + threadPool.generic() + ); + } + for (int i = 0; i < failedTasks; i++) { + ActionListener subListener = randomBoolean() ? computeListener.acquireAvoid() : computeListener.acquireCompute(); + threadPool.schedule(ActionRunnable.wrap(subListener, l -> { + Exception ex = rootCauseExceptions.poll(); + if (ex == null) { + ex = new TaskCancelledException("task was cancelled"); + } + l.onFailure(ex); + }), TimeValue.timeValueNanos(between(0, 100)), threadPool.generic()); + } + } + assertBusy(rootListener::isDone); + ExecutionException failure = expectThrows(ExecutionException.class, () -> rootListener.get(1, TimeUnit.SECONDS)); + Throwable cause = failure.getCause(); + assertNotNull(failure); + assertThat(cause, instanceOf(CircuitBreakingException.class)); + assertThat(failure.getSuppressed().length, lessThan(10)); + Mockito.verify(transportService.getTaskManager(), Mockito.times(1)) + .cancelTaskAndDescendants(eq(rootTask), eq("cancelled on failure"), eq(false), any()); + } + + public void testCollectWarnings() throws Exception { + List allProfiles = new ArrayList<>(); + Map> allWarnings = new HashMap<>(); + ActionListener rootListener = new ActionListener<>() { + @Override + public void onResponse(ComputeResponse result) { + assertThat( + result.getProfiles().stream().collect(Collectors.toMap(p -> p, p -> 1, Integer::sum)), + equalTo(allProfiles.stream().collect(Collectors.toMap(p -> p, p -> 1, Integer::sum))) + ); + Map> responseHeaders = threadPool.getThreadContext() + .getResponseHeaders() + .entrySet() + .stream() + .collect(Collectors.toMap(Map.Entry::getKey, e -> new HashSet<>(e.getValue()))); + assertThat(responseHeaders, equalTo(allWarnings)); + } + + @Override + public void onFailure(Exception e) { + throw new AssertionError(e); + } + }; + CountDownLatch latch = new CountDownLatch(1); + try ( + ComputeListener computeListener = new ComputeListener( + transportService, + newTask(), + ActionListener.runAfter(rootListener, latch::countDown) + ) + ) { + int tasks = randomIntBetween(1, 100); + for (int t = 0; t < tasks; t++) { + if (randomBoolean()) { + ActionListener subListener = computeListener.acquireAvoid(); + threadPool.schedule( + ActionRunnable.wrap(subListener, l -> l.onResponse(null)), + TimeValue.timeValueNanos(between(0, 100)), + threadPool.generic() + ); + } else { + ComputeResponse resp = randomResponse(); + allProfiles.addAll(resp.getProfiles()); + int numWarnings = randomIntBetween(1, 5); + Map warnings = new HashMap<>(); + for (int i = 0; i < numWarnings; i++) { + warnings.put("key" + between(1, 10), "value" + between(1, 10)); + } + for (Map.Entry e : warnings.entrySet()) { + allWarnings.computeIfAbsent(e.getKey(), v -> new HashSet<>()).add(e.getValue()); + } + ActionListener subListener = computeListener.acquireCompute(); + threadPool.schedule(ActionRunnable.wrap(subListener, l -> { + for (Map.Entry e : warnings.entrySet()) { + threadPool.getThreadContext().addResponseHeader(e.getKey(), e.getValue()); + } + l.onResponse(resp); + }), TimeValue.timeValueNanos(between(0, 100)), threadPool.generic()); + } + } + } + assertTrue(latch.await(10, TimeUnit.SECONDS)); + Mockito.verifyNoInteractions(transportService.getTaskManager()); + } +} diff --git a/x-pack/plugin/geoip-enterprise-downloader/build.gradle b/x-pack/plugin/geoip-enterprise-downloader/build.gradle new file mode 100644 index 0000000000000..ab16609ac7aad --- /dev/null +++ b/x-pack/plugin/geoip-enterprise-downloader/build.gradle @@ -0,0 +1,19 @@ +apply plugin: 
'elasticsearch.internal-es-plugin' +apply plugin: 'elasticsearch.internal-yaml-rest-test' +apply plugin: 'elasticsearch.internal-cluster-test' +esplugin { + name 'x-pack-geoip-enterprise-downloader' + description 'Elasticsearch Expanded Pack Plugin - Geoip Enterprise Downloader' + classname 'org.elasticsearch.xpack.geoip.EnterpriseDownloaderPlugin' + extendedPlugins = ['x-pack-core'] +} +base { + archivesName = 'x-pack-geoip-enterprise-downloader' +} + +dependencies { + compileOnly project(path: xpackModule('core')) + testImplementation(testArtifact(project(xpackModule('core')))) +} + +addQaCheckDependencies(project) diff --git a/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseDownloaderPlugin.java b/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseDownloaderPlugin.java new file mode 100644 index 0000000000000..e34ecdda81d72 --- /dev/null +++ b/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseDownloaderPlugin.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +package org.elasticsearch.xpack.geoip; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xpack.core.XPackPlugin; + +import java.util.Collection; +import java.util.List; + +/** + * This plugin is used to start the enterprise geoip downloader task (See {@link org.elasticsearch.ingest.EnterpriseGeoIpTask}). That task + * requires having a platinum license. But the geoip code is in a non-xpack module that doesn't know about licensing. This plugin has a + * license listener that will start the task if the license is valid, and will stop the task if it becomes invalid. This lets us enforce + * the license without having to either put license logic into a non-xpack module, or put a lot of shared geoip code (much of which does + * not require a platinum license) into xpack. 
+ */ +public class EnterpriseDownloaderPlugin extends Plugin { + + private final Settings settings; + private EnterpriseGeoIpDownloaderLicenseListener enterpriseGeoIpDownloaderLicenseListener; + + public EnterpriseDownloaderPlugin(final Settings settings) { + this.settings = settings; + } + + protected XPackLicenseState getLicenseState() { + return XPackPlugin.getSharedLicenseState(); + } + + @Override + public Collection createComponents(PluginServices services) { + enterpriseGeoIpDownloaderLicenseListener = new EnterpriseGeoIpDownloaderLicenseListener( + services.client(), + services.clusterService(), + services.threadPool(), + getLicenseState() + ); + enterpriseGeoIpDownloaderLicenseListener.init(); + return List.of(enterpriseGeoIpDownloaderLicenseListener); + } +} diff --git a/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListener.java b/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListener.java new file mode 100644 index 0000000000000..d6e6f57f10976 --- /dev/null +++ b/x-pack/plugin/geoip-enterprise-downloader/src/main/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListener.java @@ -0,0 +1,145 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.geoip; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateListener; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.ingest.EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams; +import org.elasticsearch.license.License; +import org.elasticsearch.license.LicenseStateListener; +import org.elasticsearch.license.LicensedFeature; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.RemoteTransportException; +import org.elasticsearch.xpack.core.XPackField; + +import java.util.Objects; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; + +public class EnterpriseGeoIpDownloaderLicenseListener implements LicenseStateListener, ClusterStateListener { + private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloaderLicenseListener.class); + // Note: This custom type is GeoIpMetadata.TYPE, but that class is not exposed to this plugin + static final String INGEST_GEOIP_CUSTOM_METADATA_TYPE = "ingest_geoip"; + + private final PersistentTasksService persistentTasksService; + private final ClusterService clusterService; + private final XPackLicenseState licenseState; + private static final LicensedFeature.Momentary ENTERPRISE_GEOIP_FEATURE = 
LicensedFeature.momentary( + null, + XPackField.ENTERPRISE_GEOIP_DOWNLOADER, + License.OperationMode.PLATINUM + ); + private volatile boolean licenseIsValid = false; + private volatile boolean hasIngestGeoIpMetadata = false; + + protected EnterpriseGeoIpDownloaderLicenseListener( + Client client, + ClusterService clusterService, + ThreadPool threadPool, + XPackLicenseState licenseState + ) { + this.persistentTasksService = new PersistentTasksService(clusterService, threadPool, client); + this.clusterService = clusterService; + this.licenseState = licenseState; + } + + @UpdateForV9 // use MINUS_ONE once that means no timeout + private static final TimeValue MASTER_TIMEOUT = TimeValue.MAX_VALUE; + private volatile boolean licenseStateListenerRegistered; + + public void init() { + listenForLicenseStateChanges(); + clusterService.addListener(this); + } + + void listenForLicenseStateChanges() { + assert licenseStateListenerRegistered == false : "listenForLicenseStateChanges() should only be called once"; + licenseStateListenerRegistered = true; + licenseState.addListener(this); + } + + @Override + public void licenseStateChanged() { + licenseIsValid = ENTERPRISE_GEOIP_FEATURE.checkWithoutTracking(licenseState); + maybeUpdateTaskState(clusterService.state()); + } + + @Override + public void clusterChanged(ClusterChangedEvent event) { + hasIngestGeoIpMetadata = event.state().metadata().custom(INGEST_GEOIP_CUSTOM_METADATA_TYPE) != null; + final boolean ingestGeoIpCustomMetaChangedInEvent = event.metadataChanged() + && event.changedCustomMetadataSet().contains(INGEST_GEOIP_CUSTOM_METADATA_TYPE); + final boolean masterNodeChanged = Objects.equals( + event.state().nodes().getMasterNode(), + event.previousState().nodes().getMasterNode() + ) == false; + /* + * We don't want to potentially start the task on every cluster state change, so only maybeUpdateTaskState if this cluster change + * event involved the modification of custom geoip metadata OR a master node change + */ + if (ingestGeoIpCustomMetaChangedInEvent || (masterNodeChanged && hasIngestGeoIpMetadata)) { + maybeUpdateTaskState(event.state()); + } + } + + private void maybeUpdateTaskState(ClusterState state) { + // We should only start/stop task from single node, master is the best as it will go through it anyway + if (state.nodes().isLocalNodeElectedMaster()) { + if (licenseIsValid) { + if (hasIngestGeoIpMetadata) { + ensureTaskStarted(); + } + } else { + ensureTaskStopped(); + } + } + } + + private void ensureTaskStarted() { + assert licenseIsValid : "Task should never be started without valid license"; + persistentTasksService.sendStartRequest( + ENTERPRISE_GEOIP_DOWNLOADER, + ENTERPRISE_GEOIP_DOWNLOADER, + new EnterpriseGeoIpTaskParams(), + MASTER_TIMEOUT, + ActionListener.wrap(r -> logger.debug("Started enterprise geoip downloader task"), e -> { + Throwable t = e instanceof RemoteTransportException ? ExceptionsHelper.unwrapCause(e) : e; + if (t instanceof ResourceAlreadyExistsException == false) { + logger.error("failed to create enterprise geoip downloader task", e); + } + }) + ); + } + + private void ensureTaskStopped() { + ActionListener> listener = ActionListener.wrap( + r -> logger.debug("Stopped enterprise geoip downloader task"), + e -> { + Throwable t = e instanceof RemoteTransportException ? 
ExceptionsHelper.unwrapCause(e) : e; + if (t instanceof ResourceNotFoundException == false) { + logger.error("failed to remove enterprise geoip downloader task", e); + } + } + ); + persistentTasksService.sendRemoveRequest(ENTERPRISE_GEOIP_DOWNLOADER, MASTER_TIMEOUT, listener); + } +} diff --git a/x-pack/plugin/geoip-enterprise-downloader/src/test/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListenerTests.java b/x-pack/plugin/geoip-enterprise-downloader/src/test/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListenerTests.java new file mode 100644 index 0000000000000..5a5aacd392f3c --- /dev/null +++ b/x-pack/plugin/geoip-enterprise-downloader/src/test/java/org/elasticsearch/xpack/geoip/EnterpriseGeoIpDownloaderLicenseListenerTests.java @@ -0,0 +1,219 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.geoip; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.node.DiscoveryNodeUtils; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.license.License; +import org.elasticsearch.license.TestUtils; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.license.internal.XPackLicenseStatus; +import org.elasticsearch.node.Node; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.RemovePersistentTaskAction; +import org.elasticsearch.persistent.StartPersistentTaskAction; +import org.elasticsearch.telemetry.metric.MeterRegistry; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.client.NoOpClient; +import org.elasticsearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.util.Map; +import java.util.UUID; + +import static org.elasticsearch.xpack.geoip.EnterpriseGeoIpDownloaderLicenseListener.INGEST_GEOIP_CUSTOM_METADATA_TYPE; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class EnterpriseGeoIpDownloaderLicenseListenerTests extends ESTestCase { + + private ThreadPool threadPool; + + @Before + public void setup() { + threadPool = new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), MeterRegistry.NOOP); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testAllConditionsMetOnStart() { + // Should never start if not master node, even if all other conditions have been met + final XPackLicenseState licenseState = getAlwaysValidLicense(); + ClusterService clusterService = createClusterService(true, false); + TaskStartAndRemoveMockClient client = new 
TaskStartAndRemoveMockClient(threadPool, true, false); + EnterpriseGeoIpDownloaderLicenseListener listener = new EnterpriseGeoIpDownloaderLicenseListener( + client, + clusterService, + threadPool, + licenseState + ); + listener.init(); + listener.licenseStateChanged(); + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(true, true), clusterService.state())); + client.assertTaskStartHasBeenCalled(); + } + + public void testLicenseChanges() { + final TestUtils.UpdatableLicenseState licenseState = new TestUtils.UpdatableLicenseState(); + licenseState.update(new XPackLicenseStatus(License.OperationMode.TRIAL, false, "")); + ClusterService clusterService = createClusterService(true, true); + TaskStartAndRemoveMockClient client = new TaskStartAndRemoveMockClient(threadPool, false, true); + EnterpriseGeoIpDownloaderLicenseListener listener = new EnterpriseGeoIpDownloaderLicenseListener( + client, + clusterService, + threadPool, + licenseState + ); + listener.init(); + listener.licenseStateChanged(); + listener.clusterChanged(new ClusterChangedEvent("test", clusterService.state(), clusterService.state())); + client.expectStartTask = true; + client.expectRemoveTask = false; + licenseState.update(new XPackLicenseStatus(License.OperationMode.TRIAL, true, "")); + listener.licenseStateChanged(); + client.assertTaskStartHasBeenCalled(); + client.expectStartTask = false; + client.expectRemoveTask = true; + licenseState.update(new XPackLicenseStatus(License.OperationMode.TRIAL, false, "")); + listener.licenseStateChanged(); + client.assertTaskRemoveHasBeenCalled(); + } + + public void testDatabaseChanges() { + final XPackLicenseState licenseState = getAlwaysValidLicense(); + ClusterService clusterService = createClusterService(true, false); + TaskStartAndRemoveMockClient client = new TaskStartAndRemoveMockClient(threadPool, false, false); + EnterpriseGeoIpDownloaderLicenseListener listener = new EnterpriseGeoIpDownloaderLicenseListener( + client, + clusterService, + threadPool, + licenseState + ); + listener.init(); + listener.licenseStateChanged(); + listener.clusterChanged(new ClusterChangedEvent("test", clusterService.state(), clusterService.state())); + // add a geoip database, so the task ought to be started: + client.expectStartTask = true; + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(true, true), clusterService.state())); + client.assertTaskStartHasBeenCalled(); + // Now we remove the geoip databases. The task ought to just be left alone. 
+ client.expectStartTask = false; + client.expectRemoveTask = false; + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(true, false), clusterService.state())); + } + + public void testMasterChanges() { + // Should never start if not master node, even if all other conditions have been met + final XPackLicenseState licenseState = getAlwaysValidLicense(); + ClusterService clusterService = createClusterService(false, false); + TaskStartAndRemoveMockClient client = new TaskStartAndRemoveMockClient(threadPool, false, false); + EnterpriseGeoIpDownloaderLicenseListener listener = new EnterpriseGeoIpDownloaderLicenseListener( + client, + clusterService, + threadPool, + licenseState + ); + listener.init(); + listener.licenseStateChanged(); + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(false, true), clusterService.state())); + client.expectStartTask = true; + listener.clusterChanged(new ClusterChangedEvent("test", createClusterState(true, true), clusterService.state())); + } + + private XPackLicenseState getAlwaysValidLicense() { + return new XPackLicenseState(() -> 0); + } + + private ClusterService createClusterService(boolean isMasterNode, boolean hasGeoIpDatabases) { + ClusterService clusterService = mock(ClusterService.class); + ClusterState state = createClusterState(isMasterNode, hasGeoIpDatabases); + when(clusterService.state()).thenReturn(state); + return clusterService; + } + + private ClusterState createClusterState(boolean isMasterNode, boolean hasGeoIpDatabases) { + String indexName = randomAlphaOfLength(5); + Index index = new Index(indexName, UUID.randomUUID().toString()); + IndexMetadata.Builder idxMeta = IndexMetadata.builder(index.getName()) + .settings(indexSettings(IndexVersion.current(), 1, 0).put("index.uuid", index.getUUID())); + String nodeId = ESTestCase.randomAlphaOfLength(8); + DiscoveryNodes.Builder discoveryNodesBuilder = DiscoveryNodes.builder().add(DiscoveryNodeUtils.create(nodeId)).localNodeId(nodeId); + if (isMasterNode) { + discoveryNodesBuilder.masterNodeId(nodeId); + } + ClusterState.Builder clusterStateBuilder = ClusterState.builder(new ClusterName("name")); + if (hasGeoIpDatabases) { + PersistentTasksCustomMetadata tasksCustomMetadata = new PersistentTasksCustomMetadata(1L, Map.of()); + clusterStateBuilder.metadata(Metadata.builder().putCustom(INGEST_GEOIP_CUSTOM_METADATA_TYPE, tasksCustomMetadata).put(idxMeta)); + } + return clusterStateBuilder.nodes(discoveryNodesBuilder).build(); + } + + private static class TaskStartAndRemoveMockClient extends NoOpClient { + + boolean expectStartTask; + boolean expectRemoveTask; + private boolean taskStartCalled = false; + private boolean taskRemoveCalled = false; + + private TaskStartAndRemoveMockClient(ThreadPool threadPool, boolean expectStartTask, boolean expectRemoveTask) { + super(threadPool); + this.expectStartTask = expectStartTask; + this.expectRemoveTask = expectRemoveTask; + } + + @Override + protected void doExecute( + ActionType action, + Request request, + ActionListener listener + ) { + if (action.equals(StartPersistentTaskAction.INSTANCE)) { + if (expectStartTask) { + taskStartCalled = true; + } else { + fail("Should not start task"); + } + } else if (action.equals(RemovePersistentTaskAction.INSTANCE)) { + if (expectRemoveTask) { + taskRemoveCalled = true; + } else { + fail("Should not remove task"); + } + } else { + throw new IllegalStateException("unexpected action called [" + action.name() + "]"); + } + } + + void assertTaskStartHasBeenCalled() { + 
assertTrue(taskStartCalled); + } + + void assertTaskRemoveHasBeenCalled() { + assertTrue(taskRemoveCalled); + } + } +} diff --git a/x-pack/plugin/inference/build.gradle b/x-pack/plugin/inference/build.gradle index 41ca9966c1336..beeec94f21ebf 100644 --- a/x-pack/plugin/inference/build.gradle +++ b/x-pack/plugin/inference/build.gradle @@ -27,6 +27,10 @@ base { archivesName = 'x-pack-inference' } +versions << [ + 'awsbedrockruntime': '1.12.740' +] + dependencies { implementation project(path: ':libs:elasticsearch-logging') compileOnly project(":server") @@ -53,10 +57,19 @@ dependencies { implementation 'com.google.http-client:google-http-client-appengine:1.42.3' implementation 'com.google.http-client:google-http-client-jackson2:1.42.3' implementation "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" + implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}" + implementation "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" + implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:${versions.jackson}" + implementation "com.fasterxml.jackson:jackson-bom:${versions.jackson}" implementation 'com.google.api:gax-httpjson:0.105.1' implementation 'io.grpc:grpc-context:1.49.2' implementation 'io.opencensus:opencensus-api:0.31.1' implementation 'io.opencensus:opencensus-contrib-http-util:0.31.1' + implementation "com.amazonaws:aws-java-sdk-bedrockruntime:${versions.awsbedrockruntime}" + implementation "com.amazonaws:aws-java-sdk-core:${versions.aws}" + implementation "com.amazonaws:jmespath-java:${versions.aws}" + implementation "joda-time:joda-time:2.10.10" + implementation 'javax.xml.bind:jaxb-api:2.2.2' } tasks.named("dependencyLicenses").configure { @@ -66,6 +79,9 @@ tasks.named("dependencyLicenses").configure { mapping from: /protobuf.*/, to: 'protobuf' mapping from: /proto-google.*/, to: 'proto-google' mapping from: /jackson.*/, to: 'jackson' + mapping from: /aws-java-sdk-.*/, to: 'aws-java-sdk' + mapping from: /jmespath-java.*/, to: 'aws-java-sdk' + mapping from: /jaxb-.*/, to: 'jaxb' } tasks.named("thirdPartyAudit").configure { @@ -199,10 +215,21 @@ tasks.named("thirdPartyAudit").configure { 'com.google.appengine.api.urlfetch.HTTPRequest', 'com.google.appengine.api.urlfetch.HTTPResponse', 'com.google.appengine.api.urlfetch.URLFetchService', - 'com.google.appengine.api.urlfetch.URLFetchServiceFactory' + 'com.google.appengine.api.urlfetch.URLFetchServiceFactory', + 'software.amazon.ion.IonReader', + 'software.amazon.ion.IonSystem', + 'software.amazon.ion.IonType', + 'software.amazon.ion.IonWriter', + 'software.amazon.ion.Timestamp', + 'software.amazon.ion.system.IonBinaryWriterBuilder', + 'software.amazon.ion.system.IonSystemBuilder', + 'software.amazon.ion.system.IonTextWriterBuilder', + 'software.amazon.ion.system.IonWriterBuilder', + 'javax.activation.DataHandler' ) } tasks.named('yamlRestTest') { usesDefaultDistribution() } + diff --git a/x-pack/plugin/inference/licenses/aws-java-sdk-LICENSE.txt b/x-pack/plugin/inference/licenses/aws-java-sdk-LICENSE.txt new file mode 100644 index 0000000000000..98d1f9319f374 --- /dev/null +++ b/x-pack/plugin/inference/licenses/aws-java-sdk-LICENSE.txt @@ -0,0 +1,63 @@ +Apache License +Version 2.0, January 2004 + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 
+ +"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + + 1. You must give any other recipients of the Work or Derivative Works a copy of this License; and + 2. You must cause any modified files to carry prominent notices stating that You changed the files; and + 3. You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + 4. If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +6. Trademarks. 
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +Note: Other license terms may apply to certain, identified software files contained within or distributed with the accompanying software if such terms are included in the directory containing the accompanying software. Such other license terms will then apply in lieu of the terms of the software license above. + +JSON processing code subject to the JSON License from JSON.org: + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +The Software shall be used for Good, not Evil. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/x-pack/plugin/inference/licenses/aws-java-sdk-NOTICE.txt b/x-pack/plugin/inference/licenses/aws-java-sdk-NOTICE.txt new file mode 100644 index 0000000000000..565bd6085c71a --- /dev/null +++ b/x-pack/plugin/inference/licenses/aws-java-sdk-NOTICE.txt @@ -0,0 +1,15 @@ +AWS SDK for Java +Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. + +This product includes software developed by +Amazon Technologies, Inc (http://www.amazon.com/). + +********************** +THIRD PARTY COMPONENTS +********************** +This software includes third party software subject to the following copyrights: +- XML parsing and utility functions from JetS3t - Copyright 2006-2009 James Murty. +- JSON parsing and utility functions from JSON.org - Copyright 2002 JSON.org. +- PKCS#1 PEM encoded private key parsing and utility functions from oauth.googlecode.com - Copyright 1998-2010 AOL Inc. + +The licenses for these third party components are included in LICENSE.txt diff --git a/x-pack/plugin/inference/licenses/jaxb-LICENSE.txt b/x-pack/plugin/inference/licenses/jaxb-LICENSE.txt new file mode 100644 index 0000000000000..833a843cfeee1 --- /dev/null +++ b/x-pack/plugin/inference/licenses/jaxb-LICENSE.txt @@ -0,0 +1,274 @@ +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL)Version 1.1 + +1. Definitions. + + 1.1. "Contributor" means each individual or entity that creates or contributes to the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original Software, prior Modifications used by a Contributor (if any), and the Modifications made by that particular Contributor. + + 1.3. "Covered Software" means (a) the Original Software, or (b) Modifications, or (c) the combination of files containing Original Software with files containing Modifications, in each case including portions thereof. + + 1.4. "Executable" means the Covered Software in any form other than Source Code. + + 1.5. "Initial Developer" means the individual or entity that first makes Original Software available under this License. + + 1.6. "Larger Work" means a work which combines Covered Software or portions thereof with code not governed by the terms of this License. + + 1.7. "License" means this document. + + 1.8. "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. + + 1.9. "Modifications" means the Source Code and Executable form of any of the following: + + A. Any file that results from an addition to, deletion from or modification of the contents of a file containing Original Software or previous Modifications; + + B. Any new file that contains any part of the Original Software or previous Modification; or + + C. Any new file that is contributed or otherwise made available under the terms of this License. + + 1.10. "Original Software" means the Source Code and Executable form of computer software code that is originally released under this License. + + 1.11. "Patent Claims" means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. + + 1.12. 
"Source Code" means (a) the common form of computer software code in which modifications are made and (b) associated documentation included in or with such code. + + 1.13. "You" (or "Your") means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, "control" means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. + +2. License Grants. + + 2.1. The Initial Developer Grant. + + Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, the Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license: + + (a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer, to use, reproduce, modify, display, perform, sublicense and distribute the Original Software (or portions thereof), with or without Modifications, and/or as part of a Larger Work; and + + (b) under Patent Claims infringed by the making, using or selling of Original Software, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Software (or portions thereof). + + (c) The licenses granted in Sections 2.1(a) and (b) are effective on the date Initial Developer first distributes or otherwise makes the Original Software available to a third party under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is granted: (1) for code that You delete from the Original Software, or (2) for infringements caused by: (i) the modification of the Original Software, or (ii) the combination of the Original Software with other software or devices. + + 2.2. Contributor Grant. + + Conditioned upon Your compliance with Section 3.1 below and subject to third party intellectual property claims, each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: + + (a) under intellectual property rights (other than patent or trademark) Licensable by Contributor to use, reproduce, modify, display, perform, sublicense and distribute the Modifications created by such Contributor (or portions thereof), either on an unmodified basis, with other Modifications, as Covered Software and/or as part of a Larger Work; and + + (b) under Patent Claims infringed by the making, using, or selling of Modifications made by that Contributor either alone and/or in combination with its Contributor Version (or portions of such combination), to make, use, sell, offer for sale, have made, and/or otherwise dispose of: (1) Modifications made by that Contributor (or portions thereof); and (2) the combination of Modifications made by that Contributor with its Contributor Version (or portions of such combination). + + (c) The licenses granted in Sections 2.2(a) and 2.2(b) are effective on the date Contributor first distributes or otherwise makes the Modifications available to a third party. 
+ + (d) Notwithstanding Section 2.2(b) above, no patent license is granted: (1) for any code that Contributor has deleted from the Contributor Version; (2) for infringements caused by: (i) third party modifications of Contributor Version, or (ii) the combination of Modifications made by that Contributor with other software (except as part of the Contributor Version) or other devices; or (3) under Patent Claims infringed by Covered Software in the absence of Modifications made by that Contributor. + +3. Distribution Obligations. + + 3.1. Availability of Source Code. + + Any Covered Software that You distribute or otherwise make available in Executable form must also be made available in Source Code form and that Source Code form must be distributed only under the terms of this License. You must include a copy of this License with every copy of the Source Code form of the Covered Software You distribute or otherwise make available. You must inform recipients of any such Covered Software in Executable form as to how they can obtain such Covered Software in Source Code form in a reasonable manner on or through a medium customarily used for software exchange. + + 3.2. Modifications. + + The Modifications that You create or to which You contribute are governed by the terms of this License. You represent that You believe Your Modifications are Your original creation(s) and/or You have sufficient rights to grant the rights conveyed by this License. + + 3.3. Required Notices. + + You must include a notice in each of Your Modifications that identifies You as the Contributor of the Modification. You may not remove or alter any copyright, patent or trademark notices contained within the Covered Software, or any notices of licensing or any descriptive text giving attribution to any Contributor or the Initial Developer. + + 3.4. Application of Additional Terms. + + You may not offer or impose any terms on any Covered Software in Source Code form that alters or restricts the applicable version of this License or the recipients' rights hereunder. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, you may do so only on Your own behalf, and not on behalf of the Initial Developer or any Contributor. You must make it absolutely clear that any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of warranty, support, indemnity or liability terms You offer. + + 3.5. Distribution of Executable Versions. + + You may distribute the Executable form of the Covered Software under the terms of this License or under the terms of a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License and that the license for the Executable form does not attempt to limit or alter the recipient's rights in the Source Code form from the rights set forth in this License. If You distribute the Covered Software in Executable form under a different license, You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer or Contributor. 
You hereby agree to indemnify the Initial Developer and every Contributor for any liability incurred by the Initial Developer or such Contributor as a result of any such terms You offer. + + 3.6. Larger Works. + + You may create a Larger Work by combining Covered Software with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Covered Software. + +4. Versions of the License. + + 4.1. New Versions. + + Oracle is the initial license steward and may publish revised and/or new versions of this License from time to time. Each version will be given a distinguishing version number. Except as provided in Section 4.3, no one other than the license steward has the right to modify this License. + + 4.2. Effect of New Versions. + + You may always continue to use, distribute or otherwise make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. If the Initial Developer includes a notice in the Original Software prohibiting it from being distributed or otherwise made available under any subsequent version of the License, You must distribute and make the Covered Software available under the terms of the version of the License under which You originally received the Covered Software. Otherwise, You may also choose to use, distribute or otherwise make the Covered Software available under the terms of any subsequent version of the License published by the license steward. + + 4.3. Modified Versions. + + When You are an Initial Developer and You want to create a new license for Your Original Software, You may create and use a modified version of this License if You: (a) rename the license and remove any references to the name of the license steward (except to note that the license differs from this License); and (b) otherwise make it clear that the license contains terms which differ from this License. + +5. DISCLAIMER OF WARRANTY. + + COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. + +6. TERMINATION. + + 6.1. This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. + + 6.2. 
If You assert a patent infringement claim (excluding declaratory judgment actions) against Initial Developer or a Contributor (the Initial Developer or Contributor against whom You assert such claim is referred to as "Participant") alleging that the Participant Software (meaning the Contributor Version where the Participant is a Contributor or the Original Software where the Participant is the Initial Developer) directly or indirectly infringes any patent, then any and all rights granted directly or indirectly to You by such Participant, the Initial Developer (if the Initial Developer is not the Participant) and all Contributors under Sections 2.1 and/or 2.2 of this License shall, upon 60 days notice from Participant terminate prospectively and automatically at the expiration of such 60 day notice period, unless if within such 60 day period You withdraw Your claim with respect to the Participant Software against such Participant either unilaterally or pursuant to a written agreement with Participant. + + 6.3. If You assert a patent infringement claim against Participant alleging that the Participant Software directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by such Participant under Sections 2.1 or 2.2 shall be taken into account in determining the amount or value of any payment or license. + + 6.4. In the event of termination under Sections 6.1 or 6.2 above, all end user licenses that have been validly granted by You or any distributor hereunder prior to termination (excluding licenses granted to You by any distributor) shall survive termination. + +7. LIMITATION OF LIABILITY. + + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + + The Covered Software is a "commercial item," as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial computer software" (as that term is defined at 48 C.F.R. ? 252.227-7014(a)(1)) and "commercial computer software documentation" as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Covered Software with only those rights set forth herein. This U.S. Government Rights clause is in lieu of, and supersedes, any other FAR, DFAR, or other clause or provision that addresses Government rights in computer software under this License. + +9. MISCELLANEOUS. + + This License represents the complete agreement concerning subject matter hereof. 
If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by the law of the jurisdiction specified in a notice contained within the Original Software (except to the extent applicable law, if any, provides otherwise), excluding such jurisdiction's conflict-of-law provisions. Any litigation relating to this License shall be subject to the jurisdiction of the courts located in the jurisdiction and venue specified in a notice contained within the Original Software, with the losing party responsible for costs, including, without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. You agree that You alone are responsible for compliance with the United States export administration regulations (and the export control laws and regulation of any other countries) when You use, distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License and You agree to work with Initial Developer and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. + +---------- +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) +The code released under the CDDL shall be governed by the laws of the State of California (excluding conflict-of-law provisions). Any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California and the state courts of the State of California, with venue lying in Santa Clara County, California. + + + + +The GNU General Public License (GPL) Version 2, June 1991 + + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. 
+ +To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification follow. + + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. + +2. 
You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. + + c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. + +3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. + +If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. + +7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. 
+ +It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. + +This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. + +NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. + + One line to give the program's name and a brief idea of what it does. + + Copyright (C) + + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. + + signature of Ty Coon, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. 
+ + +"CLASSPATH" EXCEPTION TO THE GPL VERSION 2 + +Certain source files distributed by Oracle are subject to the following clarification and special exception to the GPL Version 2, but only where Oracle has expressly included in the particular source file's header the words "Oracle designates this particular file as subject to the "Classpath" exception as provided by Oracle in the License file that accompanied this code." + +Linking this library statically or dynamically with other modules is making a combined work based on this library. Thus, the terms and conditions of the GNU General Public License Version 2 cover the whole combination. + +As a special exception, the copyright holders of this library give you permission to link this library with independent modules to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify this library, you may extend this exception to your version of the library, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. diff --git a/x-pack/plugin/inference/licenses/jaxb-NOTICE.txt b/x-pack/plugin/inference/licenses/jaxb-NOTICE.txt new file mode 100644 index 0000000000000..8d1c8b69c3fce --- /dev/null +++ b/x-pack/plugin/inference/licenses/jaxb-NOTICE.txt @@ -0,0 +1 @@ + diff --git a/x-pack/plugin/core/licenses/nimbus-jose-jwt-LICENSE.txt b/x-pack/plugin/inference/licenses/joda-time-LICENSE.txt similarity index 100% rename from x-pack/plugin/core/licenses/nimbus-jose-jwt-LICENSE.txt rename to x-pack/plugin/inference/licenses/joda-time-LICENSE.txt diff --git a/x-pack/plugin/inference/licenses/joda-time-NOTICE.txt b/x-pack/plugin/inference/licenses/joda-time-NOTICE.txt new file mode 100644 index 0000000000000..dffbcf31cacf6 --- /dev/null +++ b/x-pack/plugin/inference/licenses/joda-time-NOTICE.txt @@ -0,0 +1,5 @@ +============================================================================= += NOTICE file corresponding to section 4d of the Apache License Version 2.0 = +============================================================================= +This product includes software developed by +Joda.org (http://www.joda.org/). 
diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java index 75e392b6d155f..2325e17c23944 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java @@ -48,7 +48,7 @@ public void testGet() throws IOException { var singleModel = getModels("se_model_1", TaskType.SPARSE_EMBEDDING); assertThat(singleModel, hasSize(1)); - assertEquals("se_model_1", singleModel.get(0).get("model_id")); + assertEquals("se_model_1", singleModel.get(0).get("inference_id")); for (int i = 0; i < 5; i++) { deleteModel("se_model_" + i, TaskType.SPARSE_EMBEDDING); @@ -81,7 +81,7 @@ public void testGetModelWithAnyTaskType() throws IOException { String inferenceEntityId = "sparse_embedding_model"; putModel(inferenceEntityId, mockSparseServiceModelConfig(), TaskType.SPARSE_EMBEDDING); var singleModel = getModels(inferenceEntityId, TaskType.ANY); - assertEquals(inferenceEntityId, singleModel.get(0).get("model_id")); + assertEquals(inferenceEntityId, singleModel.get(0).get("inference_id")); assertEquals(TaskType.SPARSE_EMBEDDING.toString(), singleModel.get(0).get("task_type")); } @@ -90,7 +90,7 @@ public void testApisWithoutTaskType() throws IOException { String modelId = "no_task_type_in_url"; putModel(modelId, mockSparseServiceModelConfig(TaskType.SPARSE_EMBEDDING)); var singleModel = getModel(modelId); - assertEquals(modelId, singleModel.get("model_id")); + assertEquals(modelId, singleModel.get("inference_id")); assertEquals(TaskType.SPARSE_EMBEDDING.toString(), singleModel.get("task_type")); var inference = inferOnMockService(modelId, List.of(randomAlphaOfLength(10))); diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockDenseInferenceServiceIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockDenseInferenceServiceIT.java index 833b1fd3673fc..5f6bad5687407 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockDenseInferenceServiceIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockDenseInferenceServiceIT.java @@ -22,7 +22,7 @@ public void testMockService() throws IOException { var model = getModels(inferenceEntityId, TaskType.TEXT_EMBEDDING).get(0); for (var modelMap : List.of(putModel, model)) { - assertEquals(inferenceEntityId, modelMap.get("model_id")); + assertEquals(inferenceEntityId, modelMap.get("inference_id")); assertEquals(TaskType.TEXT_EMBEDDING, TaskType.fromString((String) modelMap.get("task_type"))); assertEquals("text_embedding_test_service", modelMap.get("service")); } diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockSparseInferenceServiceIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockSparseInferenceServiceIT.java index 97e0641f37c33..24ba2708f5de4 100644 --- 
a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockSparseInferenceServiceIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/MockSparseInferenceServiceIT.java @@ -24,7 +24,7 @@ public void testMockService() throws IOException { var model = getModels(inferenceEntityId, TaskType.SPARSE_EMBEDDING).get(0); for (var modelMap : List.of(putModel, model)) { - assertEquals(inferenceEntityId, modelMap.get("model_id")); + assertEquals(inferenceEntityId, modelMap.get("inference_id")); assertEquals(TaskType.SPARSE_EMBEDDING, TaskType.fromString((String) modelMap.get("task_type"))); assertEquals("test_service", modelMap.get("service")); } @@ -77,7 +77,7 @@ public void testMockService_DoesNotReturnHiddenField_InModelResponses() throws I var model = getModels(inferenceEntityId, TaskType.SPARSE_EMBEDDING).get(0); for (var modelMap : List.of(putModel, model)) { - assertEquals(inferenceEntityId, modelMap.get("model_id")); + assertEquals(inferenceEntityId, modelMap.get("inference_id")); assertThat(modelMap.get("service_settings"), is(Map.of("model", "my_model"))); assertEquals(TaskType.SPARSE_EMBEDDING, TaskType.fromString((String) modelMap.get("task_type"))); assertEquals("test_service", modelMap.get("service")); @@ -95,7 +95,7 @@ public void testMockService_DoesReturnHiddenField_InModelResponses() throws IOEx var model = getModels(inferenceEntityId, TaskType.SPARSE_EMBEDDING).get(0); for (var modelMap : List.of(putModel, model)) { - assertEquals(inferenceEntityId, modelMap.get("model_id")); + assertEquals(inferenceEntityId, modelMap.get("inference_id")); assertThat(modelMap.get("service_settings"), is(Map.of("model", "my_model", "hidden_field", "my_hidden_value"))); assertEquals(TaskType.SPARSE_EMBEDDING, TaskType.fromString((String) modelMap.get("task_type"))); assertEquals("test_service", modelMap.get("service")); diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java index 776232f1e29e6..abe73e11a4873 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java @@ -405,7 +405,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); builder.field("unknown_field", "foo"); - builder.field(MODEL_ID, getInferenceEntityId()); + builder.field(INDEX_ONLY_ID_FIELD_NAME, getInferenceEntityId()); builder.field(TaskType.NAME, getTaskType().toString()); builder.field(SERVICE, getService()); builder.field(SERVICE_SETTINGS, getServiceSettings()); @@ -431,7 +431,7 @@ private static class ModelWithUnknownField extends ModelConfigurations { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); builder.field("unknown_field", "foo"); - builder.field(MODEL_ID, getInferenceEntityId()); + builder.field(INDEX_ONLY_ID_FIELD_NAME, getInferenceEntityId()); builder.field(TaskType.NAME, getTaskType().toString()); builder.field(SERVICE, getService()); builder.field(SERVICE_SETTINGS, getServiceSettings()); 
diff --git a/x-pack/plugin/inference/src/main/java/module-info.java b/x-pack/plugin/inference/src/main/java/module-info.java index aa907a236884a..a7e5718a0920e 100644 --- a/x-pack/plugin/inference/src/main/java/module-info.java +++ b/x-pack/plugin/inference/src/main/java/module-info.java @@ -20,8 +20,13 @@ requires org.apache.lucene.join; requires com.ibm.icu; requires com.google.auth.oauth2; + requires com.google.auth; requires com.google.api.client; requires com.google.gson; + requires aws.java.sdk.bedrockruntime; + requires aws.java.sdk.core; + requires com.fasterxml.jackson.databind; + requires org.joda.time; exports org.elasticsearch.xpack.inference.action; exports org.elasticsearch.xpack.inference.registry; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java index f3799b824fc0e..f8ce9df1fb194 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java @@ -24,6 +24,10 @@ import org.elasticsearch.xpack.core.inference.results.LegacyTextEmbeddingResults; import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockSecretSettings; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionServiceSettings; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionTaskSettings; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsServiceSettings; import org.elasticsearch.xpack.inference.services.anthropic.completion.AnthropicChatCompletionServiceSettings; import org.elasticsearch.xpack.inference.services.anthropic.completion.AnthropicChatCompletionTaskSettings; import org.elasticsearch.xpack.inference.services.azureaistudio.completion.AzureAiStudioChatCompletionServiceSettings; @@ -122,10 +126,46 @@ public static List<NamedWriteableRegistry.Entry> getNamedWriteables() { addMistralNamedWriteables(namedWriteables); addCustomElandWriteables(namedWriteables); addAnthropicNamedWritables(namedWriteables); + addAmazonBedrockNamedWriteables(namedWriteables); return namedWriteables; } + private static void addAmazonBedrockNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) { + namedWriteables.add( + new NamedWriteableRegistry.Entry( + AmazonBedrockSecretSettings.class, + AmazonBedrockSecretSettings.NAME, + AmazonBedrockSecretSettings::new + ) + ); + + namedWriteables.add( + new NamedWriteableRegistry.Entry( + ServiceSettings.class, + AmazonBedrockEmbeddingsServiceSettings.NAME, + AmazonBedrockEmbeddingsServiceSettings::new + ) + ); + + // no task settings for Amazon Bedrock Embeddings + + namedWriteables.add( + new NamedWriteableRegistry.Entry( + ServiceSettings.class, + AmazonBedrockChatCompletionServiceSettings.NAME, + AmazonBedrockChatCompletionServiceSettings::new + ) + ); + namedWriteables.add( + new NamedWriteableRegistry.Entry( + TaskSettings.class, + AmazonBedrockChatCompletionTaskSettings.NAME, + AmazonBedrockChatCompletionTaskSettings::new + ) + ); + } + private static void addMistralNamedWriteables(List<NamedWriteableRegistry.Entry> namedWriteables) { namedWriteables.add( new NamedWriteableRegistry.Entry(
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 1db5b4135ee94..1c388f7399260 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -53,6 +53,7 @@ import org.elasticsearch.xpack.inference.action.TransportPutInferenceModelAction; import org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilter; import org.elasticsearch.xpack.inference.common.Truncator; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockRequestSender; import org.elasticsearch.xpack.inference.external.http.HttpClientManager; import org.elasticsearch.xpack.inference.external.http.HttpSettings; import org.elasticsearch.xpack.inference.external.http.retry.RetrySettings; @@ -70,6 +71,7 @@ import org.elasticsearch.xpack.inference.rest.RestInferenceAction; import org.elasticsearch.xpack.inference.rest.RestPutInferenceModelAction; import org.elasticsearch.xpack.inference.services.ServiceComponents; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockService; import org.elasticsearch.xpack.inference.services.anthropic.AnthropicService; import org.elasticsearch.xpack.inference.services.azureaistudio.AzureAiStudioService; import org.elasticsearch.xpack.inference.services.azureopenai.AzureOpenAiService; @@ -117,6 +119,7 @@ public class InferencePlugin extends Plugin implements ActionPlugin, ExtensibleP private final Settings settings; private final SetOnce<HttpRequestSender.Factory> httpFactory = new SetOnce<>(); + private final SetOnce<AmazonBedrockRequestSender.Factory> amazonBedrockFactory = new SetOnce<>(); private final SetOnce<ServiceComponents> serviceComponents = new SetOnce<>(); private final SetOnce<InferenceServiceRegistry> inferenceServiceRegistry = new SetOnce<>(); @@ -170,6 +173,9 @@ public Collection<?> createComponents(PluginServices services) { var httpRequestSenderFactory = new HttpRequestSender.Factory(serviceComponents.get(), httpClientManager, services.clusterService()); httpFactory.set(httpRequestSenderFactory); + var amazonBedrockRequestSenderFactory = new AmazonBedrockRequestSender.Factory(serviceComponents.get(), services.clusterService()); + amazonBedrockFactory.set(amazonBedrockRequestSenderFactory); + ModelRegistry modelRegistry = new ModelRegistry(services.client()); if (inferenceServiceExtensions == null) { @@ -209,6 +215,7 @@ public List<InferenceServiceExtension.Factory> getInferenceServiceFactories() { context -> new GoogleVertexAiService(httpFactory.get(), serviceComponents.get()), context -> new MistralService(httpFactory.get(), serviceComponents.get()), context -> new AnthropicService(httpFactory.get(), serviceComponents.get()), + context -> new AmazonBedrockService(httpFactory.get(), amazonBedrockFactory.get(), serviceComponents.get()), ElasticsearchInternalService::new ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportDeleteInferenceEndpointAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportDeleteInferenceEndpointAction.java index 07d5e1e618578..4f255d0cc5e47 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportDeleteInferenceEndpointAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportDeleteInferenceEndpointAction.java @@ -11,6 +11,7 @@ import
org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.master.TransportMasterNodeAction; @@ -34,6 +35,9 @@ import org.elasticsearch.xpack.inference.registry.ModelRegistry; import java.util.Set; +import java.util.concurrent.Executor; + +import static org.elasticsearch.xpack.inference.InferencePlugin.UTILITY_THREAD_POOL_NAME; public class TransportDeleteInferenceEndpointAction extends TransportMasterNodeAction< DeleteInferenceEndpointAction.Request, @@ -42,6 +46,7 @@ public class TransportDeleteInferenceEndpointAction extends TransportMasterNodeA private final ModelRegistry modelRegistry; private final InferenceServiceRegistry serviceRegistry; private static final Logger logger = LogManager.getLogger(TransportDeleteInferenceEndpointAction.class); + private final Executor executor; @Inject public TransportDeleteInferenceEndpointAction( @@ -66,6 +71,7 @@ public TransportDeleteInferenceEndpointAction( ); this.modelRegistry = modelRegistry; this.serviceRegistry = serviceRegistry; + this.executor = threadPool.executor(UTILITY_THREAD_POOL_NAME); } @Override @@ -74,6 +80,15 @@ protected void masterOperation( DeleteInferenceEndpointAction.Request request, ClusterState state, ActionListener masterListener + ) { + // workaround for https://github.com/elastic/elasticsearch/issues/97916 - TODO remove this when we can + executor.execute(ActionRunnable.wrap(masterListener, l -> doExecuteForked(request, state, l))); + } + + private void doExecuteForked( + DeleteInferenceEndpointAction.Request request, + ClusterState state, + ActionListener masterListener ) { SubscribableListener.newForked(modelConfigListener -> { // Get the model from the registry diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionCreator.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionCreator.java new file mode 100644 index 0000000000000..5f9fc532e33b2 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionCreator.java @@ -0,0 +1,56 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.action.amazonbedrock; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.external.http.sender.AmazonBedrockChatCompletionRequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.AmazonBedrockEmbeddingsRequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.services.ServiceComponents; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModel; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModel; + +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.action.ActionUtils.constructFailedToSendRequestMessage; + +public class AmazonBedrockActionCreator implements AmazonBedrockActionVisitor { + private final Sender sender; + private final ServiceComponents serviceComponents; + private final TimeValue timeout; + + public AmazonBedrockActionCreator(Sender sender, ServiceComponents serviceComponents, @Nullable TimeValue timeout) { + this.sender = Objects.requireNonNull(sender); + this.serviceComponents = Objects.requireNonNull(serviceComponents); + this.timeout = timeout; + } + + @Override + public ExecutableAction create(AmazonBedrockEmbeddingsModel embeddingsModel, Map<String, Object> taskSettings) { + var overriddenModel = AmazonBedrockEmbeddingsModel.of(embeddingsModel, taskSettings); + var requestManager = new AmazonBedrockEmbeddingsRequestManager( + overriddenModel, + serviceComponents.truncator(), + serviceComponents.threadPool(), + timeout + ); + var errorMessage = constructFailedToSendRequestMessage(null, "Amazon Bedrock embeddings"); + return new AmazonBedrockEmbeddingsAction(sender, requestManager, errorMessage); + } + + @Override + public ExecutableAction create(AmazonBedrockChatCompletionModel completionModel, Map<String, Object> taskSettings) { + var overriddenModel = AmazonBedrockChatCompletionModel.of(completionModel, taskSettings); + var requestManager = new AmazonBedrockChatCompletionRequestManager(overriddenModel, serviceComponents.threadPool(), timeout); + var errorMessage = constructFailedToSendRequestMessage(null, "Amazon Bedrock completion"); + return new AmazonBedrockChatCompletionAction(sender, requestManager, errorMessage); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionVisitor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionVisitor.java new file mode 100644 index 0000000000000..b540d030eb3f7 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionVisitor.java @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.action.amazonbedrock; + +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModel; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModel; + +import java.util.Map; + +public interface AmazonBedrockActionVisitor { + ExecutableAction create(AmazonBedrockEmbeddingsModel embeddingsModel, Map<String, Object> taskSettings); + + ExecutableAction create(AmazonBedrockChatCompletionModel completionModel, Map<String, Object> taskSettings); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockChatCompletionAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockChatCompletionAction.java new file mode 100644 index 0000000000000..9d3c39d3ac4d9 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockChatCompletionAction.java @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.action.amazonbedrock; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; +import org.elasticsearch.xpack.inference.external.http.sender.RequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; + +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.action.ActionUtils.createInternalServerError; +import static org.elasticsearch.xpack.inference.external.action.ActionUtils.wrapFailuresInElasticsearchException; + +public class AmazonBedrockChatCompletionAction implements ExecutableAction { + private final Sender sender; + private final RequestManager requestManager; + private final String errorMessage; + + public AmazonBedrockChatCompletionAction(Sender sender, RequestManager requestManager, String errorMessage) { + this.sender = Objects.requireNonNull(sender); + this.requestManager = Objects.requireNonNull(requestManager); + this.errorMessage = Objects.requireNonNull(errorMessage); + } + + @Override + public void execute(InferenceInputs inferenceInputs, TimeValue timeout, ActionListener<InferenceServiceResults> listener) { + try { + ActionListener<InferenceServiceResults> wrappedListener = wrapFailuresInElasticsearchException(errorMessage, listener); + + sender.send(requestManager, inferenceInputs, timeout, wrappedListener); + } catch (ElasticsearchException e) { + listener.onFailure(e); + } catch (Exception e) { + listener.onFailure(createInternalServerError(e, errorMessage)); + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockEmbeddingsAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockEmbeddingsAction.java new file mode 100644 index
0000000000000..3f8be0c3cccbe --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockEmbeddingsAction.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.action.amazonbedrock; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; +import org.elasticsearch.xpack.inference.external.http.sender.RequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; + +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.action.ActionUtils.createInternalServerError; +import static org.elasticsearch.xpack.inference.external.action.ActionUtils.wrapFailuresInElasticsearchException; + +public class AmazonBedrockEmbeddingsAction implements ExecutableAction { + + private final Sender sender; + private final RequestManager requestManager; + private final String errorMessage; + + public AmazonBedrockEmbeddingsAction(Sender sender, RequestManager requestManager, String errorMessage) { + this.sender = Objects.requireNonNull(sender); + this.requestManager = Objects.requireNonNull(requestManager); + this.errorMessage = Objects.requireNonNull(errorMessage); + } + + @Override + public void execute(InferenceInputs inferenceInputs, TimeValue timeout, ActionListener<InferenceServiceResults> listener) { + try { + ActionListener<InferenceServiceResults> wrappedListener = wrapFailuresInElasticsearchException(errorMessage, listener); + + sender.send(requestManager, inferenceInputs, timeout, wrappedListener); + } catch (ElasticsearchException e) { + listener.onFailure(e); + } catch (Exception e) { + listener.onFailure(createInternalServerError(e, errorMessage)); + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockBaseClient.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockBaseClient.java new file mode 100644 index 0000000000000..f9e403582a0ec --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockBaseClient.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; + +import java.time.Clock; +import java.util.Objects; + +public abstract class AmazonBedrockBaseClient implements AmazonBedrockClient { + protected final Integer modelKeysAndRegionHashcode; + protected Clock clock = Clock.systemUTC(); + + protected AmazonBedrockBaseClient(AmazonBedrockModel model, @Nullable TimeValue timeout) { + Objects.requireNonNull(model); + this.modelKeysAndRegionHashcode = getModelKeysAndRegionHashcode(model, timeout); + } + + public static Integer getModelKeysAndRegionHashcode(AmazonBedrockModel model, @Nullable TimeValue timeout) { + var secretSettings = model.getSecretSettings(); + var serviceSettings = model.getServiceSettings(); + return Objects.hash(secretSettings.accessKey, secretSettings.secretKey, serviceSettings.region(), timeout); + } + + public final void setClock(Clock clock) { + this.clock = clock; + } + + abstract void close(); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockChatCompletionExecutor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockChatCompletionExecutor.java new file mode 100644 index 0000000000000..a4e0c399517c1 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockChatCompletionExecutor.java @@ -0,0 +1,43 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockChatCompletionRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.completion.AmazonBedrockChatCompletionResponseListener; + +import java.util.function.Supplier; + +public class AmazonBedrockChatCompletionExecutor extends AmazonBedrockExecutor { + private final AmazonBedrockChatCompletionRequest chatCompletionRequest; + + protected AmazonBedrockChatCompletionExecutor( + AmazonBedrockChatCompletionRequest request, + AmazonBedrockResponseHandler responseHandler, + Logger logger, + Supplier<Boolean> hasRequestCompletedFunction, + ActionListener<InferenceServiceResults> inferenceResultsListener, + AmazonBedrockClientCache clientCache + ) { + super(request, responseHandler, logger, hasRequestCompletedFunction, inferenceResultsListener, clientCache); + this.chatCompletionRequest = request; + } + + @Override + protected void executeClientRequest(AmazonBedrockBaseClient awsBedrockClient) { + var chatCompletionResponseListener = new AmazonBedrockChatCompletionResponseListener( + chatCompletionRequest, + responseHandler, + inferenceResultsListener + ); + chatCompletionRequest.executeChatCompletionRequest(awsBedrockClient, chatCompletionResponseListener); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockClient.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockClient.java new file mode 100644 index 0000000000000..812e76129c420 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockClient.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.ConverseResult; +import com.amazonaws.services.bedrockruntime.model.InvokeModelRequest; +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; + +import java.time.Instant; + +public interface AmazonBedrockClient { + void converse(ConverseRequest converseRequest, ActionListener<ConverseResult> responseListener) throws ElasticsearchException; + + void invokeModel(InvokeModelRequest invokeModelRequest, ActionListener<InvokeModelResult> responseListener) + throws ElasticsearchException; + + boolean isExpired(Instant currentTimestampMs); + + void resetExpiration(); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockClientCache.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockClientCache.java new file mode 100644 index 0000000000000..e6bb99620b581 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockClientCache.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; + +import java.io.Closeable; +import java.io.IOException; + +public interface AmazonBedrockClientCache extends Closeable { + AmazonBedrockBaseClient getOrCreateClient(AmazonBedrockModel model, @Nullable TimeValue timeout) throws IOException; +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockEmbeddingsExecutor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockEmbeddingsExecutor.java new file mode 100644 index 0000000000000..6da3f86e0909a --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockEmbeddingsExecutor.java @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings.AmazonBedrockEmbeddingsRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.embeddings.AmazonBedrockEmbeddingsResponseListener; + +import java.util.function.Supplier; + +public class AmazonBedrockEmbeddingsExecutor extends AmazonBedrockExecutor { + + private final AmazonBedrockEmbeddingsRequest embeddingsRequest; + + protected AmazonBedrockEmbeddingsExecutor( + AmazonBedrockEmbeddingsRequest request, + AmazonBedrockResponseHandler responseHandler, + Logger logger, + Supplier<Boolean> hasRequestCompletedFunction, + ActionListener<InferenceServiceResults> inferenceResultsListener, + AmazonBedrockClientCache clientCache + ) { + super(request, responseHandler, logger, hasRequestCompletedFunction, inferenceResultsListener, clientCache); + this.embeddingsRequest = request; + } + + @Override + protected void executeClientRequest(AmazonBedrockBaseClient awsBedrockClient) { + var embeddingsResponseListener = new AmazonBedrockEmbeddingsResponseListener( + embeddingsRequest, + responseHandler, + inferenceResultsListener + ); + embeddingsRequest.executeEmbeddingsRequest(awsBedrockClient, embeddingsResponseListener); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecuteOnlyRequestSender.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecuteOnlyRequestSender.java new file mode 100644 index 0000000000000..a08acab655936 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecuteOnlyRequestSender.java @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.http.retry.RequestSender; +import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockChatCompletionRequest; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings.AmazonBedrockEmbeddingsRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; +import org.elasticsearch.xpack.inference.logging.ThrottlerManager; + +import java.io.IOException; +import java.util.Objects; +import java.util.function.Supplier; + +import static org.elasticsearch.core.Strings.format; + +/** + * The AWS SDK uses its own internal retrier and timeout values on the client + */ +public class AmazonBedrockExecuteOnlyRequestSender implements RequestSender { + + protected final AmazonBedrockClientCache clientCache; + private final ThrottlerManager throttleManager; + + public AmazonBedrockExecuteOnlyRequestSender(AmazonBedrockClientCache clientCache, ThrottlerManager throttlerManager) { + this.clientCache = Objects.requireNonNull(clientCache); + this.throttleManager = Objects.requireNonNull(throttlerManager); + } + + @Override + public void send( + Logger logger, + Request request, + HttpClientContext context, + Supplier<Boolean> hasRequestTimedOutFunction, + ResponseHandler responseHandler, + ActionListener<InferenceServiceResults> listener + ) { + if (request instanceof AmazonBedrockRequest awsRequest && responseHandler instanceof AmazonBedrockResponseHandler awsResponse) { + try { + var executor = createExecutor(awsRequest, awsResponse, logger, hasRequestTimedOutFunction, listener); + + // the run method will call the listener to return the proper value + executor.run(); + return; + } catch (Exception e) { + logException(logger, request, e); + listener.onFailure(wrapWithElasticsearchException(e, request.getInferenceEntityId())); + } + } + + listener.onFailure(new ElasticsearchException("Amazon Bedrock request was not the correct type")); + } + + // allow this to be overridden for testing + protected AmazonBedrockExecutor createExecutor( + AmazonBedrockRequest awsRequest, + AmazonBedrockResponseHandler awsResponse, + Logger logger, + Supplier<Boolean> hasRequestTimedOutFunction, + ActionListener<InferenceServiceResults> listener + ) { + switch (awsRequest.taskType()) { + case COMPLETION -> { + return new AmazonBedrockChatCompletionExecutor( + (AmazonBedrockChatCompletionRequest) awsRequest, + awsResponse, + logger, + hasRequestTimedOutFunction, + listener, + clientCache + ); + } + case TEXT_EMBEDDING -> { + return new AmazonBedrockEmbeddingsExecutor( + (AmazonBedrockEmbeddingsRequest) awsRequest, + awsResponse, + logger, + hasRequestTimedOutFunction, + listener, + clientCache + ); + } + default -> { + throw new UnsupportedOperationException("Unsupported task type [" + awsRequest.taskType() + "] for Amazon Bedrock request"); + } + } + } + + private void logException(Logger logger, Request request, Exception
exception) { + var causeException = ExceptionsHelper.unwrapCause(exception); + + throttleManager.warn( + logger, + format("Failed while sending request from inference entity id [%s] of type [amazonbedrock]", request.getInferenceEntityId()), + causeException + ); + } + + private Exception wrapWithElasticsearchException(Exception e, String inferenceEntityId) { + return new ElasticsearchException( + format("Amazon Bedrock client failed to send request from inference entity id [%s]", inferenceEntityId), + e + ); + } + + public void shutdown() throws IOException { + this.clientCache.close(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecutor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecutor.java new file mode 100644 index 0000000000000..fa220ee5d2831 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecutor.java @@ -0,0 +1,68 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.Strings; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; + +import java.util.Objects; +import java.util.function.Supplier; + +public abstract class AmazonBedrockExecutor implements Runnable { + protected final AmazonBedrockModel baseModel; + protected final AmazonBedrockResponseHandler responseHandler; + protected final Logger logger; + protected final AmazonBedrockRequest request; + protected final Supplier<Boolean> hasRequestCompletedFunction; + protected final ActionListener<InferenceServiceResults> inferenceResultsListener; + protected final AmazonBedrockClientCache clientCache; + + protected AmazonBedrockExecutor( + AmazonBedrockRequest request, + AmazonBedrockResponseHandler responseHandler, + Logger logger, + Supplier<Boolean> hasRequestCompletedFunction, + ActionListener<InferenceServiceResults> inferenceResultsListener, + AmazonBedrockClientCache clientCache + ) { + this.request = Objects.requireNonNull(request); + this.responseHandler = Objects.requireNonNull(responseHandler); + this.logger = Objects.requireNonNull(logger); + this.hasRequestCompletedFunction = Objects.requireNonNull(hasRequestCompletedFunction); + this.inferenceResultsListener = Objects.requireNonNull(inferenceResultsListener); + this.clientCache = Objects.requireNonNull(clientCache); + this.baseModel = request.model(); + } + + @Override + public void run() { + if (hasRequestCompletedFunction.get()) { + // has already been run + return; + } + + var inferenceEntityId = baseModel.getInferenceEntityId(); + + try { + var awsBedrockClient = clientCache.getOrCreateClient(baseModel, request.timeout()); + executeClientRequest(awsBedrockClient); + } catch (Exception e) { + var errorMessage = Strings.format("Failed to send request from inference
entity id [%s]", inferenceEntityId); + logger.warn(errorMessage, e); + inferenceResultsListener.onFailure(new ElasticsearchException(errorMessage, e)); + } + } + + protected abstract void executeClientRequest(AmazonBedrockBaseClient awsBedrockClient); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClient.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClient.java new file mode 100644 index 0000000000000..c3d458925268c --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClient.java @@ -0,0 +1,166 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.services.bedrockruntime.AmazonBedrockRuntimeAsync; +import com.amazonaws.services.bedrockruntime.AmazonBedrockRuntimeAsyncClientBuilder; +import com.amazonaws.services.bedrockruntime.model.AmazonBedrockRuntimeException; +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.ConverseResult; +import com.amazonaws.services.bedrockruntime.model.InvokeModelRequest; +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.SpecialPermission; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xpack.core.common.socket.SocketAccess; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; + +import java.security.AccessController; +import java.security.PrivilegedExceptionAction; +import java.time.Duration; +import java.time.Instant; +import java.util.Objects; + +/** + * Not marking this as "final" so we can subclass it for mocking + */ +public class AmazonBedrockInferenceClient extends AmazonBedrockBaseClient { + + // package-private for testing + static final int CLIENT_CACHE_EXPIRY_MINUTES = 5; + private static final int DEFAULT_CLIENT_TIMEOUT_MS = 10000; + + private final AmazonBedrockRuntimeAsync internalClient; + private volatile Instant expiryTimestamp; + + public static AmazonBedrockBaseClient create(AmazonBedrockModel model, @Nullable TimeValue timeout) { + try { + return new AmazonBedrockInferenceClient(model, timeout); + } catch (Exception e) { + throw new ElasticsearchException("Failed to create Amazon Bedrock Client", e); + } + } + + protected AmazonBedrockInferenceClient(AmazonBedrockModel model, @Nullable TimeValue timeout) { + super(model, timeout); + this.internalClient = createAmazonBedrockClient(model, timeout); + setExpiryTimestamp(); + } + + @Override + public void converse(ConverseRequest converseRequest, ActionListener responseListener) throws ElasticsearchException { + try { + var responseFuture = internalClient.converseAsync(converseRequest); + responseListener.onResponse(responseFuture.get()); + } catch 
(AmazonBedrockRuntimeException amazonBedrockRuntimeException) { + responseListener.onFailure( + new ElasticsearchException( + Strings.format("AmazonBedrock converse failure: [%s]", amazonBedrockRuntimeException.getMessage()), + amazonBedrockRuntimeException + ) + ); + } catch (ElasticsearchException elasticsearchException) { + // just throw the exception if we have one + responseListener.onFailure(elasticsearchException); + } catch (Exception e) { + responseListener.onFailure(new ElasticsearchException("Amazon Bedrock client converse call failed", e)); + } + } + + @Override + public void invokeModel(InvokeModelRequest invokeModelRequest, ActionListener responseListener) + throws ElasticsearchException { + try { + var responseFuture = internalClient.invokeModelAsync(invokeModelRequest); + responseListener.onResponse(responseFuture.get()); + } catch (AmazonBedrockRuntimeException amazonBedrockRuntimeException) { + responseListener.onFailure( + new ElasticsearchException( + Strings.format("AmazonBedrock invoke model failure: [%s]", amazonBedrockRuntimeException.getMessage()), + amazonBedrockRuntimeException + ) + ); + } catch (ElasticsearchException elasticsearchException) { + // just throw the exception if we have one + responseListener.onFailure(elasticsearchException); + } catch (Exception e) { + responseListener.onFailure(new ElasticsearchException(e)); + } + } + + // allow this to be overridden for test mocks + protected AmazonBedrockRuntimeAsync createAmazonBedrockClient(AmazonBedrockModel model, @Nullable TimeValue timeout) { + var secretSettings = model.getSecretSettings(); + var credentials = new BasicAWSCredentials(secretSettings.accessKey.toString(), secretSettings.secretKey.toString()); + var credentialsProvider = new AWSStaticCredentialsProvider(credentials); + var clientConfig = timeout == null + ? 
new ClientConfiguration().withConnectionTimeout(DEFAULT_CLIENT_TIMEOUT_MS) + : new ClientConfiguration().withConnectionTimeout((int) timeout.millis()); + + var serviceSettings = model.getServiceSettings(); + + try { + SpecialPermission.check(); + AmazonBedrockRuntimeAsyncClientBuilder builder = AccessController.doPrivileged( + (PrivilegedExceptionAction) () -> AmazonBedrockRuntimeAsyncClientBuilder.standard() + .withCredentials(credentialsProvider) + .withRegion(serviceSettings.region()) + .withClientConfiguration(clientConfig) + ); + + return SocketAccess.doPrivileged(builder::build); + } catch (AmazonBedrockRuntimeException amazonBedrockRuntimeException) { + throw new ElasticsearchException( + Strings.format("failed to create AmazonBedrockRuntime client: [%s]", amazonBedrockRuntimeException.getMessage()), + amazonBedrockRuntimeException + ); + } catch (Exception e) { + throw new ElasticsearchException("failed to create AmazonBedrockRuntime client", e); + } + } + + private void setExpiryTimestamp() { + this.expiryTimestamp = clock.instant().plus(Duration.ofMinutes(CLIENT_CACHE_EXPIRY_MINUTES)); + } + + @Override + public boolean isExpired(Instant currentTimestampMs) { + Objects.requireNonNull(currentTimestampMs); + return currentTimestampMs.isAfter(expiryTimestamp); + } + + public void resetExpiration() { + setExpiryTimestamp(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AmazonBedrockInferenceClient that = (AmazonBedrockInferenceClient) o; + return Objects.equals(modelKeysAndRegionHashcode, that.modelKeysAndRegionHashcode); + } + + @Override + public int hashCode() { + return this.modelKeysAndRegionHashcode; + } + + // make this package-private so only the cache can close it + @Override + void close() { + internalClient.shutdown(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClientCache.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClientCache.java new file mode 100644 index 0000000000000..e245365c214af --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClientCache.java @@ -0,0 +1,137 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import com.amazonaws.http.IdleConnectionReaper; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; + +import java.io.IOException; +import java.time.Clock; +import java.util.ArrayList; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.BiFunction; + +public final class AmazonBedrockInferenceClientCache implements AmazonBedrockClientCache { + + private final BiFunction<AmazonBedrockModel, TimeValue, AmazonBedrockBaseClient> creator; + private final Map<Integer, AmazonBedrockBaseClient> clientsCache = new ConcurrentHashMap<>(); + private final ReentrantReadWriteLock cacheLock = new ReentrantReadWriteLock(); + + // not final for testing + private Clock clock; + + public AmazonBedrockInferenceClientCache( + BiFunction<AmazonBedrockModel, TimeValue, AmazonBedrockBaseClient> creator, + @Nullable Clock clock + ) { + this.creator = Objects.requireNonNull(creator); + this.clock = Objects.requireNonNullElse(clock, Clock.systemUTC()); + } + + public AmazonBedrockBaseClient getOrCreateClient(AmazonBedrockModel model, @Nullable TimeValue timeout) { + var returnClient = internalGetOrCreateClient(model, timeout); + flushExpiredClients(); + return returnClient; + } + + private AmazonBedrockBaseClient internalGetOrCreateClient(AmazonBedrockModel model, @Nullable TimeValue timeout) { + final Integer modelHash = AmazonBedrockInferenceClient.getModelKeysAndRegionHashcode(model, timeout); + cacheLock.readLock().lock(); + try { + return clientsCache.computeIfAbsent(modelHash, hashKey -> { + final AmazonBedrockBaseClient builtClient = creator.apply(model, timeout); + builtClient.setClock(clock); + builtClient.resetExpiration(); + return builtClient; + }); + } finally { + cacheLock.readLock().unlock(); + } + } + + private void flushExpiredClients() { + var currentTimestampMs = clock.instant(); + var expiredClients = new ArrayList<Map.Entry<Integer, AmazonBedrockBaseClient>>(); + + cacheLock.readLock().lock(); + try { + for (final Map.Entry<Integer, AmazonBedrockBaseClient> client : clientsCache.entrySet()) { + if (client.getValue().isExpired(currentTimestampMs)) { + expiredClients.add(client); + } + } + + if (expiredClients.isEmpty()) { + return; + } + + cacheLock.readLock().unlock(); + cacheLock.writeLock().lock(); + try { + for (final Map.Entry<Integer, AmazonBedrockBaseClient> client : expiredClients) { + var removed = clientsCache.remove(client.getKey()); + if (removed != null) { + removed.close(); + } + } + } finally { + cacheLock.readLock().lock(); + cacheLock.writeLock().unlock(); + } + } finally { + cacheLock.readLock().unlock(); + } + } + + @Override + public void close() throws IOException { + releaseCachedClients(); + } + + private void releaseCachedClients() { + // as we're closing and flushing all of these - we'll use a write lock + // across the whole operation to ensure this stays in sync + cacheLock.writeLock().lock(); + try { + // ensure all the clients are closed before we clear + for (final AmazonBedrockBaseClient client : clientsCache.values()) { + client.close(); + } + + // clear previously cached clients, they will be built lazily + clientsCache.clear(); + } finally { + cacheLock.writeLock().unlock(); + } + + // shutdown IdleConnectionReaper background thread + // it will be restarted on new client usage + IdleConnectionReaper.shutdown(); + } + + // used for testing + int clientCount() { + cacheLock.readLock().lock(); + try { + return clientsCache.size(); + } finally { + cacheLock.readLock().unlock(); + } + } + + // used
for testing + void setClock(Clock newClock) { + this.clock = Objects.requireNonNull(newClock); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockRequestSender.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockRequestSender.java new file mode 100644 index 0000000000000..e23b0274ede26 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockRequestSender.java @@ -0,0 +1,126 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.inference.external.http.sender.AmazonBedrockRequestExecutorService; +import org.elasticsearch.xpack.inference.external.http.sender.AmazonBedrockRequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; +import org.elasticsearch.xpack.inference.external.http.sender.RequestExecutorServiceSettings; +import org.elasticsearch.xpack.inference.external.http.sender.RequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.services.ServiceComponents; + +import java.io.IOException; +import java.util.Objects; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.elasticsearch.xpack.inference.InferencePlugin.UTILITY_THREAD_POOL_NAME; + +public class AmazonBedrockRequestSender implements Sender { + + public static class Factory { + private final ServiceComponents serviceComponents; + private final ClusterService clusterService; + + public Factory(ServiceComponents serviceComponents, ClusterService clusterService) { + this.serviceComponents = Objects.requireNonNull(serviceComponents); + this.clusterService = Objects.requireNonNull(clusterService); + } + + public Sender createSender() { + var clientCache = new AmazonBedrockInferenceClientCache(AmazonBedrockInferenceClient::create, null); + return createSender(new AmazonBedrockExecuteOnlyRequestSender(clientCache, serviceComponents.throttlerManager())); + } + + Sender createSender(AmazonBedrockExecuteOnlyRequestSender requestSender) { + var sender = new AmazonBedrockRequestSender( + serviceComponents.threadPool(), + clusterService, + serviceComponents.settings(), + Objects.requireNonNull(requestSender) + ); + // ensure this is started + sender.start(); + return sender; + } + } + + private static final TimeValue START_COMPLETED_WAIT_TIME = TimeValue.timeValueSeconds(5); + + private final ThreadPool threadPool; + private final AmazonBedrockRequestExecutorService executorService; + private final AtomicBoolean started = new AtomicBoolean(false); + private final CountDownLatch startCompleted = new CountDownLatch(1); + + protected AmazonBedrockRequestSender( + 
ThreadPool threadPool, + ClusterService clusterService, + Settings settings, + AmazonBedrockExecuteOnlyRequestSender requestSender + ) { + this.threadPool = Objects.requireNonNull(threadPool); + executorService = new AmazonBedrockRequestExecutorService( + threadPool, + startCompleted, + new RequestExecutorServiceSettings(settings, clusterService), + requestSender + ); + } + + @Override + public void start() { + if (started.compareAndSet(false, true)) { + // The manager must be started before the executor service. That way we guarantee that the http client + // is ready prior to the service attempting to use the http client to send a request + threadPool.executor(UTILITY_THREAD_POOL_NAME).execute(executorService::start); + waitForStartToComplete(); + } + } + + private void waitForStartToComplete() { + try { + if (startCompleted.await(START_COMPLETED_WAIT_TIME.getSeconds(), TimeUnit.SECONDS) == false) { + throw new IllegalStateException("Amazon Bedrock sender startup did not complete in time"); + } + } catch (InterruptedException e) { + throw new IllegalStateException("Amazon Bedrock sender interrupted while waiting for startup to complete"); + } + } + + @Override + public void send( + RequestManager requestCreator, + InferenceInputs inferenceInputs, + TimeValue timeout, + ActionListener<InferenceServiceResults> listener + ) { + assert started.get() : "Amazon Bedrock request sender: call start() before sending a request"; + waitForStartToComplete(); + + if (requestCreator instanceof AmazonBedrockRequestManager amazonBedrockRequestManager) { + executorService.execute(amazonBedrockRequestManager, inferenceInputs, timeout, listener); + return; + } + + listener.onFailure(new ElasticsearchException("Amazon Bedrock request sender did not receive a valid request manager")); + } + + @Override + public void close() throws IOException { + executorService.shutdown(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockChatCompletionRequestManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockChatCompletionRequestManager.java new file mode 100644 index 0000000000000..1d8226664979c --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockChatCompletionRequestManager.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.http.sender; + +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.Strings; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.inference.external.http.retry.RequestSender; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockChatCompletionEntityFactory; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockChatCompletionRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.completion.AmazonBedrockChatCompletionResponseHandler; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModel; + +import java.util.List; +import java.util.function.Supplier; + +public class AmazonBedrockChatCompletionRequestManager extends AmazonBedrockRequestManager { + private static final Logger logger = LogManager.getLogger(AmazonBedrockChatCompletionRequestManager.class); + private final AmazonBedrockChatCompletionModel model; + + public AmazonBedrockChatCompletionRequestManager( + AmazonBedrockChatCompletionModel model, + ThreadPool threadPool, + @Nullable TimeValue timeout + ) { + super(model, threadPool, timeout); + this.model = model; + } + + @Override + public void execute( + String query, + List<String> input, + RequestSender requestSender, + Supplier<Boolean> hasRequestCompletedFunction, + ActionListener<InferenceServiceResults> listener + ) { + var requestEntity = AmazonBedrockChatCompletionEntityFactory.createEntity(model, input); + var request = new AmazonBedrockChatCompletionRequest(model, requestEntity, timeout); + var responseHandler = new AmazonBedrockChatCompletionResponseHandler(); + + try { + requestSender.send(logger, request, HttpClientContext.create(), hasRequestCompletedFunction, responseHandler, listener); + } catch (Exception e) { + var errorMessage = Strings.format( + "Failed to send [completion] request from inference entity id [%s]", + request.getInferenceEntityId() + ); + logger.warn(errorMessage, e); + listener.onFailure(new ElasticsearchException(errorMessage, e)); + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockEmbeddingsRequestManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockEmbeddingsRequestManager.java new file mode 100644 index 0000000000000..e9bc6b574865c --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockEmbeddingsRequestManager.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.http.sender; + +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.Strings; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.inference.common.Truncator; +import org.elasticsearch.xpack.inference.external.http.retry.RequestSender; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings.AmazonBedrockEmbeddingsEntityFactory; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings.AmazonBedrockEmbeddingsRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.embeddings.AmazonBedrockEmbeddingsResponseHandler; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModel; + +import java.util.List; +import java.util.Objects; +import java.util.function.Supplier; + +import static org.elasticsearch.xpack.inference.common.Truncator.truncate; + +public class AmazonBedrockEmbeddingsRequestManager extends AmazonBedrockRequestManager { + private static final Logger logger = LogManager.getLogger(AmazonBedrockEmbeddingsRequestManager.class); + + private final AmazonBedrockEmbeddingsModel embeddingsModel; + private final Truncator truncator; + + public AmazonBedrockEmbeddingsRequestManager( + AmazonBedrockEmbeddingsModel model, + Truncator truncator, + ThreadPool threadPool, + @Nullable TimeValue timeout + ) { + super(model, threadPool, timeout); + this.embeddingsModel = model; + this.truncator = Objects.requireNonNull(truncator); + } + + @Override + public void execute( + String query, + List<String> input, + RequestSender requestSender, + Supplier<Boolean> hasRequestCompletedFunction, + ActionListener<InferenceServiceResults> listener + ) { + var serviceSettings = embeddingsModel.getServiceSettings(); + var truncatedInput = truncate(input, serviceSettings.maxInputTokens()); + var requestEntity = AmazonBedrockEmbeddingsEntityFactory.createEntity(embeddingsModel, truncatedInput); + var responseHandler = new AmazonBedrockEmbeddingsResponseHandler(); + var request = new AmazonBedrockEmbeddingsRequest(truncator, truncatedInput, embeddingsModel, requestEntity, timeout); + try { + requestSender.send(logger, request, HttpClientContext.create(), hasRequestCompletedFunction, responseHandler, listener); + } catch (Exception e) { + var errorMessage = Strings.format( + "Failed to send [text_embedding] request from inference entity id [%s]", + request.getInferenceEntityId() + ); + logger.warn(errorMessage, e); + listener.onFailure(new ElasticsearchException(errorMessage, e)); + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockRequestExecutorService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockRequestExecutorService.java new file mode 100644 index 0000000000000..8b4672d45c250 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockRequestExecutorService.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V.
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.http.sender; + +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockExecuteOnlyRequestSender; + +import java.io.IOException; +import java.util.concurrent.CountDownLatch; + +/** + * Allows this to have a public interface for Amazon Bedrock support + */ +public class AmazonBedrockRequestExecutorService extends RequestExecutorService { + + private final AmazonBedrockExecuteOnlyRequestSender requestSender; + + public AmazonBedrockRequestExecutorService( + ThreadPool threadPool, + CountDownLatch startupLatch, + RequestExecutorServiceSettings settings, + AmazonBedrockExecuteOnlyRequestSender requestSender + ) { + super(threadPool, startupLatch, settings, requestSender); + this.requestSender = requestSender; + } + + @Override + public void shutdown() { + super.shutdown(); + try { + requestSender.shutdown(); + } catch (IOException e) { + // swallow the exception + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockRequestManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockRequestManager.java new file mode 100644 index 0000000000000..f75343b038368 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/AmazonBedrockRequestManager.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.http.sender; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.util.Objects; + +public abstract class AmazonBedrockRequestManager implements RequestManager { + + protected final ThreadPool threadPool; + protected final TimeValue timeout; + private final AmazonBedrockModel baseModel; + + protected AmazonBedrockRequestManager(AmazonBedrockModel baseModel, ThreadPool threadPool, @Nullable TimeValue timeout) { + this.baseModel = Objects.requireNonNull(baseModel); + this.threadPool = Objects.requireNonNull(threadPool); + this.timeout = timeout; + } + + @Override + public String inferenceEntityId() { + return baseModel.getInferenceEntityId(); + } + + @Override + public RateLimitSettings rateLimitSettings() { + return baseModel.rateLimitSettings(); + } + + record RateLimitGrouping(int keyHash) { + public static AmazonBedrockRequestManager.RateLimitGrouping of(AmazonBedrockModel model) { + Objects.requireNonNull(model); + + var awsSecretSettings = model.getSecretSettings(); + + return new RateLimitGrouping(Objects.hash(awsSecretSettings.accessKey, awsSecretSettings.secretKey)); + } + } + + @Override + public Object rateLimitGrouping() { + return RateLimitGrouping.of(this.baseModel); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockJsonBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockJsonBuilder.java new file mode 100644 index 0000000000000..829e899beba5e --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockJsonBuilder.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.xcontent.ToXContent; + +import java.io.IOException; + +import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; + +public class AmazonBedrockJsonBuilder { + + private final ToXContent jsonWriter; + + public AmazonBedrockJsonBuilder(ToXContent jsonWriter) { + this.jsonWriter = jsonWriter; + } + + public String getStringContent() throws IOException { + try (var builder = jsonBuilder()) { + return Strings.toString(jsonWriter.toXContent(builder, null)); + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockJsonWriter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockJsonWriter.java new file mode 100644 index 0000000000000..83ebcb4563a8c --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockJsonWriter.java @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock; + +import com.fasterxml.jackson.core.JsonGenerator; + +import java.io.IOException; + +/** + * This is needed as the input for the Amazon Bedrock SDK does not like + * the formatting of XContent JSON output + */ +public interface AmazonBedrockJsonWriter { + JsonGenerator writeJson(JsonGenerator generator) throws IOException; +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockRequest.java new file mode 100644 index 0000000000000..e356212ed07fb --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/AmazonBedrockRequest.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockBaseClient; +import org.elasticsearch.xpack.inference.external.request.HttpRequest; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; + +import java.net.URI; + +public abstract class AmazonBedrockRequest implements Request { + + protected final AmazonBedrockModel amazonBedrockModel; + protected final String inferenceId; + protected final TimeValue timeout; + + protected AmazonBedrockRequest(AmazonBedrockModel model, @Nullable TimeValue timeout) { + this.amazonBedrockModel = model; + this.inferenceId = model.getInferenceEntityId(); + this.timeout = timeout; + } + + protected abstract void executeRequest(AmazonBedrockBaseClient client); + + public AmazonBedrockModel model() { + return amazonBedrockModel; + } + + /** + * Amazon Bedrock uses the AWS SDK, and will not create its own Http Request + * But, this is needed for the ExecutableInferenceRequest to get the inferenceEntityId + * @return NoOp request + */ + @Override + public final HttpRequest createHttpRequest() { + return new HttpRequest(new NoOpHttpRequest(), inferenceId); + } + + /** + * Amazon Bedrock uses the AWS SDK, and will not create its own URI + * @return null + */ + @Override + public final URI getURI() { + throw new UnsupportedOperationException(); + } + + /** + * Should be overridden for text embeddings requests + * @return null + */ + @Override + public Request truncate() { + return this; + } + + /** + * Should be overridden for text embeddings requests + * @return boolean[0] + */ + @Override + public boolean[] getTruncationInfo() { + return new boolean[0]; + } + + @Override + public String getInferenceEntityId() { + return amazonBedrockModel.getInferenceEntityId(); + } + + public TimeValue timeout() { + return timeout; + } + + public abstract TaskType taskType(); +} diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/NoOpHttpRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/NoOpHttpRequest.java new file mode 100644 index 0000000000000..7087bb03bca5e --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/NoOpHttpRequest.java @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock; + +import org.apache.http.client.methods.HttpRequestBase; + +/** + * Needed for compatibility with RequestSender + */ +public class NoOpHttpRequest extends HttpRequestBase { + @Override + public String getMethod() { + return "NOOP"; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAI21LabsCompletionRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAI21LabsCompletionRequestEntity.java new file mode 100644 index 0000000000000..6e2f2f6702005 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAI21LabsCompletionRequestEntity.java @@ -0,0 +1,63 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.InferenceConfiguration; + +import org.elasticsearch.core.Nullable; + +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseUtils.getConverseMessageList; + +public record AmazonBedrockAI21LabsCompletionRequestEntity( + List messages, + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Integer maxTokenCount +) implements AmazonBedrockConverseRequestEntity { + + public AmazonBedrockAI21LabsCompletionRequestEntity { + Objects.requireNonNull(messages); + } + + @Override + public ConverseRequest addMessages(ConverseRequest request) { + return request.withMessages(getConverseMessageList(messages)); + } + + @Override + public ConverseRequest addInferenceConfig(ConverseRequest request) { + if (temperature == null && topP == null && maxTokenCount == null) { + return request; + } + + InferenceConfiguration inferenceConfig = new InferenceConfiguration(); + + if (temperature != null) { + inferenceConfig = inferenceConfig.withTemperature(temperature.floatValue()); + } + + if (topP != null) { + inferenceConfig = inferenceConfig.withTopP(topP.floatValue()); + } + + if (maxTokenCount != null) { + inferenceConfig = inferenceConfig.withMaxTokens(maxTokenCount); + } + + return request.withInferenceConfig(inferenceConfig); + } + + @Override + public ConverseRequest addAdditionalModelFields(ConverseRequest request) { + return request; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAnthropicCompletionRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAnthropicCompletionRequestEntity.java new file mode 100644 index 0000000000000..a8b0032af09c5 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAnthropicCompletionRequestEntity.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.InferenceConfiguration; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; + +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseUtils.getConverseMessageList; + +public record AmazonBedrockAnthropicCompletionRequestEntity( + List messages, + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Double topK, + @Nullable Integer maxTokenCount +) implements AmazonBedrockConverseRequestEntity { + + public AmazonBedrockAnthropicCompletionRequestEntity { + Objects.requireNonNull(messages); + } + + @Override + public ConverseRequest addMessages(ConverseRequest request) { + return request.withMessages(getConverseMessageList(messages)); + } + + @Override + public ConverseRequest addInferenceConfig(ConverseRequest request) { + if (temperature == null && topP == null && maxTokenCount == null) { + return request; + } + + InferenceConfiguration inferenceConfig = new InferenceConfiguration(); + + if (temperature != null) { + inferenceConfig = inferenceConfig.withTemperature(temperature.floatValue()); + } + + if (topP != null) { + inferenceConfig = inferenceConfig.withTopP(topP.floatValue()); + } + + if (maxTokenCount != null) { + inferenceConfig = inferenceConfig.withMaxTokens(maxTokenCount); + } + + return request.withInferenceConfig(inferenceConfig); + } + + @Override + public ConverseRequest addAdditionalModelFields(ConverseRequest request) { + if (topK == null) { + return request; + } + + String topKField = Strings.format("{\"top_k\":%f}", topK.floatValue()); + return request.withAdditionalModelResponseFieldPaths(topKField); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockChatCompletionEntityFactory.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockChatCompletionEntityFactory.java new file mode 100644 index 0000000000000..f86d2229d42ad --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockChatCompletionEntityFactory.java @@ -0,0 +1,78 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModel; + +import java.util.List; +import java.util.Objects; + +public final class AmazonBedrockChatCompletionEntityFactory { + public static AmazonBedrockConverseRequestEntity createEntity(AmazonBedrockChatCompletionModel model, List messages) { + Objects.requireNonNull(model); + Objects.requireNonNull(messages); + var serviceSettings = model.getServiceSettings(); + var taskSettings = model.getTaskSettings(); + switch (serviceSettings.provider()) { + case AI21LABS -> { + return new AmazonBedrockAI21LabsCompletionRequestEntity( + messages, + taskSettings.temperature(), + taskSettings.topP(), + taskSettings.maxNewTokens() + ); + } + case AMAZONTITAN -> { + return new AmazonBedrockTitanCompletionRequestEntity( + messages, + taskSettings.temperature(), + taskSettings.topP(), + taskSettings.maxNewTokens() + ); + } + case ANTHROPIC -> { + return new AmazonBedrockAnthropicCompletionRequestEntity( + messages, + taskSettings.temperature(), + taskSettings.topP(), + taskSettings.topK(), + taskSettings.maxNewTokens() + ); + } + case COHERE -> { + return new AmazonBedrockCohereCompletionRequestEntity( + messages, + taskSettings.temperature(), + taskSettings.topP(), + taskSettings.topK(), + taskSettings.maxNewTokens() + ); + } + case META -> { + return new AmazonBedrockMetaCompletionRequestEntity( + messages, + taskSettings.temperature(), + taskSettings.topP(), + taskSettings.maxNewTokens() + ); + } + case MISTRAL -> { + return new AmazonBedrockMistralCompletionRequestEntity( + messages, + taskSettings.temperature(), + taskSettings.topP(), + taskSettings.topK(), + taskSettings.maxNewTokens() + ); + } + default -> { + return null; + } + } + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockChatCompletionRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockChatCompletionRequest.java new file mode 100644 index 0000000000000..f02f05f2d3b17 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockChatCompletionRequest.java @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
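A minimal illustrative sketch (outside the change set) of how a provider entity returned by AmazonBedrockChatCompletionEntityFactory is applied to a ConverseRequest. The model id "ai21.j2-ultra-v1" and the literal message/settings values are placeholder assumptions; the production wiring lives in AmazonBedrockChatCompletionRequest below.

import java.util.List;

import com.amazonaws.services.bedrockruntime.model.ConverseRequest;

// Sketch only: chains the entity's builder-style methods onto a ConverseRequest.
// The model id below is a placeholder, not taken from this change.
class ConverseAssemblySketch {
    static ConverseRequest buildRequest() {
        AmazonBedrockConverseRequestEntity entity = new AmazonBedrockAI21LabsCompletionRequestEntity(
            List.of("Tell me a joke"), // messages
            0.2,                       // temperature
            null,                      // topP
            256                        // maxTokenCount
        );
        ConverseRequest request = new ConverseRequest().withModelId("ai21.j2-ultra-v1");
        request = entity.addMessages(request);
        request = entity.addInferenceConfig(request);
        request = entity.addAdditionalModelFields(request);
        return request;
    }
}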
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.core.common.socket.SocketAccess; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockBaseClient; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.completion.AmazonBedrockChatCompletionResponseListener; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModel; + +import java.io.IOException; +import java.util.Objects; + +public class AmazonBedrockChatCompletionRequest extends AmazonBedrockRequest { + public static final String USER_ROLE = "user"; + private final AmazonBedrockConverseRequestEntity requestEntity; + private AmazonBedrockChatCompletionResponseListener listener; + + public AmazonBedrockChatCompletionRequest( + AmazonBedrockChatCompletionModel model, + AmazonBedrockConverseRequestEntity requestEntity, + @Nullable TimeValue timeout + ) { + super(model, timeout); + this.requestEntity = Objects.requireNonNull(requestEntity); + } + + @Override + protected void executeRequest(AmazonBedrockBaseClient client) { + var converseRequest = getConverseRequest(); + + try { + SocketAccess.doPrivileged(() -> client.converse(converseRequest, listener)); + } catch (IOException e) { + listener.onFailure(new RuntimeException(e)); + } + } + + @Override + public TaskType taskType() { + return TaskType.COMPLETION; + } + + private ConverseRequest getConverseRequest() { + var converseRequest = new ConverseRequest().withModelId(amazonBedrockModel.model()); + converseRequest = requestEntity.addMessages(converseRequest); + converseRequest = requestEntity.addInferenceConfig(converseRequest); + converseRequest = requestEntity.addAdditionalModelFields(converseRequest); + return converseRequest; + } + + public void executeChatCompletionRequest( + AmazonBedrockBaseClient awsBedrockClient, + AmazonBedrockChatCompletionResponseListener chatCompletionResponseListener + ) { + this.listener = chatCompletionResponseListener; + this.executeRequest(awsBedrockClient); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockCohereCompletionRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockCohereCompletionRequestEntity.java new file mode 100644 index 0000000000000..17a264ef820ff --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockCohereCompletionRequestEntity.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.InferenceConfiguration; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; + +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseUtils.getConverseMessageList; + +public record AmazonBedrockCohereCompletionRequestEntity( + List messages, + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Double topK, + @Nullable Integer maxTokenCount +) implements AmazonBedrockConverseRequestEntity { + + public AmazonBedrockCohereCompletionRequestEntity { + Objects.requireNonNull(messages); + } + + @Override + public ConverseRequest addMessages(ConverseRequest request) { + return request.withMessages(getConverseMessageList(messages)); + } + + @Override + public ConverseRequest addInferenceConfig(ConverseRequest request) { + if (temperature == null && topP == null && maxTokenCount == null) { + return request; + } + + InferenceConfiguration inferenceConfig = new InferenceConfiguration(); + + if (temperature != null) { + inferenceConfig = inferenceConfig.withTemperature(temperature.floatValue()); + } + + if (topP != null) { + inferenceConfig = inferenceConfig.withTopP(topP.floatValue()); + } + + if (maxTokenCount != null) { + inferenceConfig = inferenceConfig.withMaxTokens(maxTokenCount); + } + + return request.withInferenceConfig(inferenceConfig); + } + + @Override + public ConverseRequest addAdditionalModelFields(ConverseRequest request) { + if (topK == null) { + return request; + } + + String topKField = Strings.format("{\"top_k\":%f}", topK.floatValue()); + return request.withAdditionalModelResponseFieldPaths(topKField); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseRequestEntity.java new file mode 100644 index 0000000000000..fbd55e76e509b --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseRequestEntity.java @@ -0,0 +1,18 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; + +public interface AmazonBedrockConverseRequestEntity { + ConverseRequest addMessages(ConverseRequest request); + + ConverseRequest addInferenceConfig(ConverseRequest request); + + ConverseRequest addAdditionalModelFields(ConverseRequest request); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseUtils.java new file mode 100644 index 0000000000000..2cfb56a94b319 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseUtils.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ContentBlock; +import com.amazonaws.services.bedrockruntime.model.Message; + +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockChatCompletionRequest.USER_ROLE; + +public final class AmazonBedrockConverseUtils { + + public static List<Message> getConverseMessageList(List<String> messages) { + List<Message> messageList = new ArrayList<>(); + for (String message : messages) { + var messageContent = new ContentBlock().withText(message); + var returnMessage = (new Message()).withRole(USER_ROLE).withContent(messageContent); + messageList.add(returnMessage); + } + return messageList; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMetaCompletionRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMetaCompletionRequestEntity.java new file mode 100644 index 0000000000000..cdabdd4cbebff --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMetaCompletionRequestEntity.java @@ -0,0 +1,63 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.InferenceConfiguration; + +import org.elasticsearch.core.Nullable; + +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseUtils.getConverseMessageList; + +public record AmazonBedrockMetaCompletionRequestEntity( + List messages, + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Integer maxTokenCount +) implements AmazonBedrockConverseRequestEntity { + + public AmazonBedrockMetaCompletionRequestEntity { + Objects.requireNonNull(messages); + } + + @Override + public ConverseRequest addMessages(ConverseRequest request) { + return request.withMessages(getConverseMessageList(messages)); + } + + @Override + public ConverseRequest addInferenceConfig(ConverseRequest request) { + if (temperature == null && topP == null && maxTokenCount == null) { + return request; + } + + InferenceConfiguration inferenceConfig = new InferenceConfiguration(); + + if (temperature != null) { + inferenceConfig = inferenceConfig.withTemperature(temperature.floatValue()); + } + + if (topP != null) { + inferenceConfig = inferenceConfig.withTopP(topP.floatValue()); + } + + if (maxTokenCount != null) { + inferenceConfig = inferenceConfig.withMaxTokens(maxTokenCount); + } + + return request.withInferenceConfig(inferenceConfig); + } + + @Override + public ConverseRequest addAdditionalModelFields(ConverseRequest request) { + return request; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMistralCompletionRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMistralCompletionRequestEntity.java new file mode 100644 index 0000000000000..c68eaa1b81f54 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMistralCompletionRequestEntity.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.InferenceConfiguration; + +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; + +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseUtils.getConverseMessageList; + +public record AmazonBedrockMistralCompletionRequestEntity( + List messages, + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Double topK, + @Nullable Integer maxTokenCount +) implements AmazonBedrockConverseRequestEntity { + + public AmazonBedrockMistralCompletionRequestEntity { + Objects.requireNonNull(messages); + } + + @Override + public ConverseRequest addMessages(ConverseRequest request) { + return request.withMessages(getConverseMessageList(messages)); + } + + @Override + public ConverseRequest addInferenceConfig(ConverseRequest request) { + if (temperature == null && topP == null && maxTokenCount == null) { + return request; + } + + InferenceConfiguration inferenceConfig = new InferenceConfiguration(); + + if (temperature != null) { + inferenceConfig = inferenceConfig.withTemperature(temperature.floatValue()); + } + + if (topP != null) { + inferenceConfig = inferenceConfig.withTopP(topP.floatValue()); + } + + if (maxTokenCount != null) { + inferenceConfig = inferenceConfig.withMaxTokens(maxTokenCount); + } + + return request.withInferenceConfig(inferenceConfig); + } + + @Override + public ConverseRequest addAdditionalModelFields(ConverseRequest request) { + if (topK == null) { + return request; + } + + String topKField = Strings.format("{\"top_k\":%f}", topK.floatValue()); + return request.withAdditionalModelResponseFieldPaths(topKField); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockTitanCompletionRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockTitanCompletionRequestEntity.java new file mode 100644 index 0000000000000..d56035b80e9ef --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockTitanCompletionRequestEntity.java @@ -0,0 +1,63 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.InferenceConfiguration; + +import org.elasticsearch.core.Nullable; + +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseUtils.getConverseMessageList; + +public record AmazonBedrockTitanCompletionRequestEntity( + List messages, + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Integer maxTokenCount +) implements AmazonBedrockConverseRequestEntity { + + public AmazonBedrockTitanCompletionRequestEntity { + Objects.requireNonNull(messages); + } + + @Override + public ConverseRequest addMessages(ConverseRequest request) { + return request.withMessages(getConverseMessageList(messages)); + } + + @Override + public ConverseRequest addInferenceConfig(ConverseRequest request) { + if (temperature == null && topP == null && maxTokenCount == null) { + return request; + } + + InferenceConfiguration inferenceConfig = new InferenceConfiguration(); + + if (temperature != null) { + inferenceConfig = inferenceConfig.withTemperature(temperature.floatValue()); + } + + if (topP != null) { + inferenceConfig = inferenceConfig.withTopP(topP.floatValue()); + } + + if (maxTokenCount != null) { + inferenceConfig = inferenceConfig.withMaxTokens(maxTokenCount); + } + + return request.withInferenceConfig(inferenceConfig); + } + + @Override + public ConverseRequest addAdditionalModelFields(ConverseRequest request) { + return request; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockCohereEmbeddingsRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockCohereEmbeddingsRequestEntity.java new file mode 100644 index 0000000000000..edca5bc1bdf9c --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockCohereEmbeddingsRequestEntity.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings; + +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public record AmazonBedrockCohereEmbeddingsRequestEntity(List input) implements ToXContentObject { + + private static final String TEXTS_FIELD = "texts"; + private static final String INPUT_TYPE_FIELD = "input_type"; + private static final String INPUT_TYPE_SEARCH_DOCUMENT = "search_document"; + + public AmazonBedrockCohereEmbeddingsRequestEntity { + Objects.requireNonNull(input); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(TEXTS_FIELD, input); + builder.field(INPUT_TYPE_FIELD, INPUT_TYPE_SEARCH_DOCUMENT); + builder.endObject(); + return builder; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockEmbeddingsEntityFactory.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockEmbeddingsEntityFactory.java new file mode 100644 index 0000000000000..a31b033507264 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockEmbeddingsEntityFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
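A minimal illustrative sketch (outside the change set) of the JSON body the Cohere embeddings entity above serializes to; the input strings are placeholders.

import java.io.IOException;
import java.util.List;

import org.elasticsearch.common.Strings;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;

// Sketch only: renders the record through XContentBuilder to show the wire shape.
class CohereEmbeddingsBodySketch {
    static String buildBody() throws IOException {
        var entity = new AmazonBedrockCohereEmbeddingsRequestEntity(List.of("first passage", "second passage"));
        try (XContentBuilder builder = XContentFactory.jsonBuilder()) {
            entity.toXContent(builder, ToXContent.EMPTY_PARAMS);
            // Expected shape: {"texts":["first passage","second passage"],"input_type":"search_document"}
            return Strings.toString(builder);
        }
    }
}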
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xpack.inference.common.Truncator; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModel; + +import java.util.Objects; + +public final class AmazonBedrockEmbeddingsEntityFactory { + public static ToXContent createEntity(AmazonBedrockEmbeddingsModel model, Truncator.TruncationResult truncationResult) { + Objects.requireNonNull(model); + Objects.requireNonNull(truncationResult); + + var serviceSettings = model.getServiceSettings(); + + var truncatedInput = truncationResult.input(); + if (truncatedInput == null || truncatedInput.isEmpty()) { + throw new ElasticsearchException("[input] cannot be null or empty"); + } + + switch (serviceSettings.provider()) { + case AMAZONTITAN -> { + if (truncatedInput.size() > 1) { + throw new ElasticsearchException("[input] cannot contain more than one string"); + } + return new AmazonBedrockTitanEmbeddingsRequestEntity(truncatedInput.get(0)); + } + case COHERE -> { + return new AmazonBedrockCohereEmbeddingsRequestEntity(truncatedInput); + } + default -> { + return null; + } + } + + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockEmbeddingsRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockEmbeddingsRequest.java new file mode 100644 index 0000000000000..96d3b3a3cc057 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockEmbeddingsRequest.java @@ -0,0 +1,99 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings; + +import com.amazonaws.services.bedrockruntime.model.InvokeModelRequest; +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xpack.core.common.socket.SocketAccess; +import org.elasticsearch.xpack.inference.common.Truncator; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockBaseClient; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockJsonBuilder; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.embeddings.AmazonBedrockEmbeddingsResponseListener; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModel; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Objects; + +public class AmazonBedrockEmbeddingsRequest extends AmazonBedrockRequest { + private final AmazonBedrockEmbeddingsModel embeddingsModel; + private final ToXContent requestEntity; + private final Truncator truncator; + private final Truncator.TruncationResult truncationResult; + private final AmazonBedrockProvider provider; + private ActionListener listener = null; + + public AmazonBedrockEmbeddingsRequest( + Truncator truncator, + Truncator.TruncationResult input, + AmazonBedrockEmbeddingsModel model, + ToXContent requestEntity, + @Nullable TimeValue timeout + ) { + super(model, timeout); + this.truncator = Objects.requireNonNull(truncator); + this.truncationResult = Objects.requireNonNull(input); + this.requestEntity = Objects.requireNonNull(requestEntity); + this.embeddingsModel = model; + this.provider = model.provider(); + } + + public AmazonBedrockProvider provider() { + return provider; + } + + @Override + protected void executeRequest(AmazonBedrockBaseClient client) { + try { + var jsonBuilder = new AmazonBedrockJsonBuilder(requestEntity); + var bodyAsString = jsonBuilder.getStringContent(); + + var charset = StandardCharsets.UTF_8; + var bodyBuffer = charset.encode(bodyAsString); + + var invokeModelRequest = new InvokeModelRequest().withModelId(embeddingsModel.model()).withBody(bodyBuffer); + + SocketAccess.doPrivileged(() -> client.invokeModel(invokeModelRequest, listener)); + } catch (IOException e) { + listener.onFailure(new RuntimeException(e)); + } + } + + @Override + public Request truncate() { + var truncatedInput = truncator.truncate(truncationResult.input()); + return new AmazonBedrockEmbeddingsRequest(truncator, truncatedInput, embeddingsModel, requestEntity, timeout); + } + + @Override + public boolean[] getTruncationInfo() { + return truncationResult.truncated().clone(); + } + + @Override + public TaskType taskType() { + return TaskType.TEXT_EMBEDDING; + } + + public void executeEmbeddingsRequest( + AmazonBedrockBaseClient awsBedrockClient, + AmazonBedrockEmbeddingsResponseListener embeddingsResponseListener + ) { + this.listener = embeddingsResponseListener; + this.executeRequest(awsBedrockClient); + } +} diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockTitanEmbeddingsRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockTitanEmbeddingsRequestEntity.java new file mode 100644 index 0000000000000..f55edd0442913 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockTitanEmbeddingsRequestEntity.java @@ -0,0 +1,31 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings; + +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +public record AmazonBedrockTitanEmbeddingsRequestEntity(String inputText) implements ToXContentObject { + + private static final String INPUT_TEXT_FIELD = "inputText"; + + public AmazonBedrockTitanEmbeddingsRequestEntity { + Objects.requireNonNull(inputText); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(INPUT_TEXT_FIELD, inputText); + builder.endObject(); + return builder; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponse.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponse.java new file mode 100644 index 0000000000000..54b05137acda3 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponse.java @@ -0,0 +1,15 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock; + +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; + +public abstract class AmazonBedrockResponse { + public abstract InferenceServiceResults accept(AmazonBedrockRequest request); +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponseHandler.java new file mode 100644 index 0000000000000..9dc15ea667c1d --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponseHandler.java @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock; + +import org.apache.logging.log4j.Logger; +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.ResponseHandler; +import org.elasticsearch.xpack.inference.external.http.retry.RetryException; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.logging.ThrottlerManager; + +public abstract class AmazonBedrockResponseHandler implements ResponseHandler { + @Override + public final void validateResponse(ThrottlerManager throttlerManager, Logger logger, Request request, HttpResult result) + throws RetryException { + // do nothing as the AWS SDK will take care of validation for us + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponseListener.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponseListener.java new file mode 100644 index 0000000000000..ce4d6d1dea655 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/AmazonBedrockResponseListener.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; + +import java.util.Objects; + +public class AmazonBedrockResponseListener { + protected final AmazonBedrockRequest request; + protected final ActionListener inferenceResultsListener; + protected final AmazonBedrockResponseHandler responseHandler; + + public AmazonBedrockResponseListener( + AmazonBedrockRequest request, + AmazonBedrockResponseHandler responseHandler, + ActionListener inferenceResultsListener + ) { + this.request = Objects.requireNonNull(request); + this.responseHandler = Objects.requireNonNull(responseHandler); + this.inferenceResultsListener = Objects.requireNonNull(inferenceResultsListener); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponse.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponse.java new file mode 100644 index 0000000000000..5b3872e2c416a --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponse.java @@ -0,0 +1,49 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseResult; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.core.inference.results.ChatCompletionResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockChatCompletionRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponse; + +import java.util.ArrayList; + +public class AmazonBedrockChatCompletionResponse extends AmazonBedrockResponse { + + private final ConverseResult result; + + public AmazonBedrockChatCompletionResponse(ConverseResult responseResult) { + this.result = responseResult; + } + + @Override + public InferenceServiceResults accept(AmazonBedrockRequest request) { + if (request instanceof AmazonBedrockChatCompletionRequest asChatCompletionRequest) { + return fromResponse(result); + } + + throw new ElasticsearchException("unexpected request type [" + request.getClass() + "]"); + } + + public static ChatCompletionResults fromResponse(ConverseResult response) { + var responseMessage = response.getOutput().getMessage(); + + var messageContents = responseMessage.getContent(); + var resultTexts = new ArrayList(); + for (var messageContent : messageContents) { + resultTexts.add(new ChatCompletionResults.Result(messageContent.getText())); + } + + return new ChatCompletionResults(resultTexts); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponseHandler.java new file mode 100644 index 0000000000000..a24f54c50eef3 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponseHandler.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseResult; + +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.RetryException; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; + +public class AmazonBedrockChatCompletionResponseHandler extends AmazonBedrockResponseHandler { + + private ConverseResult responseResult; + + public AmazonBedrockChatCompletionResponseHandler() {} + + @Override + public InferenceServiceResults parseResult(Request request, HttpResult result) throws RetryException { + var response = new AmazonBedrockChatCompletionResponse(responseResult); + return response.accept((AmazonBedrockRequest) request); + } + + @Override + public String getRequestType() { + return "Amazon Bedrock Chat Completion"; + } + + public void acceptChatCompletionResponseObject(ConverseResult response) { + this.responseResult = response; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponseListener.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponseListener.java new file mode 100644 index 0000000000000..be03ba84571eb --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/completion/AmazonBedrockChatCompletionResponseListener.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ConverseResult; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockChatCompletionRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseListener; + +public class AmazonBedrockChatCompletionResponseListener extends AmazonBedrockResponseListener implements ActionListener { + + public AmazonBedrockChatCompletionResponseListener( + AmazonBedrockChatCompletionRequest request, + AmazonBedrockResponseHandler responseHandler, + ActionListener inferenceResultsListener + ) { + super(request, responseHandler, inferenceResultsListener); + } + + @Override + public void onResponse(ConverseResult result) { + ((AmazonBedrockChatCompletionResponseHandler) responseHandler).acceptChatCompletionResponseObject(result); + inferenceResultsListener.onResponse(responseHandler.parseResult(request, null)); + } + + @Override + public void onFailure(Exception e) { + throw new ElasticsearchException(e); + } + +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponse.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponse.java new file mode 100644 index 0000000000000..83fa790acbe68 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponse.java @@ -0,0 +1,132 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
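A minimal illustrative sketch (outside the change set) of how the chat completion handler and listener above fit together for a single call; the request, client, and downstream ActionListener are method parameters here because their construction happens elsewhere in the plugin.

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.inference.InferenceServiceResults;

// Sketch only: wires a handler and listener onto an already-built chat completion request.
class ChatCompletionWiringSketch {
    static void execute(
        AmazonBedrockChatCompletionRequest request,
        AmazonBedrockBaseClient client,
        ActionListener<InferenceServiceResults> resultsListener
    ) {
        var handler = new AmazonBedrockChatCompletionResponseHandler();
        var listener = new AmazonBedrockChatCompletionResponseListener(request, handler, resultsListener);
        request.executeChatCompletionRequest(client, listener);
    }
}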
+ */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock.embeddings; + +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentParserConfiguration; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings.AmazonBedrockEmbeddingsRequest; +import org.elasticsearch.xpack.inference.external.response.XContentUtils; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponse; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.elasticsearch.common.xcontent.XContentParserUtils.parseList; +import static org.elasticsearch.xpack.inference.external.response.XContentUtils.positionParserAtTokenAfterField; + +public class AmazonBedrockEmbeddingsResponse extends AmazonBedrockResponse { + private static final String FAILED_TO_FIND_FIELD_TEMPLATE = "Failed to find required field [%s] in Amazon Bedrock embeddings response"; + private final InvokeModelResult result; + + public AmazonBedrockEmbeddingsResponse(InvokeModelResult invokeModelResult) { + this.result = invokeModelResult; + } + + @Override + public InferenceServiceResults accept(AmazonBedrockRequest request) { + if (request instanceof AmazonBedrockEmbeddingsRequest asEmbeddingsRequest) { + return fromResponse(result, asEmbeddingsRequest.provider()); + } + + throw new ElasticsearchException("unexpected request type [" + request.getClass() + "]"); + } + + public static InferenceTextEmbeddingFloatResults fromResponse(InvokeModelResult response, AmazonBedrockProvider provider) { + var charset = StandardCharsets.UTF_8; + var bodyText = String.valueOf(charset.decode(response.getBody())); + + var parserConfig = XContentParserConfiguration.EMPTY.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE); + + try (XContentParser jsonParser = XContentFactory.xContent(XContentType.JSON).createParser(parserConfig, bodyText)) { + // move to the first token + jsonParser.nextToken(); + + XContentParser.Token token = jsonParser.currentToken(); + ensureExpectedToken(XContentParser.Token.START_OBJECT, token, jsonParser); + + var embeddingList = parseEmbeddings(jsonParser, provider); + + return new InferenceTextEmbeddingFloatResults(embeddingList); + } catch (IOException e) { + throw new ElasticsearchException(e); + } + } + + private static List parseEmbeddings( + XContentParser jsonParser, + AmazonBedrockProvider provider + ) throws IOException { + switch (provider) { + case AMAZONTITAN -> { + return parseTitanEmbeddings(jsonParser); + } + case COHERE -> { + return parseCohereEmbeddings(jsonParser); + } + default -> throw new IOException("Unsupported provider [" + provider + "]"); + } + } + + private static List parseTitanEmbeddings(XContentParser parser) + throws 
IOException { + /* + Titan response: + { + "embedding": [float, float, ...], + "inputTextTokenCount": int + } + */ + positionParserAtTokenAfterField(parser, "embedding", FAILED_TO_FIND_FIELD_TEMPLATE); + List embeddingValuesList = parseList(parser, XContentUtils::parseFloat); + var embeddingValues = InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding.of(embeddingValuesList); + return List.of(embeddingValues); + } + + private static List parseCohereEmbeddings(XContentParser parser) + throws IOException { + /* + Cohere response: + { + "embeddings": [ + [< array of 1024 floats >], + ... + ], + "id": string, + "response_type" : "embeddings_floats", + "texts": [string] + } + */ + positionParserAtTokenAfterField(parser, "embeddings", FAILED_TO_FIND_FIELD_TEMPLATE); + + List embeddingList = parseList( + parser, + AmazonBedrockEmbeddingsResponse::parseCohereEmbeddingsListItem + ); + + return embeddingList; + } + + private static InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding parseCohereEmbeddingsListItem(XContentParser parser) + throws IOException { + List embeddingValuesList = parseList(parser, XContentUtils::parseFloat); + return InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding.of(embeddingValuesList); + } + +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponseHandler.java new file mode 100644 index 0000000000000..a3fb68ee23486 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponseHandler.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
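A minimal illustrative sketch (outside the change set) that runs a hand-written Titan-style body through the same parsing helpers used above; the JSON literal is an assumption for illustration, not a captured Bedrock response.

import java.io.IOException;
import java.util.List;

import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParserConfiguration;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xpack.inference.external.response.XContentUtils;

import static org.elasticsearch.common.xcontent.XContentParserUtils.parseList;
import static org.elasticsearch.xpack.inference.external.response.XContentUtils.positionParserAtTokenAfterField;

// Sketch only: extracts the embedding floats from a Titan-shaped body.
class TitanEmbeddingParsingSketch {
    static List<Float> parse() throws IOException {
        String body = "{\"embedding\":[0.1,0.2,0.3],\"inputTextTokenCount\":5}";
        try (XContentParser parser = XContentFactory.xContent(XContentType.JSON).createParser(XContentParserConfiguration.EMPTY, body)) {
            parser.nextToken(); // position on START_OBJECT
            positionParserAtTokenAfterField(parser, "embedding", "Failed to find required field [%s]");
            return parseList(parser, XContentUtils::parseFloat);
        }
    }
}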
+ */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock.embeddings; + +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.http.HttpResult; +import org.elasticsearch.xpack.inference.external.http.retry.RetryException; +import org.elasticsearch.xpack.inference.external.request.Request; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; + +public class AmazonBedrockEmbeddingsResponseHandler extends AmazonBedrockResponseHandler { + + private InvokeModelResult invokeModelResult; + + @Override + public InferenceServiceResults parseResult(Request request, HttpResult result) throws RetryException { + var responseParser = new AmazonBedrockEmbeddingsResponse(invokeModelResult); + return responseParser.accept((AmazonBedrockRequest) request); + } + + @Override + public String getRequestType() { + return "Amazon Bedrock Embeddings"; + } + + public void acceptEmbeddingsResult(InvokeModelResult result) { + this.invokeModelResult = result; + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponseListener.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponseListener.java new file mode 100644 index 0000000000000..36519ae31ff60 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/response/amazonbedrock/embeddings/AmazonBedrockEmbeddingsResponseListener.java @@ -0,0 +1,38 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.response.amazonbedrock.embeddings; + +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings.AmazonBedrockEmbeddingsRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseListener; + +public class AmazonBedrockEmbeddingsResponseListener extends AmazonBedrockResponseListener implements ActionListener { + + public AmazonBedrockEmbeddingsResponseListener( + AmazonBedrockEmbeddingsRequest request, + AmazonBedrockResponseHandler responseHandler, + ActionListener inferenceResultsListener + ) { + super(request, responseHandler, inferenceResultsListener); + } + + @Override + public void onResponse(InvokeModelResult result) { + ((AmazonBedrockEmbeddingsResponseHandler) responseHandler).acceptEmbeddingsResult(result); + inferenceResultsListener.onResponse(responseHandler.parseResult(request, null)); + } + + @Override + public void onFailure(Exception e) { + inferenceResultsListener.onFailure(e); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java index a22126439e9e2..42413c35fcbff 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContext.java @@ -14,8 +14,11 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.search.rank.context.RankFeaturePhaseRankCoordinatorContext; import org.elasticsearch.search.rank.feature.RankFeatureDoc; +import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; import org.elasticsearch.xpack.core.inference.action.InferenceAction; import org.elasticsearch.xpack.core.inference.results.RankedDocsResults; +import org.elasticsearch.xpack.inference.services.cohere.rerank.CohereRerankTaskSettings; +import org.elasticsearch.xpack.inference.services.googlevertexai.rerank.GoogleVertexAiRerankTaskSettings; import java.util.Arrays; import java.util.Comparator; @@ -53,24 +56,77 @@ public TextSimilarityRankFeaturePhaseRankCoordinatorContext( protected void computeScores(RankFeatureDoc[] featureDocs, ActionListener scoreListener) { // Wrap the provided rankListener to an ActionListener that would handle the response from the inference service // and then pass the results - final ActionListener actionListener = scoreListener.delegateFailureAndWrap((l, r) -> { - float[] scores = extractScoresFromResponse(r); - if (scores.length != featureDocs.length) { + final ActionListener inferenceListener = scoreListener.delegateFailureAndWrap((l, r) -> { + InferenceServiceResults results = r.getResults(); + assert results instanceof RankedDocsResults; + + // Ensure we get exactly as many scores as the number of docs we passed, otherwise we may return incorrect results + List rankedDocs = ((RankedDocsResults) 
results).getRankedDocs(); + if (rankedDocs.size() != featureDocs.length) { l.onFailure( - new IllegalStateException("Document and score count mismatch: [" + featureDocs.length + "] vs [" + scores.length + "]") + new IllegalStateException( + "Reranker input document count and returned score count mismatch: [" + + featureDocs.length + + "] vs [" + + rankedDocs.size() + + "]" + ) ); } else { + float[] scores = extractScoresFromRankedDocs(rankedDocs); l.onResponse(scores); } }); - List featureData = Arrays.stream(featureDocs).map(x -> x.featureData).toList(); - InferenceAction.Request request = generateRequest(featureData); - try { - client.execute(InferenceAction.INSTANCE, request, actionListener); - } finally { - request.decRef(); - } + // top N listener + ActionListener topNListener = scoreListener.delegateFailureAndWrap((l, r) -> { + // The rerank inference endpoint may have an override to return top N documents only, in that case let's fail fast to avoid + // assigning scores to the wrong input + Integer configuredTopN = null; + if (r.getEndpoints().isEmpty() == false + && r.getEndpoints().get(0).getTaskSettings() instanceof CohereRerankTaskSettings cohereTaskSettings) { + configuredTopN = cohereTaskSettings.getTopNDocumentsOnly(); + } else if (r.getEndpoints().isEmpty() == false + && r.getEndpoints().get(0).getTaskSettings() instanceof GoogleVertexAiRerankTaskSettings googleVertexAiTaskSettings) { + configuredTopN = googleVertexAiTaskSettings.topN(); + } + if (configuredTopN != null && configuredTopN < rankWindowSize) { + l.onFailure( + new IllegalArgumentException( + "Inference endpoint [" + + inferenceId + + "] is configured to return the top [" + + configuredTopN + + "] results, but rank_window_size is [" + + rankWindowSize + + "]. Reduce rank_window_size to be less than or equal to the configured top N value." + ) + ); + return; + } + List featureData = Arrays.stream(featureDocs).map(x -> x.featureData).toList(); + InferenceAction.Request inferenceRequest = generateRequest(featureData); + try { + client.execute(InferenceAction.INSTANCE, inferenceRequest, inferenceListener); + } finally { + inferenceRequest.decRef(); + } + }); + + GetInferenceModelAction.Request getModelRequest = new GetInferenceModelAction.Request(inferenceId, TaskType.RERANK); + client.execute(GetInferenceModelAction.INSTANCE, getModelRequest, topNListener); + } + + /** + * Sorts documents by score descending and discards those with a score less than minScore. 
+ * @param originalDocs documents to process + */ + @Override + protected RankFeatureDoc[] preprocess(RankFeatureDoc[] originalDocs) { + return Arrays.stream(originalDocs) + .filter(doc -> minScore == null || doc.score >= minScore) + .sorted(Comparator.comparing((RankFeatureDoc doc) -> doc.score).reversed()) + .toArray(RankFeatureDoc[]::new); } protected InferenceAction.Request generateRequest(List docFeatures) { @@ -85,11 +141,7 @@ protected InferenceAction.Request generateRequest(List docFeatures) { ); } - private float[] extractScoresFromResponse(InferenceAction.Response response) { - InferenceServiceResults results = response.getResults(); - assert results instanceof RankedDocsResults; - - List rankedDocs = ((RankedDocsResults) results).getRankedDocs(); + private float[] extractScoresFromRankedDocs(List rankedDocs) { float[] scores = new float[rankedDocs.size()]; for (RankedDocsResults.RankedDoc rankedDoc : rankedDocs) { scores[rankedDoc.index()] = rankedDoc.relevanceScore(); @@ -97,16 +149,4 @@ private float[] extractScoresFromResponse(InferenceAction.Response response) { return scores; } - - /** - * Sorts documents by score descending and discards those with a score less than minScore. - * @param originalDocs documents to process - */ - @Override - protected RankFeatureDoc[] preprocess(RankFeatureDoc[] originalDocs) { - return Arrays.stream(originalDocs) - .filter(doc -> minScore == null || doc.score >= minScore) - .sorted(Comparator.comparing((RankFeatureDoc doc) -> doc.score).reversed()) - .toArray(RankFeatureDoc[]::new); - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java index a81fbb51f678d..e1d27e96cc5ff 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java @@ -101,6 +101,7 @@ public TextSimilarityRankRetrieverBuilder( @Override public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder, boolean compoundUsed) { + retrieverBuilder.getPreFilterQueryBuilders().addAll(preFilterQueryBuilders); retrieverBuilder.extractToSearchSourceBuilder(searchSourceBuilder, compoundUsed); // Combining with other rank builder (such as RRF) is not supported yet diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java index ae82264a77a0d..a6e4fcae7169f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/registry/ModelRegistry.java @@ -74,7 +74,10 @@ public static UnparsedModel unparsedModelFromMap(ModelConfigMap modelConfigMap) if (modelConfigMap.config() == null) { throw new ElasticsearchStatusException("Missing config map", RestStatus.BAD_REQUEST); } - String inferenceEntityId = ServiceUtils.removeStringOrThrowIfNull(modelConfigMap.config(), ModelConfigurations.MODEL_ID); + String inferenceEntityId = ServiceUtils.removeStringOrThrowIfNull( + modelConfigMap.config(), + ModelConfigurations.INDEX_ONLY_ID_FIELD_NAME + ); String service = 
ServiceUtils.removeStringOrThrowIfNull(modelConfigMap.config(), ModelConfigurations.SERVICE); String taskTypeStr = ServiceUtils.removeStringOrThrowIfNull(modelConfigMap.config(), TaskType.NAME); TaskType taskType = TaskType.fromString(taskTypeStr); @@ -375,7 +378,10 @@ public void deleteModel(String inferenceEntityId, ActionListener listen private static IndexRequest createIndexRequest(String docId, String indexName, ToXContentObject body, boolean allowOverwriting) { try (XContentBuilder builder = XContentFactory.jsonBuilder()) { var request = new IndexRequest(indexName); - XContentBuilder source = body.toXContent(builder, ToXContent.EMPTY_PARAMS); + XContentBuilder source = body.toXContent( + builder, + new ToXContent.MapParams(Map.of(ModelConfigurations.USE_ID_FOR_INDEX, Boolean.TRUE.toString())) + ); var operation = allowOverwriting ? DocWriteRequest.OpType.INDEX : DocWriteRequest.OpType.CREATE; return request.opType(operation).id(docId).source(source); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockConstants.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockConstants.java new file mode 100644 index 0000000000000..1755dac2ac13f --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockConstants.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock; + +public class AmazonBedrockConstants { + public static final String ACCESS_KEY_FIELD = "access_key"; + public static final String SECRET_KEY_FIELD = "secret_key"; + public static final String REGION_FIELD = "region"; + public static final String MODEL_FIELD = "model"; + public static final String PROVIDER_FIELD = "provider"; + + public static final String TEMPERATURE_FIELD = "temperature"; + public static final String TOP_P_FIELD = "top_p"; + public static final String TOP_K_FIELD = "top_k"; + public static final String MAX_NEW_TOKENS_FIELD = "max_new_tokens"; + + public static final Double MIN_TEMPERATURE_TOP_P_TOP_K_VALUE = 0.0; + public static final Double MAX_TEMPERATURE_TOP_P_TOP_K_VALUE = 1.0; + + public static final int DEFAULT_MAX_CHUNK_SIZE = 2048; + +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockModel.java new file mode 100644 index 0000000000000..13ca8bd7bd749 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockModel.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */
+
+package org.elasticsearch.xpack.inference.services.amazonbedrock;
+
+import org.elasticsearch.inference.Model;
+import org.elasticsearch.inference.ModelConfigurations;
+import org.elasticsearch.inference.ModelSecrets;
+import org.elasticsearch.inference.ServiceSettings;
+import org.elasticsearch.inference.TaskSettings;
+import org.elasticsearch.xpack.inference.external.action.ExecutableAction;
+import org.elasticsearch.xpack.inference.external.action.amazonbedrock.AmazonBedrockActionVisitor;
+import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings;
+
+import java.util.Map;
+
+public abstract class AmazonBedrockModel extends Model {
+
+    protected String region;
+    protected String model;
+    protected AmazonBedrockProvider provider;
+    protected RateLimitSettings rateLimitSettings;
+
+    protected AmazonBedrockModel(ModelConfigurations modelConfigurations, ModelSecrets secrets) {
+        super(modelConfigurations, secrets);
+        setPropertiesFromServiceSettings((AmazonBedrockServiceSettings) modelConfigurations.getServiceSettings());
+    }
+
+    protected AmazonBedrockModel(Model model, TaskSettings taskSettings) {
+        super(model, taskSettings);
+
+        if (model instanceof AmazonBedrockModel bedrockModel) {
+            setPropertiesFromServiceSettings(bedrockModel.getServiceSettings());
+        }
+    }
+
+    protected AmazonBedrockModel(Model model, ServiceSettings serviceSettings) {
+        super(model, serviceSettings);
+        if (serviceSettings instanceof AmazonBedrockServiceSettings bedrockServiceSettings) {
+            setPropertiesFromServiceSettings(bedrockServiceSettings);
+        }
+    }
+
+    protected AmazonBedrockModel(ModelConfigurations modelConfigurations) {
+        super(modelConfigurations);
+        setPropertiesFromServiceSettings((AmazonBedrockServiceSettings) modelConfigurations.getServiceSettings());
+    }
+
+    public String region() {
+        return region;
+    }
+
+    public String model() {
+        return model;
+    }
+
+    public AmazonBedrockProvider provider() {
+        return provider;
+    }
+
+    public RateLimitSettings rateLimitSettings() {
+        return rateLimitSettings;
+    }
+
+    private void setPropertiesFromServiceSettings(AmazonBedrockServiceSettings serviceSettings) {
+        this.region = serviceSettings.region();
+        this.model = serviceSettings.model();
+        this.provider = serviceSettings.provider();
+        this.rateLimitSettings = serviceSettings.rateLimitSettings();
+    }
+
+    public abstract ExecutableAction accept(AmazonBedrockActionVisitor creator, Map<String, Object> taskSettings);
+
+    @Override
+    public AmazonBedrockServiceSettings getServiceSettings() {
+        return (AmazonBedrockServiceSettings) super.getServiceSettings();
+    }
+
+    @Override
+    public AmazonBedrockSecretSettings getSecretSettings() {
+        return (AmazonBedrockSecretSettings) super.getSecretSettings();
+    }
+
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockProvider.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockProvider.java
new file mode 100644
index 0000000000000..340a5a65f0969
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockProvider.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.amazonbedrock;
+
+import java.util.Locale;
+
+public enum AmazonBedrockProvider {
+    AMAZONTITAN,
+    ANTHROPIC,
+    AI21LABS,
+    COHERE,
+    META,
+    MISTRAL;
+
+    public static String NAME = "amazon_bedrock_provider";
+
+    public static AmazonBedrockProvider fromString(String name) {
+        return valueOf(name.trim().toUpperCase(Locale.ROOT));
+    }
+
+    @Override
+    public String toString() {
+        return name().toLowerCase(Locale.ROOT);
+    }
+}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockProviderCapabilities.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockProviderCapabilities.java
new file mode 100644
index 0000000000000..28b10ef294bda
--- /dev/null
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockProviderCapabilities.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.amazonbedrock;
+
+import org.elasticsearch.inference.SimilarityMeasure;
+import org.elasticsearch.inference.TaskType;
+
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.DEFAULT_MAX_CHUNK_SIZE;
+
+public final class AmazonBedrockProviderCapabilities {
+    private static final List<AmazonBedrockProvider> embeddingProviders = List.of(
+        AmazonBedrockProvider.AMAZONTITAN,
+        AmazonBedrockProvider.COHERE
+    );
+
+    private static final List<AmazonBedrockProvider> chatCompletionProviders = List.of(
+        AmazonBedrockProvider.AMAZONTITAN,
+        AmazonBedrockProvider.ANTHROPIC,
+        AmazonBedrockProvider.AI21LABS,
+        AmazonBedrockProvider.COHERE,
+        AmazonBedrockProvider.META,
+        AmazonBedrockProvider.MISTRAL
+    );
+
+    private static final List<AmazonBedrockProvider> chatCompletionProvidersWithTopK = List.of(
+        AmazonBedrockProvider.ANTHROPIC,
+        AmazonBedrockProvider.COHERE,
+        AmazonBedrockProvider.MISTRAL
+    );
+
+    private static final Map<AmazonBedrockProvider, SimilarityMeasure> embeddingsDefaultSimilarityMeasure = Map.of(
+        AmazonBedrockProvider.AMAZONTITAN,
+        SimilarityMeasure.COSINE,
+        AmazonBedrockProvider.COHERE,
+        SimilarityMeasure.DOT_PRODUCT
+    );
+
+    private static final Map<AmazonBedrockProvider, Integer> embeddingsDefaultChunkSize = Map.of(
+        AmazonBedrockProvider.AMAZONTITAN,
+        8192,
+        AmazonBedrockProvider.COHERE,
+        2048
+    );
+
+    private static final Map<AmazonBedrockProvider, Integer> embeddingsMaxBatchSize = Map.of(
+        AmazonBedrockProvider.AMAZONTITAN,
+        1,
+        AmazonBedrockProvider.COHERE,
+        96
+    );
+
+    public static boolean providerAllowsTaskType(AmazonBedrockProvider provider, TaskType taskType) {
+        switch (taskType) {
+            case COMPLETION -> {
+                return chatCompletionProviders.contains(provider);
+            }
+            case TEXT_EMBEDDING -> {
+                return embeddingProviders.contains(provider);
+            }
+            default -> {
+                return false;
+            }
+        }
+    }
+
+    public static boolean chatCompletionProviderHasTopKParameter(AmazonBedrockProvider provider) {
+        return chatCompletionProvidersWithTopK.contains(provider);
+    }
+
+    public static SimilarityMeasure getProviderDefaultSimilarityMeasure(AmazonBedrockProvider provider) {
+        if (embeddingsDefaultSimilarityMeasure.containsKey(provider)) {
+            return embeddingsDefaultSimilarityMeasure.get(provider);
+        }
+
+        return SimilarityMeasure.COSINE;
+    }
+
+    public static int
getEmbeddingsProviderDefaultChunkSize(AmazonBedrockProvider provider) { + if (embeddingsDefaultChunkSize.containsKey(provider)) { + return embeddingsDefaultChunkSize.get(provider); + } + + return DEFAULT_MAX_CHUNK_SIZE; + } + + public static int getEmbeddingsMaxBatchSize(AmazonBedrockProvider provider) { + if (embeddingsMaxBatchSize.containsKey(provider)) { + return embeddingsMaxBatchSize.get(provider); + } + + return 1; + } + +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockSecretSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockSecretSettings.java new file mode 100644 index 0000000000000..9e6328ce1c358 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockSecretSettings.java @@ -0,0 +1,110 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.SecretSettings; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.TransportVersions.ML_INFERENCE_AMAZON_BEDROCK_ADDED; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredSecureString; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.ACCESS_KEY_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.SECRET_KEY_FIELD; + +public class AmazonBedrockSecretSettings implements SecretSettings { + public static final String NAME = "amazon_bedrock_secret_settings"; + + public final SecureString accessKey; + public final SecureString secretKey; + + public static AmazonBedrockSecretSettings fromMap(@Nullable Map map) { + if (map == null) { + return null; + } + + ValidationException validationException = new ValidationException(); + SecureString secureAccessKey = extractRequiredSecureString( + map, + ACCESS_KEY_FIELD, + ModelSecrets.SECRET_SETTINGS, + validationException + ); + SecureString secureSecretKey = extractRequiredSecureString( + map, + SECRET_KEY_FIELD, + ModelSecrets.SECRET_SETTINGS, + validationException + ); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return new AmazonBedrockSecretSettings(secureAccessKey, secureSecretKey); + } + + public AmazonBedrockSecretSettings(SecureString accessKey, SecureString secretKey) { + this.accessKey = Objects.requireNonNull(accessKey); + this.secretKey = Objects.requireNonNull(secretKey); + } + + public AmazonBedrockSecretSettings(StreamInput in) throws IOException { + this.accessKey = in.readSecureString(); + this.secretKey = in.readSecureString(); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + 
public TransportVersion getMinimalSupportedVersion() { + return ML_INFERENCE_AMAZON_BEDROCK_ADDED; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeSecureString(accessKey); + out.writeSecureString(secretKey); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + + builder.field(ACCESS_KEY_FIELD, accessKey.toString()); + builder.field(SECRET_KEY_FIELD, secretKey.toString()); + + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object object) { + if (this == object) return true; + if (object == null || getClass() != object.getClass()) return false; + AmazonBedrockSecretSettings that = (AmazonBedrockSecretSettings) object; + return Objects.equals(accessKey, that.accessKey) && Objects.equals(secretKey, that.secretKey); + } + + @Override + public int hashCode() { + return Objects.hash(accessKey, secretKey); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java new file mode 100644 index 0000000000000..459ca367058f8 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java @@ -0,0 +1,328 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.TransportVersion; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.common.Strings; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.ChunkedInferenceServiceResults; +import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.inference.InputType; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker; +import org.elasticsearch.xpack.inference.external.action.amazonbedrock.AmazonBedrockActionCreator; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; +import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.SenderService; +import org.elasticsearch.xpack.inference.services.ServiceComponents; +import org.elasticsearch.xpack.inference.services.ServiceUtils; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModel; +import 
org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModel; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsServiceSettings; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.elasticsearch.TransportVersions.ML_INFERENCE_AMAZON_BEDROCK_ADDED; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.parsePersistedConfigErrorMsg; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_K_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProviderCapabilities.chatCompletionProviderHasTopKParameter; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProviderCapabilities.getEmbeddingsMaxBatchSize; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProviderCapabilities.getProviderDefaultSimilarityMeasure; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProviderCapabilities.providerAllowsTaskType; + +public class AmazonBedrockService extends SenderService { + public static final String NAME = "amazonbedrock"; + + private final Sender amazonBedrockSender; + + public AmazonBedrockService( + HttpRequestSender.Factory httpSenderFactory, + AmazonBedrockRequestSender.Factory amazonBedrockFactory, + ServiceComponents serviceComponents + ) { + super(httpSenderFactory, serviceComponents); + this.amazonBedrockSender = amazonBedrockFactory.createSender(); + } + + @Override + protected void doInfer( + Model model, + List input, + Map taskSettings, + InputType inputType, + TimeValue timeout, + ActionListener listener + ) { + var actionCreator = new AmazonBedrockActionCreator(amazonBedrockSender, this.getServiceComponents(), timeout); + if (model instanceof AmazonBedrockModel baseAmazonBedrockModel) { + var action = baseAmazonBedrockModel.accept(actionCreator, taskSettings); + action.execute(new DocumentsOnlyInput(input), timeout, listener); + } else { + listener.onFailure(createInvalidModelException(model)); + } + } + + @Override + protected void doInfer( + Model model, + String query, + List input, + Map taskSettings, + InputType inputType, + TimeValue timeout, + ActionListener listener + ) { + throw new UnsupportedOperationException("Amazon Bedrock service does not support inference with query input"); + } + + @Override + protected void doChunkedInfer( + Model model, + String query, + List input, + Map taskSettings, + InputType inputType, + ChunkingOptions chunkingOptions, + TimeValue timeout, + ActionListener> listener + ) { + var actionCreator = new AmazonBedrockActionCreator(amazonBedrockSender, this.getServiceComponents(), timeout); + if (model instanceof AmazonBedrockModel baseAmazonBedrockModel) { + var maxBatchSize = getEmbeddingsMaxBatchSize(baseAmazonBedrockModel.provider()); + var batchedRequests = new EmbeddingRequestChunker(input, maxBatchSize, EmbeddingRequestChunker.EmbeddingType.FLOAT) + .batchRequestsWithListeners(listener); + for (var request : 
batchedRequests) { + var action = baseAmazonBedrockModel.accept(actionCreator, taskSettings); + action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); + } + } else { + listener.onFailure(createInvalidModelException(model)); + } + } + + @Override + public String name() { + return NAME; + } + + @Override + public void parseRequestConfig( + String modelId, + TaskType taskType, + Map config, + Set platformArchitectures, + ActionListener parsedModelListener + ) { + try { + Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); + Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); + + AmazonBedrockModel model = createModel( + modelId, + taskType, + serviceSettingsMap, + taskSettingsMap, + serviceSettingsMap, + TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), + ConfigurationParseContext.REQUEST + ); + + throwIfNotEmptyMap(config, NAME); + throwIfNotEmptyMap(serviceSettingsMap, NAME); + throwIfNotEmptyMap(taskSettingsMap, NAME); + + parsedModelListener.onResponse(model); + } catch (Exception e) { + parsedModelListener.onFailure(e); + } + } + + @Override + public Model parsePersistedConfigWithSecrets( + String modelId, + TaskType taskType, + Map config, + Map secrets + ) { + Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); + Map taskSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.TASK_SETTINGS); + Map secretSettingsMap = removeFromMapOrDefaultEmpty(secrets, ModelSecrets.SECRET_SETTINGS); + + return createModel( + modelId, + taskType, + serviceSettingsMap, + taskSettingsMap, + secretSettingsMap, + parsePersistedConfigErrorMsg(modelId, NAME), + ConfigurationParseContext.PERSISTENT + ); + } + + @Override + public Model parsePersistedConfig(String modelId, TaskType taskType, Map config) { + Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); + Map taskSettingsMap = removeFromMapOrDefaultEmpty(config, ModelConfigurations.TASK_SETTINGS); + + return createModel( + modelId, + taskType, + serviceSettingsMap, + taskSettingsMap, + null, + parsePersistedConfigErrorMsg(modelId, NAME), + ConfigurationParseContext.PERSISTENT + ); + } + + private static AmazonBedrockModel createModel( + String inferenceEntityId, + TaskType taskType, + Map serviceSettings, + Map taskSettings, + @Nullable Map secretSettings, + String failureMessage, + ConfigurationParseContext context + ) { + switch (taskType) { + case TEXT_EMBEDDING -> { + var model = new AmazonBedrockEmbeddingsModel( + inferenceEntityId, + taskType, + NAME, + serviceSettings, + taskSettings, + secretSettings, + context + ); + checkProviderForTask(TaskType.TEXT_EMBEDDING, model.provider()); + return model; + } + case COMPLETION -> { + var model = new AmazonBedrockChatCompletionModel( + inferenceEntityId, + taskType, + NAME, + serviceSettings, + taskSettings, + secretSettings, + context + ); + checkProviderForTask(TaskType.COMPLETION, model.provider()); + checkChatCompletionProviderForTopKParameter(model); + return model; + } + default -> throw new ElasticsearchStatusException(failureMessage, RestStatus.BAD_REQUEST); + } + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return ML_INFERENCE_AMAZON_BEDROCK_ADDED; + } + + /** + * For text embedding models get the embedding size and + * update the service settings. 
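+ * Models for task types other than text embedding are returned unchanged.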
+ * + * @param model The new model + * @param listener The listener + */ + @Override + public void checkModelConfig(Model model, ActionListener listener) { + if (model instanceof AmazonBedrockEmbeddingsModel embeddingsModel) { + ServiceUtils.getEmbeddingSize( + model, + this, + listener.delegateFailureAndWrap((l, size) -> l.onResponse(updateModelWithEmbeddingDetails(embeddingsModel, size))) + ); + } else { + listener.onResponse(model); + } + } + + private AmazonBedrockEmbeddingsModel updateModelWithEmbeddingDetails(AmazonBedrockEmbeddingsModel model, int embeddingSize) { + AmazonBedrockEmbeddingsServiceSettings serviceSettings = model.getServiceSettings(); + if (serviceSettings.dimensionsSetByUser() + && serviceSettings.dimensions() != null + && serviceSettings.dimensions() != embeddingSize) { + throw new ElasticsearchStatusException( + Strings.format( + "The retrieved embeddings size [%s] does not match the size specified in the settings [%s]. " + + "Please recreate the [%s] configuration with the correct dimensions", + embeddingSize, + serviceSettings.dimensions(), + model.getConfigurations().getInferenceEntityId() + ), + RestStatus.BAD_REQUEST + ); + } + + var similarityFromModel = serviceSettings.similarity(); + var similarityToUse = similarityFromModel == null ? getProviderDefaultSimilarityMeasure(model.provider()) : similarityFromModel; + + AmazonBedrockEmbeddingsServiceSettings settingsToUse = new AmazonBedrockEmbeddingsServiceSettings( + serviceSettings.region(), + serviceSettings.model(), + serviceSettings.provider(), + embeddingSize, + serviceSettings.dimensionsSetByUser(), + serviceSettings.maxInputTokens(), + similarityToUse, + serviceSettings.rateLimitSettings() + ); + + return new AmazonBedrockEmbeddingsModel(model, settingsToUse); + } + + private static void checkProviderForTask(TaskType taskType, AmazonBedrockProvider provider) { + if (providerAllowsTaskType(provider, taskType) == false) { + throw new ElasticsearchStatusException( + Strings.format("The [%s] task type for provider [%s] is not available", taskType, provider), + RestStatus.BAD_REQUEST + ); + } + } + + private static void checkChatCompletionProviderForTopKParameter(AmazonBedrockChatCompletionModel model) { + var taskSettings = model.getTaskSettings(); + if (taskSettings.topK() != null) { + if (chatCompletionProviderHasTopKParameter(model.provider()) == false) { + throw new ElasticsearchStatusException( + Strings.format("The [%s] task parameter is not available for provider [%s]", TOP_K_FIELD, model.provider()), + RestStatus.BAD_REQUEST + ); + } + } + } + + @Override + public void close() throws IOException { + super.close(); + IOUtils.closeWhileHandlingException(amazonBedrockSender); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceSettings.java new file mode 100644 index 0000000000000..13c7c0a8c5938 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceSettings.java @@ -0,0 +1,141 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.TransportVersions.ML_INFERENCE_AMAZON_BEDROCK_ADDED; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredEnum; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MODEL_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.PROVIDER_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.REGION_FIELD; + +public abstract class AmazonBedrockServiceSettings extends FilteredXContentObject implements ServiceSettings { + + protected static final String AMAZON_BEDROCK_BASE_NAME = "amazon_bedrock"; + + protected final String region; + protected final String model; + protected final AmazonBedrockProvider provider; + protected final RateLimitSettings rateLimitSettings; + + // the default requests per minute are defined as per-model in the "Runtime quotas" on AWS + // see: https://docs.aws.amazon.com/bedrock/latest/userguide/quotas.html + // setting this to 240 requests per minute (4 requests / sec) is a sane default for us as it should be enough for + // decent throughput without exceeding the minimal for _most_ items. The user should consult + // the table above if using a model that might have a lesser limit (e.g. 
Anthropic Claude 3.5) + protected static final RateLimitSettings DEFAULT_RATE_LIMIT_SETTINGS = new RateLimitSettings(240); + + protected static AmazonBedrockServiceSettings.BaseAmazonBedrockCommonSettings fromMap( + Map map, + ValidationException validationException, + ConfigurationParseContext context + ) { + String model = extractRequiredString(map, MODEL_FIELD, ModelConfigurations.SERVICE_SETTINGS, validationException); + String region = extractRequiredString(map, REGION_FIELD, ModelConfigurations.SERVICE_SETTINGS, validationException); + AmazonBedrockProvider provider = extractRequiredEnum( + map, + PROVIDER_FIELD, + ModelConfigurations.SERVICE_SETTINGS, + AmazonBedrockProvider::fromString, + EnumSet.allOf(AmazonBedrockProvider.class), + validationException + ); + RateLimitSettings rateLimitSettings = RateLimitSettings.of( + map, + DEFAULT_RATE_LIMIT_SETTINGS, + validationException, + AMAZON_BEDROCK_BASE_NAME, + context + ); + + return new BaseAmazonBedrockCommonSettings(region, model, provider, rateLimitSettings); + } + + protected record BaseAmazonBedrockCommonSettings( + String region, + String model, + AmazonBedrockProvider provider, + @Nullable RateLimitSettings rateLimitSettings + ) {} + + protected AmazonBedrockServiceSettings(StreamInput in) throws IOException { + this.region = in.readString(); + this.model = in.readString(); + this.provider = in.readEnum(AmazonBedrockProvider.class); + this.rateLimitSettings = new RateLimitSettings(in); + } + + protected AmazonBedrockServiceSettings( + String region, + String model, + AmazonBedrockProvider provider, + @Nullable RateLimitSettings rateLimitSettings + ) { + this.region = Objects.requireNonNull(region); + this.model = Objects.requireNonNull(model); + this.provider = Objects.requireNonNull(provider); + this.rateLimitSettings = Objects.requireNonNullElse(rateLimitSettings, DEFAULT_RATE_LIMIT_SETTINGS); + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return ML_INFERENCE_AMAZON_BEDROCK_ADDED; + } + + public String region() { + return region; + } + + public String model() { + return model; + } + + public AmazonBedrockProvider provider() { + return provider; + } + + public RateLimitSettings rateLimitSettings() { + return rateLimitSettings; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(region); + out.writeString(model); + out.writeEnum(provider); + rateLimitSettings.writeTo(out); + } + + public void addBaseXContent(XContentBuilder builder, Params params) throws IOException { + toXContentFragmentOfExposedFields(builder, params); + } + + protected void addXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException { + builder.field(REGION_FIELD, region); + builder.field(MODEL_FIELD, model); + builder.field(PROVIDER_FIELD, provider.name()); + rateLimitSettings.toXContent(builder, params); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionModel.java new file mode 100644 index 0000000000000..27dc607d671aa --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionModel.java @@ -0,0 +1,83 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.completion; + +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.TaskSettings; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.external.action.amazonbedrock.AmazonBedrockActionVisitor; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockSecretSettings; + +import java.util.Map; + +public class AmazonBedrockChatCompletionModel extends AmazonBedrockModel { + + public static AmazonBedrockChatCompletionModel of(AmazonBedrockChatCompletionModel completionModel, Map taskSettings) { + if (taskSettings == null || taskSettings.isEmpty()) { + return completionModel; + } + + var requestTaskSettings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap(taskSettings); + var taskSettingsToUse = AmazonBedrockChatCompletionTaskSettings.of(completionModel.getTaskSettings(), requestTaskSettings); + return new AmazonBedrockChatCompletionModel(completionModel, taskSettingsToUse); + } + + public AmazonBedrockChatCompletionModel( + String inferenceEntityId, + TaskType taskType, + String name, + Map serviceSettings, + Map taskSettings, + Map secretSettings, + ConfigurationParseContext context + ) { + this( + inferenceEntityId, + taskType, + name, + AmazonBedrockChatCompletionServiceSettings.fromMap(serviceSettings, context), + AmazonBedrockChatCompletionTaskSettings.fromMap(taskSettings), + AmazonBedrockSecretSettings.fromMap(secretSettings) + ); + } + + public AmazonBedrockChatCompletionModel( + String inferenceEntityId, + TaskType taskType, + String service, + AmazonBedrockChatCompletionServiceSettings serviceSettings, + AmazonBedrockChatCompletionTaskSettings taskSettings, + AmazonBedrockSecretSettings secrets + ) { + super(new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings, taskSettings), new ModelSecrets(secrets)); + } + + public AmazonBedrockChatCompletionModel(Model model, TaskSettings taskSettings) { + super(model, taskSettings); + } + + @Override + public ExecutableAction accept(AmazonBedrockActionVisitor creator, Map taskSettings) { + return creator.create(this, taskSettings); + } + + @Override + public AmazonBedrockChatCompletionServiceSettings getServiceSettings() { + return (AmazonBedrockChatCompletionServiceSettings) super.getServiceSettings(); + } + + @Override + public AmazonBedrockChatCompletionTaskSettings getTaskSettings() { + return (AmazonBedrockChatCompletionTaskSettings) super.getTaskSettings(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionRequestTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionRequestTaskSettings.java new file mode 100644 index 0000000000000..5985dcd56c5d2 --- /dev/null +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionRequestTaskSettings.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.completion; + +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; + +import java.util.Map; + +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalDoubleInRange; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MAX_NEW_TOKENS_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MAX_TEMPERATURE_TOP_P_TOP_K_VALUE; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MIN_TEMPERATURE_TOP_P_TOP_K_VALUE; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TEMPERATURE_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_K_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_P_FIELD; + +public record AmazonBedrockChatCompletionRequestTaskSettings( + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Double topK, + @Nullable Integer maxNewTokens +) { + + public static final AmazonBedrockChatCompletionRequestTaskSettings EMPTY_SETTINGS = new AmazonBedrockChatCompletionRequestTaskSettings( + null, + null, + null, + null + ); + + /** + * Extracts the task settings from a map. All settings are considered optional and the absence of a setting + * does not throw an error. 
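+ * Any values that are present are validated: temperature, top_p and top_k must lie between 0.0 and 1.0, and max_new_tokens must be a positive integer.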
+ * + * @param map the settings received from a request + * @return a {@link AmazonBedrockChatCompletionRequestTaskSettings} + */ + public static AmazonBedrockChatCompletionRequestTaskSettings fromMap(Map map) { + if (map.isEmpty()) { + return AmazonBedrockChatCompletionRequestTaskSettings.EMPTY_SETTINGS; + } + + ValidationException validationException = new ValidationException(); + + var temperature = extractOptionalDoubleInRange( + map, + TEMPERATURE_FIELD, + MIN_TEMPERATURE_TOP_P_TOP_K_VALUE, + MAX_TEMPERATURE_TOP_P_TOP_K_VALUE, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + var topP = extractOptionalDoubleInRange( + map, + TOP_P_FIELD, + MIN_TEMPERATURE_TOP_P_TOP_K_VALUE, + MAX_TEMPERATURE_TOP_P_TOP_K_VALUE, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + var topK = extractOptionalDoubleInRange( + map, + TOP_K_FIELD, + MIN_TEMPERATURE_TOP_P_TOP_K_VALUE, + MAX_TEMPERATURE_TOP_P_TOP_K_VALUE, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + Integer maxNewTokens = extractOptionalPositiveInteger( + map, + MAX_NEW_TOKENS_FIELD, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return new AmazonBedrockChatCompletionRequestTaskSettings(temperature, topP, topK, maxNewTokens); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionServiceSettings.java new file mode 100644 index 0000000000000..fc3d09c6eea7a --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionServiceSettings.java @@ -0,0 +1,93 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.completion; + +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockServiceSettings; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +public class AmazonBedrockChatCompletionServiceSettings extends AmazonBedrockServiceSettings { + public static final String NAME = "amazon_bedrock_chat_completion_service_settings"; + + public static AmazonBedrockChatCompletionServiceSettings fromMap( + Map serviceSettings, + ConfigurationParseContext context + ) { + ValidationException validationException = new ValidationException(); + + var baseSettings = AmazonBedrockServiceSettings.fromMap(serviceSettings, validationException, context); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return new AmazonBedrockChatCompletionServiceSettings( + baseSettings.region(), + baseSettings.model(), + baseSettings.provider(), + baseSettings.rateLimitSettings() + ); + } + + public AmazonBedrockChatCompletionServiceSettings( + String region, + String model, + AmazonBedrockProvider provider, + RateLimitSettings rateLimitSettings + ) { + super(region, model, provider, rateLimitSettings); + } + + public AmazonBedrockChatCompletionServiceSettings(StreamInput in) throws IOException { + super(in); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + super.addBaseXContent(builder, params); + builder.endObject(); + return builder; + } + + @Override + protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException { + super.addXContentFragmentOfExposedFields(builder, params); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AmazonBedrockChatCompletionServiceSettings that = (AmazonBedrockChatCompletionServiceSettings) o; + + return Objects.equals(region, that.region) + && Objects.equals(provider, that.provider) + && Objects.equals(model, that.model) + && Objects.equals(rateLimitSettings, that.rateLimitSettings); + } + + @Override + public int hashCode() { + return Objects.hash(region, model, provider, rateLimitSettings); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionTaskSettings.java new file mode 100644 index 0000000000000..e689e68794e1f --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionTaskSettings.java @@ -0,0 +1,190 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.completion; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.TaskSettings; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.TransportVersions.ML_INFERENCE_AMAZON_BEDROCK_ADDED; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalDoubleInRange; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MAX_NEW_TOKENS_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MAX_TEMPERATURE_TOP_P_TOP_K_VALUE; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MIN_TEMPERATURE_TOP_P_TOP_K_VALUE; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TEMPERATURE_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_K_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_P_FIELD; + +public class AmazonBedrockChatCompletionTaskSettings implements TaskSettings { + public static final String NAME = "amazon_bedrock_chat_completion_task_settings"; + + public static final AmazonBedrockChatCompletionRequestTaskSettings EMPTY_SETTINGS = new AmazonBedrockChatCompletionRequestTaskSettings( + null, + null, + null, + null + ); + + public static AmazonBedrockChatCompletionTaskSettings fromMap(Map settings) { + ValidationException validationException = new ValidationException(); + + Double temperature = extractOptionalDoubleInRange( + settings, + TEMPERATURE_FIELD, + MIN_TEMPERATURE_TOP_P_TOP_K_VALUE, + MAX_TEMPERATURE_TOP_P_TOP_K_VALUE, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + Double topP = extractOptionalDoubleInRange( + settings, + TOP_P_FIELD, + MIN_TEMPERATURE_TOP_P_TOP_K_VALUE, + MAX_TEMPERATURE_TOP_P_TOP_K_VALUE, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + Double topK = extractOptionalDoubleInRange( + settings, + TOP_K_FIELD, + MIN_TEMPERATURE_TOP_P_TOP_K_VALUE, + MAX_TEMPERATURE_TOP_P_TOP_K_VALUE, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + Integer maxNewTokens = extractOptionalPositiveInteger( + settings, + MAX_NEW_TOKENS_FIELD, + ModelConfigurations.TASK_SETTINGS, + validationException + ); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return new AmazonBedrockChatCompletionTaskSettings(temperature, topP, topK, maxNewTokens); + } + + public static AmazonBedrockChatCompletionTaskSettings of( + AmazonBedrockChatCompletionTaskSettings originalSettings, + AmazonBedrockChatCompletionRequestTaskSettings requestSettings + ) { + var temperature = requestSettings.temperature() == null ? 
originalSettings.temperature() : requestSettings.temperature(); + var topP = requestSettings.topP() == null ? originalSettings.topP() : requestSettings.topP(); + var topK = requestSettings.topK() == null ? originalSettings.topK() : requestSettings.topK(); + var maxNewTokens = requestSettings.maxNewTokens() == null ? originalSettings.maxNewTokens() : requestSettings.maxNewTokens(); + + return new AmazonBedrockChatCompletionTaskSettings(temperature, topP, topK, maxNewTokens); + } + + private final Double temperature; + private final Double topP; + private final Double topK; + private final Integer maxNewTokens; + + public AmazonBedrockChatCompletionTaskSettings( + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Double topK, + @Nullable Integer maxNewTokens + ) { + this.temperature = temperature; + this.topP = topP; + this.topK = topK; + this.maxNewTokens = maxNewTokens; + } + + public AmazonBedrockChatCompletionTaskSettings(StreamInput in) throws IOException { + this.temperature = in.readOptionalDouble(); + this.topP = in.readOptionalDouble(); + this.topK = in.readOptionalDouble(); + this.maxNewTokens = in.readOptionalVInt(); + } + + public Double temperature() { + return temperature; + } + + public Double topP() { + return topP; + } + + public Double topK() { + return topK; + } + + public Integer maxNewTokens() { + return maxNewTokens; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return ML_INFERENCE_AMAZON_BEDROCK_ADDED; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalDouble(temperature); + out.writeOptionalDouble(topP); + out.writeOptionalDouble(topK); + out.writeOptionalVInt(maxNewTokens); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + { + if (temperature != null) { + builder.field(TEMPERATURE_FIELD, temperature); + } + if (topP != null) { + builder.field(TOP_P_FIELD, topP); + } + if (topK != null) { + builder.field(TOP_K_FIELD, topK); + } + if (maxNewTokens != null) { + builder.field(MAX_NEW_TOKENS_FIELD, maxNewTokens); + } + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AmazonBedrockChatCompletionTaskSettings that = (AmazonBedrockChatCompletionTaskSettings) o; + return Objects.equals(temperature, that.temperature) + && Objects.equals(topP, that.topP) + && Objects.equals(topK, that.topK) + && Objects.equals(maxNewTokens, that.maxNewTokens); + } + + @Override + public int hashCode() { + return Objects.hash(temperature, topP, topK, maxNewTokens); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsModel.java new file mode 100644 index 0000000000000..0e3a954a03279 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsModel.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings; + +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.inference.EmptyTaskSettings; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.ServiceSettings; +import org.elasticsearch.inference.TaskSettings; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.inference.external.action.ExecutableAction; +import org.elasticsearch.xpack.inference.external.action.amazonbedrock.AmazonBedrockActionVisitor; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockSecretSettings; + +import java.util.Map; + +public class AmazonBedrockEmbeddingsModel extends AmazonBedrockModel { + + public static AmazonBedrockEmbeddingsModel of(AmazonBedrockEmbeddingsModel embeddingsModel, Map taskSettings) { + if (taskSettings != null && taskSettings.isEmpty() == false) { + // no task settings allowed + var validationException = new ValidationException(); + validationException.addValidationError("Amazon Bedrock embeddings model cannot have task settings"); + throw validationException; + } + + return embeddingsModel; + } + + public AmazonBedrockEmbeddingsModel( + String inferenceEntityId, + TaskType taskType, + String service, + Map serviceSettings, + Map taskSettings, + Map secretSettings, + ConfigurationParseContext context + ) { + this( + inferenceEntityId, + taskType, + service, + AmazonBedrockEmbeddingsServiceSettings.fromMap(serviceSettings, context), + new EmptyTaskSettings(), + AmazonBedrockSecretSettings.fromMap(secretSettings) + ); + } + + public AmazonBedrockEmbeddingsModel( + String inferenceEntityId, + TaskType taskType, + String service, + AmazonBedrockEmbeddingsServiceSettings serviceSettings, + TaskSettings taskSettings, + AmazonBedrockSecretSettings secrets + ) { + super( + new ModelConfigurations(inferenceEntityId, taskType, service, serviceSettings, new EmptyTaskSettings()), + new ModelSecrets(secrets) + ); + } + + public AmazonBedrockEmbeddingsModel(Model model, ServiceSettings serviceSettings) { + super(model, serviceSettings); + } + + @Override + public ExecutableAction accept(AmazonBedrockActionVisitor creator, Map taskSettings) { + return creator.create(this, taskSettings); + } + + @Override + public AmazonBedrockEmbeddingsServiceSettings getServiceSettings() { + return (AmazonBedrockEmbeddingsServiceSettings) super.getServiceSettings(); + } +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettings.java new file mode 100644 index 0000000000000..4bf037558c618 --- /dev/null +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettings.java @@ -0,0 +1,220 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings; + +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.ServiceUtils; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockServiceSettings; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.xpack.inference.services.ServiceFields.DIMENSIONS; +import static org.elasticsearch.xpack.inference.services.ServiceFields.MAX_INPUT_TOKENS; +import static org.elasticsearch.xpack.inference.services.ServiceFields.SIMILARITY; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalBoolean; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; +import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractSimilarity; + +public class AmazonBedrockEmbeddingsServiceSettings extends AmazonBedrockServiceSettings { + public static final String NAME = "amazon_bedrock_embeddings_service_settings"; + static final String DIMENSIONS_SET_BY_USER = "dimensions_set_by_user"; + + private final Integer dimensions; + private final Boolean dimensionsSetByUser; + private final Integer maxInputTokens; + private final SimilarityMeasure similarity; + + public static AmazonBedrockEmbeddingsServiceSettings fromMap(Map map, ConfigurationParseContext context) { + ValidationException validationException = new ValidationException(); + + var settings = embeddingSettingsFromMap(map, validationException, context); + + if (validationException.validationErrors().isEmpty() == false) { + throw validationException; + } + + return settings; + } + + private static AmazonBedrockEmbeddingsServiceSettings embeddingSettingsFromMap( + Map map, + ValidationException validationException, + ConfigurationParseContext context + ) { + var baseSettings = AmazonBedrockServiceSettings.fromMap(map, validationException, context); + SimilarityMeasure similarity = extractSimilarity(map, ModelConfigurations.SERVICE_SETTINGS, validationException); + + Integer maxTokens = extractOptionalPositiveInteger( + map, + MAX_INPUT_TOKENS, + ModelConfigurations.SERVICE_SETTINGS, + validationException + ); + Integer dims = extractOptionalPositiveInteger(map, DIMENSIONS, ModelConfigurations.SERVICE_SETTINGS, validationException); + + Boolean dimensionsSetByUser = extractOptionalBoolean(map, DIMENSIONS_SET_BY_USER, validationException); + + switch (context) { + case REQUEST -> { + if (dimensionsSetByUser != null) { + validationException.addValidationError( + ServiceUtils.invalidSettingError(DIMENSIONS_SET_BY_USER, ModelConfigurations.SERVICE_SETTINGS) + ); + } + + if (dims != null) { + 
validationException.addValidationError( + ServiceUtils.invalidSettingError(DIMENSIONS, ModelConfigurations.SERVICE_SETTINGS) + ); + } + dimensionsSetByUser = false; + } + case PERSISTENT -> { + if (dimensionsSetByUser == null) { + validationException.addValidationError( + ServiceUtils.missingSettingErrorMsg(DIMENSIONS_SET_BY_USER, ModelConfigurations.SERVICE_SETTINGS) + ); + } + } + } + return new AmazonBedrockEmbeddingsServiceSettings( + baseSettings.region(), + baseSettings.model(), + baseSettings.provider(), + dims, + dimensionsSetByUser, + maxTokens, + similarity, + baseSettings.rateLimitSettings() + ); + } + + public AmazonBedrockEmbeddingsServiceSettings(StreamInput in) throws IOException { + super(in); + dimensions = in.readOptionalVInt(); + dimensionsSetByUser = in.readBoolean(); + maxInputTokens = in.readOptionalVInt(); + similarity = in.readOptionalEnum(SimilarityMeasure.class); + } + + public AmazonBedrockEmbeddingsServiceSettings( + String region, + String model, + AmazonBedrockProvider provider, + @Nullable Integer dimensions, + Boolean dimensionsSetByUser, + @Nullable Integer maxInputTokens, + @Nullable SimilarityMeasure similarity, + RateLimitSettings rateLimitSettings + ) { + super(region, model, provider, rateLimitSettings); + this.dimensions = dimensions; + this.dimensionsSetByUser = dimensionsSetByUser; + this.maxInputTokens = maxInputTokens; + this.similarity = similarity; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeOptionalVInt(dimensions); + out.writeBoolean(dimensionsSetByUser); + out.writeOptionalVInt(maxInputTokens); + out.writeOptionalEnum(similarity); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + + super.addBaseXContent(builder, params); + builder.field(DIMENSIONS_SET_BY_USER, dimensionsSetByUser); + + builder.endObject(); + return builder; + } + + @Override + protected XContentBuilder toXContentFragmentOfExposedFields(XContentBuilder builder, Params params) throws IOException { + super.addXContentFragmentOfExposedFields(builder, params); + + if (dimensions != null) { + builder.field(DIMENSIONS, dimensions); + } + if (maxInputTokens != null) { + builder.field(MAX_INPUT_TOKENS, maxInputTokens); + } + if (similarity != null) { + builder.field(SIMILARITY, similarity); + } + + return builder; + } + + @Override + public SimilarityMeasure similarity() { + return similarity; + } + + @Override + public Integer dimensions() { + return dimensions; + } + + public boolean dimensionsSetByUser() { + return this.dimensionsSetByUser; + } + + public Integer maxInputTokens() { + return maxInputTokens; + } + + @Override + public DenseVectorFieldMapper.ElementType elementType() { + return DenseVectorFieldMapper.ElementType.FLOAT; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AmazonBedrockEmbeddingsServiceSettings that = (AmazonBedrockEmbeddingsServiceSettings) o; + + return Objects.equals(region, that.region) + && Objects.equals(provider, that.provider) + && Objects.equals(model, that.model) + && Objects.equals(dimensions, that.dimensions) + && Objects.equals(dimensionsSetByUser, that.dimensionsSetByUser) + && Objects.equals(maxInputTokens, that.maxInputTokens) + && Objects.equals(similarity, that.similarity) + && Objects.equals(rateLimitSettings, 
that.rateLimitSettings); + } + + @Override + public int hashCode() { + return Objects.hash(region, model, provider, dimensions, dimensionsSetByUser, maxInputTokens, similarity, rateLimitSettings); + } + +} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java index 3facb78864831..3c75243770f97 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java @@ -24,10 +24,6 @@ import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; -import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; -import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; -import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker; import org.elasticsearch.xpack.inference.external.action.azureopenai.AzureOpenAiActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; @@ -44,7 +40,6 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.xpack.core.inference.results.ResultUtils.createInvalidChunkedResultException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.parsePersistedConfigErrorMsg; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty; @@ -246,19 +241,6 @@ protected void doChunkedInfer( } } - private static List translateToChunkedResults( - List inputs, - InferenceServiceResults inferenceResults - ) { - if (inferenceResults instanceof InferenceTextEmbeddingFloatResults textEmbeddingResults) { - return InferenceChunkedTextEmbeddingFloatResults.listOf(inputs, textEmbeddingResults); - } else if (inferenceResults instanceof ErrorInferenceResults error) { - return List.of(new ErrorChunkedInferenceResults(error.getException())); - } else { - throw createInvalidChunkedResultException(InferenceTextEmbeddingFloatResults.NAME, inferenceResults.getWriteableName()); - } - } - /** * For text embedding models get the embedding size and * update the service settings. 
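For readers skimming the new Bedrock task-settings classes above: the per-request override they implement is a plain null-coalescing merge, where values supplied on the request win and null fields fall back to the stored settings. A minimal standalone sketch of that pattern follows (the record and method names are illustrative only, not part of this change):

record CompletionKnobs(Double temperature, Double topP, Double topK, Integer maxNewTokens) {

    // Request-supplied values override the originals; null request fields keep the stored value.
    static CompletionKnobs merge(CompletionKnobs original, CompletionKnobs request) {
        return new CompletionKnobs(
            request.temperature() != null ? request.temperature() : original.temperature(),
            request.topP() != null ? request.topP() : original.topP(),
            request.topK() != null ? request.topK() : original.topK(),
            request.maxNewTokens() != null ? request.maxNewTokens() : original.maxNewTokens()
        );
    }
}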
diff --git a/x-pack/plugin/inference/src/main/plugin-metadata/plugin-security.policy b/x-pack/plugin/inference/src/main/plugin-metadata/plugin-security.policy index f21a46521a7f7..a39fcf53be7f3 100644 --- a/x-pack/plugin/inference/src/main/plugin-metadata/plugin-security.policy +++ b/x-pack/plugin/inference/src/main/plugin-metadata/plugin-security.policy @@ -8,12 +8,18 @@ grant { // required by: com.google.api.client.json.JsonParser#parseValue + // also required by AWS SDK for client configuration permission java.lang.RuntimePermission "accessDeclaredMembers"; + permission java.lang.RuntimePermission "getClassLoader"; + // required by: com.google.api.client.json.GenericJson# + // also by AWS SDK for Jackson's ObjectMapper permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; + // required to add google certs to the gcs client truststore permission java.lang.RuntimePermission "setFactory"; // gcs client opens socket connections for to access repository - permission java.net.SocketPermission "*", "connect"; + // also, the AWS Bedrock client opens socket connections and needs resolve to access resources + permission java.net.SocketPermission "*", "connect,resolve"; }; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionCreatorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionCreatorTests.java new file mode 100644 index 0000000000000..87d3a82b4aae6 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/action/amazonbedrock/AmazonBedrockActionCreatorTests.java @@ -0,0 +1,175 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.action.amazonbedrock; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.results.ChatCompletionResults; +import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockMockRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; +import org.elasticsearch.xpack.inference.services.ServiceComponentsTests; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModelTests; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModelTests; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityPool; +import static org.elasticsearch.xpack.inference.results.ChatCompletionResultsTests.buildExpectationCompletion; +import static org.elasticsearch.xpack.inference.results.TextEmbeddingResultsTests.buildExpectationFloat; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockActionCreatorTests extends ESTestCase { + private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); + private ThreadPool threadPool; + + @Before + public void init() throws Exception { + threadPool = createThreadPool(inferenceUtilityPool()); + } + + @After + public void shutdown() throws IOException { + terminate(threadPool); + } + + public void testEmbeddingsRequestAction() throws IOException { + var serviceComponents = ServiceComponentsTests.createWithEmptySettings(threadPool); + var mockedFloatResults = List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.0123F, -0.0123F })); + var mockedResult = new InferenceTextEmbeddingFloatResults(mockedFloatResults); + try (var sender = new AmazonBedrockMockRequestSender()) { + sender.enqueue(mockedResult); + var creator = new AmazonBedrockActionCreator(sender, serviceComponents, TIMEOUT); + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "test_id", + "test_region", + "test_model", + AmazonBedrockProvider.AMAZONTITAN, + null, + false, + null, + null, + null, + "accesskey", + "secretkey" + ); + var action = creator.create(model, Map.of()); + PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(new DocumentsOnlyInput(List.of("abc")), InferenceAction.Request.DEFAULT_TIMEOUT, listener); + var result = listener.actionGet(TIMEOUT); + + assertThat(result.asMap(), is(buildExpectationFloat(List.of(new float[] { 0.0123F, -0.0123F })))); + + assertThat(sender.sendCount(), is(1)); + var sentInputs = sender.getInputs(); + assertThat(sentInputs.size(), is(1)); + assertThat(sentInputs.get(0), is("abc")); + } + } + + public void testEmbeddingsRequestAction_HandlesException() throws IOException { + var serviceComponents = ServiceComponentsTests.createWithEmptySettings(threadPool); 
+ var mockedResult = new ElasticsearchException("mock exception"); + try (var sender = new AmazonBedrockMockRequestSender()) { + sender.enqueue(mockedResult); + var creator = new AmazonBedrockActionCreator(sender, serviceComponents, TIMEOUT); + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "test_id", + "test_region", + "test_model", + AmazonBedrockProvider.AMAZONTITAN, + "accesskey", + "secretkey" + ); + var action = creator.create(model, Map.of()); + PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(new DocumentsOnlyInput(List.of("abc")), InferenceAction.Request.DEFAULT_TIMEOUT, listener); + var thrownException = expectThrows(ElasticsearchException.class, () -> listener.actionGet(TIMEOUT)); + + assertThat(sender.sendCount(), is(1)); + assertThat(sender.getInputs().size(), is(1)); + assertThat(thrownException.getMessage(), is("mock exception")); + } + } + + public void testCompletionRequestAction() throws IOException { + var serviceComponents = ServiceComponentsTests.createWithEmptySettings(threadPool); + var mockedChatCompletionResults = List.of(new ChatCompletionResults.Result("test input string")); + var mockedResult = new ChatCompletionResults(mockedChatCompletionResults); + try (var sender = new AmazonBedrockMockRequestSender()) { + sender.enqueue(mockedResult); + var creator = new AmazonBedrockActionCreator(sender, serviceComponents, TIMEOUT); + var model = AmazonBedrockChatCompletionModelTests.createModel( + "test_id", + "test_region", + "test_model", + AmazonBedrockProvider.AMAZONTITAN, + null, + null, + null, + null, + null, + "accesskey", + "secretkey" + ); + var action = creator.create(model, Map.of()); + PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(new DocumentsOnlyInput(List.of("abc")), InferenceAction.Request.DEFAULT_TIMEOUT, listener); + var result = listener.actionGet(TIMEOUT); + + assertThat(result.asMap(), is(buildExpectationCompletion(List.of("test input string")))); + + assertThat(sender.sendCount(), is(1)); + var sentInputs = sender.getInputs(); + assertThat(sentInputs.size(), is(1)); + assertThat(sentInputs.get(0), is("abc")); + } + } + + public void testChatCompletionRequestAction_HandlesException() throws IOException { + var serviceComponents = ServiceComponentsTests.createWithEmptySettings(threadPool); + var mockedResult = new ElasticsearchException("mock exception"); + try (var sender = new AmazonBedrockMockRequestSender()) { + sender.enqueue(mockedResult); + var creator = new AmazonBedrockActionCreator(sender, serviceComponents, TIMEOUT); + var model = AmazonBedrockChatCompletionModelTests.createModel( + "test_id", + "test_region", + "test_model", + AmazonBedrockProvider.AMAZONTITAN, + null, + null, + null, + null, + null, + "accesskey", + "secretkey" + ); + var action = creator.create(model, Map.of()); + PlainActionFuture listener = new PlainActionFuture<>(); + action.execute(new DocumentsOnlyInput(List.of("abc")), InferenceAction.Request.DEFAULT_TIMEOUT, listener); + var thrownException = expectThrows(ElasticsearchException.class, () -> listener.actionGet(TIMEOUT)); + + assertThat(sender.sendCount(), is(1)); + assertThat(sender.getInputs().size(), is(1)); + assertThat(thrownException.getMessage(), is("mock exception")); + } + } + +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecutorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecutorTests.java 
new file mode 100644 index 0000000000000..9326d39cb657c --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockExecutorTests.java @@ -0,0 +1,172 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import com.amazonaws.services.bedrockruntime.model.ContentBlock; +import com.amazonaws.services.bedrockruntime.model.ConverseOutput; +import com.amazonaws.services.bedrockruntime.model.ConverseResult; +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; +import com.amazonaws.services.bedrockruntime.model.Message; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockChatCompletionRequest; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockTitanCompletionRequestEntity; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings.AmazonBedrockEmbeddingsRequest; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings.AmazonBedrockTitanEmbeddingsRequestEntity; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.completion.AmazonBedrockChatCompletionResponseHandler; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.embeddings.AmazonBedrockEmbeddingsResponseHandler; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModelTests; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModelTests; + +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.util.List; + +import static org.elasticsearch.xpack.inference.common.TruncatorTests.createTruncator; +import static org.elasticsearch.xpack.inference.results.ChatCompletionResultsTests.buildExpectationCompletion; +import static org.elasticsearch.xpack.inference.results.TextEmbeddingResultsTests.buildExpectationFloat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockExecutorTests extends ESTestCase { + public void testExecute_EmbeddingsRequest_ForAmazonTitan() throws CharacterCodingException { + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + "accesskey", + "secretkey" + ); + var truncator = createTruncator(); + var truncatedInput = truncator.truncate(List.of("abc")); + var requestEntity = new AmazonBedrockTitanEmbeddingsRequestEntity("abc"); + var request = new AmazonBedrockEmbeddingsRequest(truncator, truncatedInput, model, requestEntity, null); + var responseHandler = new AmazonBedrockEmbeddingsResponseHandler(); + + var clientCache = new AmazonBedrockMockClientCache(null, getTestInvokeResult(TEST_AMAZON_TITAN_EMBEDDINGS_RESULT), null); + var listener 
= new PlainActionFuture<InferenceServiceResults>(); + + var executor = new AmazonBedrockEmbeddingsExecutor(request, responseHandler, logger, () -> false, listener, clientCache); + executor.run(); + var result = listener.actionGet(new TimeValue(30000)); + assertNotNull(result); + assertThat(result.asMap(), is(buildExpectationFloat(List.of(new float[] { 0.123F, 0.456F, 0.678F, 0.789F })))); + } + + public void testExecute_EmbeddingsRequest_ForCohere() throws CharacterCodingException { + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.COHERE, + "accesskey", + "secretkey" + ); + var requestEntity = new AmazonBedrockTitanEmbeddingsRequestEntity("abc"); + var truncator = createTruncator(); + var truncatedInput = truncator.truncate(List.of("abc")); + var request = new AmazonBedrockEmbeddingsRequest(truncator, truncatedInput, model, requestEntity, null); + var responseHandler = new AmazonBedrockEmbeddingsResponseHandler(); + + var clientCache = new AmazonBedrockMockClientCache(null, getTestInvokeResult(TEST_COHERE_EMBEDDINGS_RESULT), null); + var listener = new PlainActionFuture<InferenceServiceResults>(); + + var executor = new AmazonBedrockEmbeddingsExecutor(request, responseHandler, logger, () -> false, listener, clientCache); + executor.run(); + var result = listener.actionGet(new TimeValue(30000)); + assertNotNull(result); + assertThat(result.asMap(), is(buildExpectationFloat(List.of(new float[] { 0.123F, 0.456F, 0.678F, 0.789F })))); + } + + public void testExecute_ChatCompletionRequest() throws CharacterCodingException { + var model = AmazonBedrockChatCompletionModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + "accesskey", + "secretkey" + ); + + var requestEntity = new AmazonBedrockTitanCompletionRequestEntity(List.of("abc"), null, null, 512); + var request = new AmazonBedrockChatCompletionRequest(model, requestEntity, null); + var responseHandler = new AmazonBedrockChatCompletionResponseHandler(); + + var clientCache = new AmazonBedrockMockClientCache(getTestConverseResult("converse result"), null, null); + var listener = new PlainActionFuture<InferenceServiceResults>(); + + var executor = new AmazonBedrockChatCompletionExecutor(request, responseHandler, logger, () -> false, listener, clientCache); + executor.run(); + var result = listener.actionGet(new TimeValue(30000)); + assertNotNull(result); + assertThat(result.asMap(), is(buildExpectationCompletion(List.of("converse result")))); + } + + public void testExecute_FailsProperly_WithElasticsearchException() { + var model = AmazonBedrockChatCompletionModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + "accesskey", + "secretkey" + ); + + var requestEntity = new AmazonBedrockTitanCompletionRequestEntity(List.of("abc"), null, null, 512); + var request = new AmazonBedrockChatCompletionRequest(model, requestEntity, null); + var responseHandler = new AmazonBedrockChatCompletionResponseHandler(); + + var clientCache = new AmazonBedrockMockClientCache(null, null, new ElasticsearchException("test exception")); + var listener = new PlainActionFuture<InferenceServiceResults>(); + + var executor = new AmazonBedrockChatCompletionExecutor(request, responseHandler, logger, () -> false, listener, clientCache); + executor.run(); + + var exceptionThrown = assertThrows(ElasticsearchException.class, () -> listener.actionGet(new TimeValue(30000))); + assertThat(exceptionThrown.getMessage(), containsString("Failed to send request from inference entity id [id]"));
assertThat(exceptionThrown.getCause().getMessage(), containsString("test exception")); + } + + public static ConverseResult getTestConverseResult(String resultText) { + var message = new Message().withContent(new ContentBlock().withText(resultText)); + var converseOutput = new ConverseOutput().withMessage(message); + return new ConverseResult().withOutput(converseOutput); + } + + public static InvokeModelResult getTestInvokeResult(String resultJson) throws CharacterCodingException { + var result = new InvokeModelResult(); + result.setContentType("application/json"); + var encoder = Charset.forName("UTF-8").newEncoder(); + result.setBody(encoder.encode(CharBuffer.wrap(resultJson))); + return result; + } + + public static final String TEST_AMAZON_TITAN_EMBEDDINGS_RESULT = """ + { + "embedding": [0.123, 0.456, 0.678, 0.789], + "inputTextTokenCount": int + }"""; + + public static final String TEST_COHERE_EMBEDDINGS_RESULT = """ + { + "embeddings": [ + [0.123, 0.456, 0.678, 0.789] + ], + "id": string, + "response_type" : "embeddings_floats", + "texts": [string] + } + """; +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClientCacheTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClientCacheTests.java new file mode 100644 index 0000000000000..873b2e22497c6 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockInferenceClientCacheTests.java @@ -0,0 +1,108 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModelTests; + +import java.io.IOException; +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneId; + +import static org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockInferenceClient.CLIENT_CACHE_EXPIRY_MINUTES; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.sameInstance; + +public class AmazonBedrockInferenceClientCacheTests extends ESTestCase { + public void testCache_ReturnsSameObject() throws IOException { + AmazonBedrockInferenceClientCache cacheInstance; + try (var cache = new AmazonBedrockInferenceClientCache(AmazonBedrockMockInferenceClient::create, null)) { + cacheInstance = cache; + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "inferenceId", + "testregion", + "model", + AmazonBedrockProvider.AMAZONTITAN, + "access_key", + "secret_key" + ); + + var client = cache.getOrCreateClient(model, null); + + var secondModel = AmazonBedrockEmbeddingsModelTests.createModel( + "inferenceId_two", + "testregion", + "a_different_model", + AmazonBedrockProvider.COHERE, + "access_key", + "secret_key" + ); + + var secondClient = cache.getOrCreateClient(secondModel, null); + assertThat(client, sameInstance(secondClient)); + + assertThat(cache.clientCount(), is(1)); + + var thirdClient = cache.getOrCreateClient(model, null); + assertThat(client, sameInstance(thirdClient)); + + assertThat(cache.clientCount(), is(1)); + } + assertThat(cacheInstance.clientCount(), is(0)); + } + + public void testCache_ItEvictsExpiredClients() throws IOException { + var clock = Clock.fixed(Instant.now(), ZoneId.systemDefault()); + AmazonBedrockInferenceClientCache cacheInstance; + try (var cache = new AmazonBedrockInferenceClientCache(AmazonBedrockMockInferenceClient::create, clock)) { + cacheInstance = cache; + + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "inferenceId", + "testregion", + "model", + AmazonBedrockProvider.AMAZONTITAN, + "access_key", + "secret_key" + ); + + var client = cache.getOrCreateClient(model, null); + + var secondModel = AmazonBedrockEmbeddingsModelTests.createModel( + "inferenceId_two", + "some_other_region", + "a_different_model", + AmazonBedrockProvider.COHERE, + "other_access_key", + "other_secret_key" + ); + + assertThat(cache.clientCount(), is(1)); + + var secondClient = cache.getOrCreateClient(secondModel, null); + assertThat(client, not(sameInstance(secondClient))); + + assertThat(cache.clientCount(), is(2)); + + // set clock to after expiry + cache.setClock(Clock.fixed(clock.instant().plus(Duration.ofMinutes(CLIENT_CACHE_EXPIRY_MINUTES + 1)), ZoneId.systemDefault())); + + // get another client, this will ensure flushExpiredClients is called + var regetSecondClient = cache.getOrCreateClient(secondModel, null); + assertThat(secondClient, sameInstance(regetSecondClient)); + + var regetFirstClient = cache.getOrCreateClient(model, null); + assertThat(client, not(sameInstance(regetFirstClient))); + } + assertThat(cacheInstance.clientCount(), is(0)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockClientCache.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockClientCache.java new file mode 100644 index 0000000000000..912967a9012d7 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockClientCache.java @@ -0,0 +1,62 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import com.amazonaws.services.bedrockruntime.model.ConverseResult; +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; + +import java.io.IOException; + +public class AmazonBedrockMockClientCache implements AmazonBedrockClientCache { + private ConverseResult converseResult = null; + private InvokeModelResult invokeModelResult = null; + private ElasticsearchException exceptionToThrow = null; + + public AmazonBedrockMockClientCache() {} + + public AmazonBedrockMockClientCache( + @Nullable ConverseResult converseResult, + @Nullable InvokeModelResult invokeModelResult, + @Nullable ElasticsearchException exceptionToThrow + ) { + this.converseResult = converseResult; + this.invokeModelResult = invokeModelResult; + this.exceptionToThrow = exceptionToThrow; + } + + @Override + public AmazonBedrockBaseClient getOrCreateClient(AmazonBedrockModel model, TimeValue timeout) { + var client = (AmazonBedrockMockInferenceClient) AmazonBedrockMockInferenceClient.create(model, timeout); + client.setConverseResult(converseResult); + client.setInvokeModelResult(invokeModelResult); + client.setExceptionToThrow(exceptionToThrow); + return client; + } + + @Override + public void close() throws IOException { + // nothing to do + } + + public void setConverseResult(ConverseResult converseResult) { + this.converseResult = converseResult; + } + + public void setInvokeModelResult(InvokeModelResult invokeModelResult) { + this.invokeModelResult = invokeModelResult; + } + + public void setExceptionToThrow(ElasticsearchException exceptionToThrow) { + this.exceptionToThrow = exceptionToThrow; + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockExecuteRequestSender.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockExecuteRequestSender.java new file mode 100644 index 0000000000000..b0df8a40e2551 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockExecuteRequestSender.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import com.amazonaws.services.bedrockruntime.model.ConverseResult; +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockRequest; +import org.elasticsearch.xpack.inference.external.response.amazonbedrock.AmazonBedrockResponseHandler; +import org.elasticsearch.xpack.inference.logging.ThrottlerManager; + +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.function.Supplier; + +public class AmazonBedrockMockExecuteRequestSender extends AmazonBedrockExecuteOnlyRequestSender { + + private Queue<Object> results = new ConcurrentLinkedQueue<>(); + private Queue<List<String>> inputs = new ConcurrentLinkedQueue<>(); + private int sendCounter = 0; + + public AmazonBedrockMockExecuteRequestSender(AmazonBedrockClientCache clientCache, ThrottlerManager throttlerManager) { + super(clientCache, throttlerManager); + } + + public void enqueue(Object result) { + results.add(result); + } + + public int sendCount() { + return sendCounter; + } + + public List<String> getInputs() { + return inputs.remove(); + } + + @Override + protected AmazonBedrockExecutor createExecutor( + AmazonBedrockRequest awsRequest, + AmazonBedrockResponseHandler awsResponse, + Logger logger, + Supplier<Boolean> hasRequestTimedOutFunction, + ActionListener<InferenceServiceResults> listener + ) { + setCacheResult(); + return super.createExecutor(awsRequest, awsResponse, logger, hasRequestTimedOutFunction, listener); + } + + private void setCacheResult() { + var mockCache = (AmazonBedrockMockClientCache) this.clientCache; + var result = results.remove(); + if (result instanceof ConverseResult converseResult) { + mockCache.setConverseResult(converseResult); + return; + } + + if (result instanceof InvokeModelResult invokeModelResult) { + mockCache.setInvokeModelResult(invokeModelResult); + return; + } + + if (result instanceof ElasticsearchException exception) { + mockCache.setExceptionToThrow(exception); + return; + } + + throw new RuntimeException("Unknown result type: " + result.getClass()); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockInferenceClient.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockInferenceClient.java new file mode 100644 index 0000000000000..dcbf8dfcbff01 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockInferenceClient.java @@ -0,0 +1,133 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import com.amazonaws.services.bedrockruntime.AmazonBedrockRuntimeAsync; +import com.amazonaws.services.bedrockruntime.model.ConverseResult; +import com.amazonaws.services.bedrockruntime.model.InvokeModelResult; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockModel; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; + +public class AmazonBedrockMockInferenceClient extends AmazonBedrockInferenceClient { + private ConverseResult converseResult = null; + private InvokeModelResult invokeModelResult = null; + private ElasticsearchException exceptionToThrow = null; + + private Future<ConverseResult> converseResultFuture = new MockConverseResultFuture(); + private Future<InvokeModelResult> invokeModelResultFuture = new MockInvokeResultFuture(); + + public static AmazonBedrockBaseClient create(AmazonBedrockModel model, @Nullable TimeValue timeout) { + return new AmazonBedrockMockInferenceClient(model, timeout); + } + + protected AmazonBedrockMockInferenceClient(AmazonBedrockModel model, @Nullable TimeValue timeout) { + super(model, timeout); + } + + public void setExceptionToThrow(ElasticsearchException exceptionToThrow) { + this.exceptionToThrow = exceptionToThrow; + } + + public void setConverseResult(ConverseResult result) { + this.converseResult = result; + } + + public void setInvokeModelResult(InvokeModelResult result) { + this.invokeModelResult = result; + } + + @Override + protected AmazonBedrockRuntimeAsync createAmazonBedrockClient(AmazonBedrockModel model, @Nullable TimeValue timeout) { + var runtimeClient = mock(AmazonBedrockRuntimeAsync.class); + doAnswer(invocation -> invokeModelResultFuture).when(runtimeClient).invokeModelAsync(any()); + doAnswer(invocation -> converseResultFuture).when(runtimeClient).converseAsync(any()); + + return runtimeClient; + } + + @Override + void close() {} + + private class MockConverseResultFuture implements Future<ConverseResult> { + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + return false; + } + + @Override + public boolean isCancelled() { + return false; + } + + @Override + public boolean isDone() { + return false; + } + + @Override + public ConverseResult get() throws InterruptedException, ExecutionException { + if (exceptionToThrow != null) { + throw exceptionToThrow; + } + return converseResult; + } + + @Override + public ConverseResult get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { + if (exceptionToThrow != null) { + throw exceptionToThrow; + } + return converseResult; + } + } + + private class MockInvokeResultFuture implements Future<InvokeModelResult> { + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + return false; + } + + @Override + public boolean isCancelled() { + return false; + } + + @Override + public boolean isDone() { + return false; + } + + @Override + public InvokeModelResult get() throws InterruptedException, ExecutionException { + if (exceptionToThrow != null) { + throw exceptionToThrow; + } + return invokeModelResult; + } + + @Override + public InvokeModelResult get(long timeout, TimeUnit unit) throws
InterruptedException, ExecutionException, TimeoutException { + if (exceptionToThrow != null) { + throw exceptionToThrow; + } + return invokeModelResult; + } + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockRequestSender.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockRequestSender.java new file mode 100644 index 0000000000000..e68beaf4c1eb5 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockMockRequestSender.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; +import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; +import org.elasticsearch.xpack.inference.external.http.sender.RequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.services.ServiceComponents; + +import java.io.IOException; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; + +public class AmazonBedrockMockRequestSender implements Sender { + + public static class Factory extends AmazonBedrockRequestSender.Factory { + private final Sender sender; + + public Factory(ServiceComponents serviceComponents, ClusterService clusterService) { + super(serviceComponents, clusterService); + this.sender = new AmazonBedrockMockRequestSender(); + } + + public Sender createSender() { + return sender; + } + } + + private Queue<Object> results = new ConcurrentLinkedQueue<>(); + private Queue<List<String>> inputs = new ConcurrentLinkedQueue<>(); + private int sendCounter = 0; + + public void enqueue(Object result) { + results.add(result); + } + + public int sendCount() { + return sendCounter; + } + + public List<String> getInputs() { + return inputs.remove(); + } + + @Override + public void start() { + // do nothing + } + + @Override + public void send( + RequestManager requestCreator, + InferenceInputs inferenceInputs, + TimeValue timeout, + ActionListener<InferenceServiceResults> listener + ) { + sendCounter++; + var docsInput = (DocumentsOnlyInput) inferenceInputs; + inputs.add(docsInput.getInputs()); + + if (results.isEmpty()) { + listener.onFailure(new ElasticsearchException("No results found")); + } else { + var resultObject = results.remove(); + if (resultObject instanceof InferenceServiceResults inferenceResult) { + listener.onResponse(inferenceResult); + } else if (resultObject instanceof Exception e) { + listener.onFailure(e); + } else { + throw new RuntimeException("Unknown result type: " + resultObject.getClass()); + } + } + } + + @Override + public void close() throws IOException { + // do nothing + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockRequestSenderTests.java
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockRequestSenderTests.java new file mode 100644 index 0000000000000..7fa8a09d5bf12 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/amazonbedrock/AmazonBedrockRequestSenderTests.java @@ -0,0 +1,127 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.amazonbedrock; + +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.inference.external.http.sender.AmazonBedrockChatCompletionRequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.AmazonBedrockEmbeddingsRequestManager; +import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.logging.ThrottlerManager; +import org.elasticsearch.xpack.inference.services.ServiceComponentsTests; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModelTests; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModelTests; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityPool; +import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; +import static org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockExecutorTests.TEST_AMAZON_TITAN_EMBEDDINGS_RESULT; +import static org.elasticsearch.xpack.inference.results.ChatCompletionResultsTests.buildExpectationCompletion; +import static org.elasticsearch.xpack.inference.results.TextEmbeddingResultsTests.buildExpectationFloat; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; + +public class AmazonBedrockRequestSenderTests extends ESTestCase { + private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); + private ThreadPool threadPool; + private final AtomicReference threadRef = new AtomicReference<>(); + + @Before + public void init() throws Exception { + threadPool = createThreadPool(inferenceUtilityPool()); + threadRef.set(null); + } + + @After + public void shutdown() throws IOException, InterruptedException { + if (threadRef.get() != null) { + threadRef.get().join(TIMEOUT.millis()); + } + + terminate(threadPool); + } + + public void testCreateSender_SendsEmbeddingsRequestAndReceivesResponse() throws Exception { + var senderFactory = createSenderFactory(threadPool, Settings.EMPTY); + var requestSender = new AmazonBedrockMockExecuteRequestSender(new AmazonBedrockMockClientCache(), mock(ThrottlerManager.class)); + 
requestSender.enqueue(AmazonBedrockExecutorTests.getTestInvokeResult(TEST_AMAZON_TITAN_EMBEDDINGS_RESULT)); + try (var sender = createSender(senderFactory, requestSender)) { + sender.start(); + + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "test_id", + "test_region", + "test_model", + AmazonBedrockProvider.AMAZONTITAN, + "accesskey", + "secretkey" + ); + + PlainActionFuture listener = new PlainActionFuture<>(); + var serviceComponents = ServiceComponentsTests.createWithEmptySettings(threadPool); + var requestManager = new AmazonBedrockEmbeddingsRequestManager( + model, + serviceComponents.truncator(), + threadPool, + new TimeValue(30, TimeUnit.SECONDS) + ); + sender.send(requestManager, new DocumentsOnlyInput(List.of("abc")), null, listener); + + var result = listener.actionGet(TIMEOUT); + assertThat(result.asMap(), is(buildExpectationFloat(List.of(new float[] { 0.123F, 0.456F, 0.678F, 0.789F })))); + } + } + + public void testCreateSender_SendsCompletionRequestAndReceivesResponse() throws Exception { + var senderFactory = createSenderFactory(threadPool, Settings.EMPTY); + var requestSender = new AmazonBedrockMockExecuteRequestSender(new AmazonBedrockMockClientCache(), mock(ThrottlerManager.class)); + requestSender.enqueue(AmazonBedrockExecutorTests.getTestConverseResult("test response text")); + try (var sender = createSender(senderFactory, requestSender)) { + sender.start(); + + var model = AmazonBedrockChatCompletionModelTests.createModel( + "test_id", + "test_region", + "test_model", + AmazonBedrockProvider.AMAZONTITAN, + "accesskey", + "secretkey" + ); + + PlainActionFuture listener = new PlainActionFuture<>(); + var requestManager = new AmazonBedrockChatCompletionRequestManager(model, threadPool, new TimeValue(30, TimeUnit.SECONDS)); + sender.send(requestManager, new DocumentsOnlyInput(List.of("abc")), null, listener); + + var result = listener.actionGet(TIMEOUT); + assertThat(result.asMap(), is(buildExpectationCompletion(List.of("test response text")))); + } + } + + public static AmazonBedrockRequestSender.Factory createSenderFactory(ThreadPool threadPool, Settings settings) { + return new AmazonBedrockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, settings), + mockClusterServiceEmpty() + ); + } + + public static Sender createSender(AmazonBedrockRequestSender.Factory factory, AmazonBedrockExecuteOnlyRequestSender requestSender) { + return factory.createSender(requestSender); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAI21LabsCompletionRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAI21LabsCompletionRequestEntityTests.java new file mode 100644 index 0000000000000..b91aab5410048 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAI21LabsCompletionRequestEntityTests.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import org.elasticsearch.test.ESTestCase; + +import java.util.List; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHasMessage; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.getConverseRequest; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockAI21LabsCompletionRequestEntityTests extends ESTestCase { + public void testRequestEntity_CreatesProperRequest() { + var request = new AmazonBedrockAI21LabsCompletionRequestEntity(List.of("test message"), null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTemperature() { + var request = new AmazonBedrockAI21LabsCompletionRequestEntity(List.of("test message"), 1.0, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertTrue(doesConverseRequestHaveTemperatureInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopP() { + var request = new AmazonBedrockAI21LabsCompletionRequestEntity(List.of("test message"), null, 1.0, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + 
assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopPInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithMaxTokens() { + var request = new AmazonBedrockAI21LabsCompletionRequestEntity(List.of("test message"), null, null, 128); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertTrue(doesConverseRequestHaveMaxTokensInput(builtRequest, 128)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAnthropicCompletionRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAnthropicCompletionRequestEntityTests.java new file mode 100644 index 0000000000000..89d5fec7efba6 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockAnthropicCompletionRequestEntityTests.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import org.elasticsearch.test.ESTestCase; + +import java.util.List; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHasMessage; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.getConverseRequest; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockAnthropicCompletionRequestEntityTests extends ESTestCase { + public void testRequestEntity_CreatesProperRequest() { + var request = new AmazonBedrockAnthropicCompletionRequestEntity(List.of("test message"), null, null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTemperature() { + var request = new AmazonBedrockAnthropicCompletionRequestEntity(List.of("test message"), 1.0, null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertTrue(doesConverseRequestHaveTemperatureInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopP() { + var request = new AmazonBedrockAnthropicCompletionRequestEntity(List.of("test message"), null, 1.0, null, null); + var builtRequest = getConverseRequest("testmodel", request); + 
assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopPInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithMaxTokens() { + var request = new AmazonBedrockAnthropicCompletionRequestEntity(List.of("test message"), null, null, null, 128); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertTrue(doesConverseRequestHaveMaxTokensInput(builtRequest, 128)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopK() { + var request = new AmazonBedrockAnthropicCompletionRequestEntity(List.of("test message"), null, null, 1.0, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopKInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockCohereCompletionRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockCohereCompletionRequestEntityTests.java new file mode 100644 index 0000000000000..8df5c7f32e529 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockCohereCompletionRequestEntityTests.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import org.elasticsearch.test.ESTestCase; + +import java.util.List; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHasMessage; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.getConverseRequest; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockCohereCompletionRequestEntityTests extends ESTestCase { + public void testRequestEntity_CreatesProperRequest() { + var request = new AmazonBedrockCohereCompletionRequestEntity(List.of("test message"), null, null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTemperature() { + var request = new AmazonBedrockCohereCompletionRequestEntity(List.of("test message"), 1.0, null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertTrue(doesConverseRequestHaveTemperatureInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopP() { + var request = new AmazonBedrockCohereCompletionRequestEntity(List.of("test message"), null, 1.0, null, null); + var builtRequest = getConverseRequest("testmodel", request); + 
assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopPInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithMaxTokens() { + var request = new AmazonBedrockCohereCompletionRequestEntity(List.of("test message"), null, null, null, 128); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertTrue(doesConverseRequestHaveMaxTokensInput(builtRequest, 128)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopK() { + var request = new AmazonBedrockCohereCompletionRequestEntity(List.of("test message"), null, null, 1.0, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopKInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseRequestUtils.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseRequestUtils.java new file mode 100644 index 0000000000000..cbbe3c5554967 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockConverseRequestUtils.java @@ -0,0 +1,94 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import com.amazonaws.services.bedrockruntime.model.ContentBlock; +import com.amazonaws.services.bedrockruntime.model.ConverseRequest; +import com.amazonaws.services.bedrockruntime.model.Message; + +import org.elasticsearch.core.Strings; + +public final class AmazonBedrockConverseRequestUtils { + public static ConverseRequest getConverseRequest(String modelId, AmazonBedrockConverseRequestEntity requestEntity) { + var converseRequest = new ConverseRequest().withModelId(modelId); + converseRequest = requestEntity.addMessages(converseRequest); + converseRequest = requestEntity.addInferenceConfig(converseRequest); + converseRequest = requestEntity.addAdditionalModelFields(converseRequest); + return converseRequest; + } + + public static boolean doesConverseRequestHasMessage(ConverseRequest converseRequest, String expectedMessage) { + for (Message message : converseRequest.getMessages()) { + var content = message.getContent(); + for (ContentBlock contentBlock : content) { + if (contentBlock.getText().equals(expectedMessage)) { + return true; + } + } + } + return false; + } + + public static boolean doesConverseRequestHaveAnyTemperatureInput(ConverseRequest converseRequest) { + return converseRequest.getInferenceConfig() != null + && converseRequest.getInferenceConfig().getTemperature() != null + && (converseRequest.getInferenceConfig().getTemperature().isNaN() == false); + } + + public static boolean doesConverseRequestHaveAnyTopPInput(ConverseRequest converseRequest) { + return converseRequest.getInferenceConfig() != null + && converseRequest.getInferenceConfig().getTopP() != null + && (converseRequest.getInferenceConfig().getTopP().isNaN() == false); + } + + public static boolean doesConverseRequestHaveAnyMaxTokensInput(ConverseRequest converseRequest) { + return converseRequest.getInferenceConfig() != null && converseRequest.getInferenceConfig().getMaxTokens() != null; + } + + public static boolean doesConverseRequestHaveTemperatureInput(ConverseRequest converseRequest, Double temperature) { + return doesConverseRequestHaveAnyTemperatureInput(converseRequest) + && converseRequest.getInferenceConfig().getTemperature().equals(temperature.floatValue()); + } + + public static boolean doesConverseRequestHaveTopPInput(ConverseRequest converseRequest, Double topP) { + return doesConverseRequestHaveAnyTopPInput(converseRequest) + && converseRequest.getInferenceConfig().getTopP().equals(topP.floatValue()); + } + + public static boolean doesConverseRequestHaveMaxTokensInput(ConverseRequest converseRequest, Integer maxTokens) { + return doesConverseRequestHaveAnyMaxTokensInput(converseRequest) + && converseRequest.getInferenceConfig().getMaxTokens().equals(maxTokens); + } + + public static boolean doesConverseRequestHaveAnyTopKInput(ConverseRequest converseRequest) { + if (converseRequest.getAdditionalModelResponseFieldPaths() == null) { + return false; + } + + for (String fieldPath : converseRequest.getAdditionalModelResponseFieldPaths()) { + if (fieldPath.contains("{\"top_k\":")) { + return true; + } + } + return false; + } + + public static boolean doesConverseRequestHaveTopKInput(ConverseRequest converseRequest, Double topK) { + if (doesConverseRequestHaveAnyTopKInput(converseRequest) == false) { + return false; + } + + var checkString = Strings.format("{\"top_k\":%f}", topK.floatValue()); + for (String fieldPath : converseRequest.getAdditionalModelResponseFieldPaths()) { + if 
(fieldPath.contains(checkString)) { + return true; + } + } + return false; + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMetaCompletionRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMetaCompletionRequestEntityTests.java new file mode 100644 index 0000000000000..fa482669a0bb2 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMetaCompletionRequestEntityTests.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import org.elasticsearch.test.ESTestCase; + +import java.util.List; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHasMessage; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.getConverseRequest; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockMetaCompletionRequestEntityTests extends ESTestCase { + public void testRequestEntity_CreatesProperRequest() { + var request = new AmazonBedrockMetaCompletionRequestEntity(List.of("test message"), null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTemperature() { + var request = new AmazonBedrockMetaCompletionRequestEntity(List.of("test message"), 1.0, 
null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertTrue(doesConverseRequestHaveTemperatureInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopP() { + var request = new AmazonBedrockMetaCompletionRequestEntity(List.of("test message"), null, 1.0, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopPInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithMaxTokens() { + var request = new AmazonBedrockMetaCompletionRequestEntity(List.of("test message"), null, null, 128); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertTrue(doesConverseRequestHaveMaxTokensInput(builtRequest, 128)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMistralCompletionRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMistralCompletionRequestEntityTests.java new file mode 100644 index 0000000000000..788625d3702b8 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockMistralCompletionRequestEntityTests.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import org.elasticsearch.test.ESTestCase; + +import java.util.List; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHasMessage; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.getConverseRequest; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockMistralCompletionRequestEntityTests extends ESTestCase { + public void testRequestEntity_CreatesProperRequest() { + var request = new AmazonBedrockMistralCompletionRequestEntity(List.of("test message"), null, null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTemperature() { + var request = new AmazonBedrockMistralCompletionRequestEntity(List.of("test message"), 1.0, null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertTrue(doesConverseRequestHaveTemperatureInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopP() { + var request = new AmazonBedrockMistralCompletionRequestEntity(List.of("test message"), null, 1.0, null, null); + var builtRequest = getConverseRequest("testmodel", request); + 
assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopPInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithMaxTokens() { + var request = new AmazonBedrockMistralCompletionRequestEntity(List.of("test message"), null, null, null, 128); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertTrue(doesConverseRequestHaveMaxTokensInput(builtRequest, 128)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopK() { + var request = new AmazonBedrockMistralCompletionRequestEntity(List.of("test message"), null, null, 1.0, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopKInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockTitanCompletionRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockTitanCompletionRequestEntityTests.java new file mode 100644 index 0000000000000..79fa387876c8b --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/completion/AmazonBedrockTitanCompletionRequestEntityTests.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion; + +import org.elasticsearch.test.ESTestCase; + +import java.util.List; + +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHasMessage; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopKInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveAnyTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveMaxTokensInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTemperatureInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.doesConverseRequestHaveTopPInput; +import static org.elasticsearch.xpack.inference.external.request.amazonbedrock.completion.AmazonBedrockConverseRequestUtils.getConverseRequest; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockTitanCompletionRequestEntityTests extends ESTestCase { + public void testRequestEntity_CreatesProperRequest() { + var request = new AmazonBedrockTitanCompletionRequestEntity(List.of("test message"), null, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTemperature() { + var request = new AmazonBedrockTitanCompletionRequestEntity(List.of("test message"), 1.0, null, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertTrue(doesConverseRequestHaveTemperatureInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithTopP() { + var request = new AmazonBedrockTitanCompletionRequestEntity(List.of("test message"), null, 1.0, null); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + 
assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertTrue(doesConverseRequestHaveTopPInput(builtRequest, 1.0)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyMaxTokensInput(builtRequest)); + } + + public void testRequestEntity_CreatesProperRequest_WithMaxTokens() { + var request = new AmazonBedrockTitanCompletionRequestEntity(List.of("test message"), null, null, 128); + var builtRequest = getConverseRequest("testmodel", request); + assertThat(builtRequest.getModelId(), is("testmodel")); + assertThat(doesConverseRequestHasMessage(builtRequest, "test message"), is(true)); + assertFalse(doesConverseRequestHaveAnyTemperatureInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopPInput(builtRequest)); + assertFalse(doesConverseRequestHaveAnyTopKInput(builtRequest)); + assertTrue(doesConverseRequestHaveMaxTokensInput(builtRequest, 128)); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockCohereEmbeddingsRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockCohereEmbeddingsRequestEntityTests.java new file mode 100644 index 0000000000000..fd8114f889d6a --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockCohereEmbeddingsRequestEntityTests.java @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings; + +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockJsonBuilder; + +import java.io.IOException; +import java.util.List; + +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockCohereEmbeddingsRequestEntityTests extends ESTestCase { + public void testRequestEntity_GeneratesExpectedJsonBody() throws IOException { + var entity = new AmazonBedrockCohereEmbeddingsRequestEntity(List.of("test input")); + var builder = new AmazonBedrockJsonBuilder(entity); + var result = builder.getStringContent(); + assertThat(result, is("{\"texts\":[\"test input\"],\"input_type\":\"search_document\"}")); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockTitanEmbeddingsRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockTitanEmbeddingsRequestEntityTests.java new file mode 100644 index 0000000000000..da98fa251fdc8 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/amazonbedrock/embeddings/AmazonBedrockTitanEmbeddingsRequestEntityTests.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.external.request.amazonbedrock.embeddings; + +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.external.request.amazonbedrock.AmazonBedrockJsonBuilder; + +import java.io.IOException; + +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockTitanEmbeddingsRequestEntityTests extends ESTestCase { + public void testRequestEntity_GeneratesExpectedJsonBody() throws IOException { + var entity = new AmazonBedrockTitanEmbeddingsRequestEntity("test input"); + var builder = new AmazonBedrockJsonBuilder(entity); + var result = builder.getStringContent(); + assertThat(result, is("{\"inputText\":\"test input\"}")); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java index 50d91a2271de6..2e9be42b5c5d4 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankFeaturePhaseRankCoordinatorContextTests.java @@ -12,7 +12,7 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.search.rank.feature.RankFeatureDoc; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; @@ -54,10 +54,9 @@ public void onFailure(Exception e) { fail(); } }); - verify(mockClient).execute( - eq(InferenceAction.INSTANCE), - argThat(actionRequest -> ((InferenceAction.Request) actionRequest).getTaskType().equals(TaskType.RERANK)), + eq(GetInferenceModelAction.INSTANCE), + argThat(actionRequest -> ((GetInferenceModelAction.Request) actionRequest).getTaskType().equals(TaskType.RERANK)), any() ); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java index 8cb9305edd057..a26dc50097cf5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.inference.rank.textsimilarity; +import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.client.internal.Client; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.inference.InputType; @@ -29,22 +30,46 @@ import java.util.Objects; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; public class TextSimilarityRankTests extends ESSingleNodeTestCase { /** - * {@code TextSimilarityRankBuilder} that simulates an inference call that returns a different number of results as the input. + * {@code TextSimilarityRankBuilder} that sets top_n in the inference endpoint's task settings. 
+     * See {@code TextSimilarityTestPlugin -> TestFilter -> handleGetInferenceModelActionRequest} for the logic that extracts the top_n
+     * value.
      */
-    public static class InvalidInferenceResultCountProvidingTextSimilarityRankBuilder extends TextSimilarityRankBuilder {
+    public static class TopNConfigurationAcceptingTextSimilarityRankBuilder extends TextSimilarityRankBuilder {
 
-        public InvalidInferenceResultCountProvidingTextSimilarityRankBuilder(
+        public TopNConfigurationAcceptingTextSimilarityRankBuilder(
             String field,
             String inferenceId,
             String inferenceText,
             int rankWindowSize,
-            Float minScore
+            Float minScore,
+            int topN
+        ) {
+            super(field, inferenceId + "-task-settings-top-" + topN, inferenceText, rankWindowSize, minScore);
+        }
+    }
+
+    /**
+     * {@code TextSimilarityRankBuilder} that simulates an inference call returning N results.
+     */
+    public static class InferenceResultCountAcceptingTextSimilarityRankBuilder extends TextSimilarityRankBuilder {
+
+        private final int inferenceResultCount;
+
+        public InferenceResultCountAcceptingTextSimilarityRankBuilder(
+            String field,
+            String inferenceId,
+            String inferenceText,
+            int rankWindowSize,
+            Float minScore,
+            int inferenceResultCount
         ) {
             super(field, inferenceId, inferenceText, rankWindowSize, minScore);
+            this.inferenceResultCount = inferenceResultCount;
         }
 
         @Override
@@ -62,10 +87,10 @@ public RankFeaturePhaseRankCoordinatorContext buildRankFeaturePhaseCoordinatorCo
         protected InferenceAction.Request generateRequest(List<String> docFeatures) {
             return new InferenceAction.Request(
                 TaskType.RERANK,
-                inferenceId,
+                this.inferenceId,
                 inferenceText,
                 docFeatures,
-                Map.of("invalidInferenceResultCount", true),
+                Map.of("inferenceResultCount", inferenceResultCount),
                 InputType.SEARCH,
                 InferenceAction.Request.DEFAULT_TIMEOUT
             );
@@ -151,18 +176,38 @@ public void testRerankInferenceFailure() {
         );
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/110398")
-    public void testRerankInferenceResultMismatch() {
-        ElasticsearchAssertions.assertFailures(
+    public void testRerankTopNConfigurationAndRankWindowSizeMismatch() {
+        SearchPhaseExecutionException ex = expectThrows(
+            SearchPhaseExecutionException.class,
             // Execute search with text similarity reranking
             client.prepareSearch()
                 .setRankBuilder(
-                    new InvalidInferenceResultCountProvidingTextSimilarityRankBuilder("text", "my-rerank-model", "my query", 100, 1.5f)
+                    // Simulate reranker configuration with top_n=3 in task_settings, which is different from rank_window_size=100
+                    // (Note: top_n comes from inferenceId, there's no other easy way of passing this to the mocked get model request)
+                    new TopNConfigurationAcceptingTextSimilarityRankBuilder("text", "my-rerank-model", "my query", 100, 1.5f, 3)
                 )
-                .setQuery(QueryBuilders.matchAllQuery()),
-            RestStatus.INTERNAL_SERVER_ERROR,
-            containsString("Failed to execute phase [rank-feature], Computing updated ranks for results failed")
+                .setQuery(QueryBuilders.matchAllQuery())
+        );
+        assertThat(ex.status(), equalTo(RestStatus.BAD_REQUEST));
+        assertThat(
+            ex.getDetailedMessage(),
+            containsString("Reduce rank_window_size to be less than or equal to the configured top N value")
+        );
+    }
+
+    public void testRerankInputSizeAndInferenceResultsMismatch() {
+        SearchPhaseExecutionException ex = expectThrows(
+            SearchPhaseExecutionException.class,
+            // Execute search with text similarity reranking
+            client.prepareSearch()
+                .setRankBuilder(
+                    // Simulate reranker returning different number of results from input
+                    new InferenceResultCountAcceptingTextSimilarityRankBuilder("text", "my-rerank-model", "my query", 100, 1.5f, 4)
+                )
+                .setQuery(QueryBuilders.matchAllQuery())
         );
+        assertThat(ex.status(), equalTo(RestStatus.INTERNAL_SERVER_ERROR));
+        assertThat(ex.getDetailedMessage(), containsString("Reranker input document count and returned score count mismatch"));
     }
 
     private static void assertHitHasRankScoreAndText(SearchHit hit, int expectedRank, float expectedScore, String expectedText) {
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java
index 1e457a1a27c92..de81d13f92ea5 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityTestPlugin.java
@@ -21,7 +21,9 @@
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.inference.EmptyTaskSettings;
 import org.elasticsearch.inference.InputType;
+import org.elasticsearch.inference.ModelConfigurations;
 import org.elasticsearch.inference.TaskType;
 import org.elasticsearch.plugins.ActionPlugin;
 import org.elasticsearch.plugins.Plugin;
@@ -39,8 +41,13 @@
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.ToXContent;
 import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction;
 import org.elasticsearch.xpack.core.inference.action.InferenceAction;
 import org.elasticsearch.xpack.core.inference.results.RankedDocsResults;
+import org.elasticsearch.xpack.inference.services.cohere.CohereService;
+import org.elasticsearch.xpack.inference.services.cohere.CohereServiceSettings;
+import org.elasticsearch.xpack.inference.services.cohere.rerank.CohereRerankServiceSettings;
+import org.elasticsearch.xpack.inference.services.cohere.rerank.CohereRerankTaskSettings;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -48,6 +55,8 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import static java.util.Collections.singletonList;
 import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
@@ -100,7 +109,6 @@ public int order() {
         }
 
         @Override
-        @SuppressWarnings("unchecked")
         public void apply(
             Task task,
             String action,
@@ -108,23 +116,59 @@ public void apply(
             ActionListener<Response> listener,
             ActionFilterChain<Request, Response> chain
         ) {
-            // For any other action than inference, execute normally
-            if (action.equals(InferenceAction.INSTANCE.name()) == false) {
+            if (action.equals(GetInferenceModelAction.INSTANCE.name())) {
+                assert request instanceof GetInferenceModelAction.Request;
+                handleGetInferenceModelActionRequest((GetInferenceModelAction.Request) request, listener);
+            } else if (action.equals(InferenceAction.INSTANCE.name())) {
+                assert request instanceof InferenceAction.Request;
+                handleInferenceActionRequest((InferenceAction.Request) request, listener);
+            } else {
+                // For any other action than get model and inference, execute normally
                 chain.proceed(task, action, request, listener);
-                return;
             }
+        }
-
-        assert request instanceof InferenceAction.Request;
-        boolean shouldThrow = (boolean) ((InferenceAction.Request) request).getTaskSettings().getOrDefault("throwing", false);
-        boolean hasInvalidInferenceResultCount = (boolean) ((InferenceAction.Request) request).getTaskSettings()
-            .getOrDefault("invalidInferenceResultCount", false);
+        @SuppressWarnings("unchecked")
+        private <Response extends ActionResponse> void handleGetInferenceModelActionRequest(
+            GetInferenceModelAction.Request request,
+            ActionListener<Response> listener
+        ) {
+            String inferenceEntityId = request.getInferenceEntityId();
+            Integer topN = null;
+            Matcher extractTopN = Pattern.compile(".*(task-settings-top-\\d+).*").matcher(inferenceEntityId);
+            if (extractTopN.find()) {
+                topN = Integer.parseInt(extractTopN.group(1).replaceAll("\\D", ""));
+            }
+
+            ActionResponse response = new GetInferenceModelAction.Response(
+                List.of(
+                    new ModelConfigurations(
+                        request.getInferenceEntityId(),
+                        request.getTaskType(),
+                        CohereService.NAME,
+                        new CohereRerankServiceSettings(new CohereServiceSettings()),
+                        topN == null ? new EmptyTaskSettings() : new CohereRerankTaskSettings(topN, null, null)
+                    )
+                )
+            );
+            listener.onResponse((Response) response);
+        }
+
+        @SuppressWarnings("unchecked")
+        private <Response extends ActionResponse> void handleInferenceActionRequest(
+            InferenceAction.Request request,
+            ActionListener<Response> listener
+        ) {
+            Map<String, Object> taskSettings = request.getTaskSettings();
+            boolean shouldThrow = (boolean) taskSettings.getOrDefault("throwing", false);
+            Integer inferenceResultCount = (Integer) taskSettings.get("inferenceResultCount");
 
             if (shouldThrow) {
                 listener.onFailure(new UnsupportedOperationException("simulated failure"));
             } else {
                 List<RankedDocsResults.RankedDoc> rankedDocsResults = new ArrayList<>();
-                List<String> inputs = ((InferenceAction.Request) request).getInput();
-                int resultCount = hasInvalidInferenceResultCount ? inputs.size() - 1 : inputs.size();
+                List<String> inputs = request.getInput();
+                int resultCount = inferenceResultCount == null ? inputs.size() : inferenceResultCount;
                 for (int i = 0; i < resultCount; i++) {
                     rankedDocsResults.add(new RankedDocsResults.RankedDoc(i, Float.parseFloat(inputs.get(i)), inputs.get(i)));
                 }
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockSecretSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockSecretSettingsTests.java
new file mode 100644
index 0000000000000..904851842a6c8
--- /dev/null
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockSecretSettingsTests.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.inference.services.amazonbedrock;
+
+import org.elasticsearch.TransportVersion;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.ValidationException;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.settings.SecureString;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentFactory;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xpack.core.ml.AbstractBWCWireSerializationTestCase;
+import org.hamcrest.CoreMatchers;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.ACCESS_KEY_FIELD;
+import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.SECRET_KEY_FIELD;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.is;
+
+public class AmazonBedrockSecretSettingsTests extends AbstractBWCWireSerializationTestCase<AmazonBedrockSecretSettings> {
+
+    public void testIt_CreatesSettings_ReturnsNullFromMap_null() {
+        var secrets = AmazonBedrockSecretSettings.fromMap(null);
+        assertNull(secrets);
+    }
+
+    public void testIt_CreatesSettings_FromMap_WithValues() {
+        var secrets = AmazonBedrockSecretSettings.fromMap(
+            new HashMap<>(Map.of(ACCESS_KEY_FIELD, "accesstest", SECRET_KEY_FIELD, "secrettest"))
+        );
+        assertThat(
+            secrets,
+            is(new AmazonBedrockSecretSettings(new SecureString("accesstest".toCharArray()), new SecureString("secrettest".toCharArray())))
+        );
+    }
+
+    public void testIt_CreatesSettings_FromMap_IgnoresExtraKeys() {
+        var secrets = AmazonBedrockSecretSettings.fromMap(
+            new HashMap<>(Map.of(ACCESS_KEY_FIELD, "accesstest", SECRET_KEY_FIELD, "secrettest", "extrakey", "extravalue"))
+        );
+        assertThat(
+            secrets,
+            is(new AmazonBedrockSecretSettings(new SecureString("accesstest".toCharArray()), new SecureString("secrettest".toCharArray())))
+        );
+    }
+
+    public void testIt_FromMap_ThrowsValidationException_AccessKeyMissing() {
+        var thrownException = expectThrows(
+            ValidationException.class,
+            () -> AmazonBedrockSecretSettings.fromMap(new HashMap<>(Map.of(SECRET_KEY_FIELD, "secrettest")))
+        );
+
+        assertThat(
+            thrownException.getMessage(),
+            containsString(Strings.format("[secret_settings] does not contain the required setting [%s]", ACCESS_KEY_FIELD))
+        );
+    }
+
+    public void testIt_FromMap_ThrowsValidationException_SecretKeyMissing() {
+        var thrownException = expectThrows(
+            ValidationException.class,
+            () -> AmazonBedrockSecretSettings.fromMap(new HashMap<>(Map.of(ACCESS_KEY_FIELD, "accesstest")))
+        );
+
+        assertThat(
+            thrownException.getMessage(),
+            containsString(Strings.format("[secret_settings] does not contain the required setting [%s]", SECRET_KEY_FIELD))
+        );
+    }
+
+    public void testToXContent_CreatesProperContent() throws IOException {
+        var secrets = AmazonBedrockSecretSettings.fromMap(
+            new HashMap<>(Map.of(ACCESS_KEY_FIELD, "accesstest", SECRET_KEY_FIELD, "secrettest"))
+        );
+
+        XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
+        secrets.toXContent(builder, null);
+        String xContentResult = Strings.toString(builder);
+        assertThat(xContentResult, CoreMatchers.is("""
+            {"access_key":"accesstest","secret_key":"secrettest"}"""));
+    }
+
+    public static Map<String, Object> getAmazonBedrockSecretSettingsMap(String accessKey, String secretKey) {
+        return new HashMap<String, Object>(Map.of(ACCESS_KEY_FIELD, accessKey, SECRET_KEY_FIELD, secretKey));
+    }
+ + @Override + protected AmazonBedrockSecretSettings mutateInstanceForVersion(AmazonBedrockSecretSettings instance, TransportVersion version) { + return instance; + } + + @Override + protected Writeable.Reader instanceReader() { + return AmazonBedrockSecretSettings::new; + } + + @Override + protected AmazonBedrockSecretSettings createTestInstance() { + return createRandom(); + } + + @Override + protected AmazonBedrockSecretSettings mutateInstance(AmazonBedrockSecretSettings instance) throws IOException { + return randomValueOtherThan(instance, AmazonBedrockSecretSettingsTests::createRandom); + } + + private static AmazonBedrockSecretSettings createRandom() { + return new AmazonBedrockSecretSettings(new SecureString(randomAlphaOfLength(10)), new SecureString(randomAlphaOfLength(10))); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java new file mode 100644 index 0000000000000..ae413fc17425c --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java @@ -0,0 +1,1136 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.inference.ChunkedInferenceServiceResults; +import org.elasticsearch.inference.ChunkingOptions; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.inference.InputType; +import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.ModelConfigurations; +import org.elasticsearch.inference.ModelSecrets; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.core.inference.action.InferenceAction; +import org.elasticsearch.xpack.core.inference.results.ChatCompletionResults; +import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; +import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; +import org.elasticsearch.xpack.inference.Utils; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockMockRequestSender; +import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; +import org.elasticsearch.xpack.inference.external.http.sender.Sender; +import org.elasticsearch.xpack.inference.services.ServiceComponentsTests; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModel; +import 
org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionModelTests; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionServiceSettings; +import org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionTaskSettings; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModel; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsModelTests; +import org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsServiceSettings; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import static org.elasticsearch.xpack.inference.Utils.getInvalidModel; +import static org.elasticsearch.xpack.inference.Utils.inferenceUtilityPool; +import static org.elasticsearch.xpack.inference.Utils.mockClusterServiceEmpty; +import static org.elasticsearch.xpack.inference.results.ChatCompletionResultsTests.buildExpectationCompletion; +import static org.elasticsearch.xpack.inference.results.TextEmbeddingResultsTests.buildExpectationFloat; +import static org.elasticsearch.xpack.inference.services.ServiceComponentsTests.createWithEmptySettings; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockSecretSettingsTests.getAmazonBedrockSecretSettingsMap; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionServiceSettingsTests.createChatCompletionRequestSettingsMap; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionTaskSettingsTests.getChatCompletionTaskSettingsMap; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsServiceSettingsTests.createEmbeddingsRequestSettingsMap; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +public class AmazonBedrockServiceTests extends ESTestCase { + private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); + private ThreadPool threadPool; + + @Before + public void init() throws Exception { + threadPool = createThreadPool(inferenceUtilityPool()); + } + + @After + public void shutdown() throws IOException { + terminate(threadPool); + } + + public void testParseRequestConfig_CreatesAnAmazonBedrockModel() throws IOException { + try (var service = createAmazonBedrockService()) { + ActionListener modelVerificationListener = ActionListener.wrap(model -> { + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + var secretSettings = 
(AmazonBedrockSecretSettings) model.getSecretSettings(); + assertThat(secretSettings.accessKey.toString(), is("access")); + assertThat(secretSettings.secretKey.toString(), is("secret")); + }, exception -> fail("Unexpected exception: " + exception)); + + service.parseRequestConfig( + "id", + TaskType.TEXT_EMBEDDING, + getRequestConfigMap( + createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, null, null, null), + Map.of(), + getAmazonBedrockSecretSettingsMap("access", "secret") + ), + Set.of(), + modelVerificationListener + ); + } + } + + public void testParseRequestConfig_ThrowsUnsupportedModelType() throws IOException { + try (var service = createAmazonBedrockService()) { + ActionListener modelVerificationListener = ActionListener.wrap( + model -> fail("Expected exception, but got model: " + model), + exception -> { + assertThat(exception, instanceOf(ElasticsearchStatusException.class)); + assertThat(exception.getMessage(), is("The [amazonbedrock] service does not support task type [sparse_embedding]")); + } + ); + + service.parseRequestConfig( + "id", + TaskType.SPARSE_EMBEDDING, + getRequestConfigMap( + createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null), + Map.of(), + getAmazonBedrockSecretSettingsMap("access", "secret") + ), + Set.of(), + modelVerificationListener + ); + } + } + + public void testCreateModel_ForEmbeddingsTask_InvalidProvider() throws IOException { + try (var service = createAmazonBedrockService()) { + ActionListener modelVerificationListener = ActionListener.wrap( + model -> fail("Expected exception, but got model: " + model), + exception -> { + assertThat(exception, instanceOf(ElasticsearchStatusException.class)); + assertThat(exception.getMessage(), is("The [text_embedding] task type for provider [anthropic] is not available")); + } + ); + + service.parseRequestConfig( + "id", + TaskType.TEXT_EMBEDDING, + getRequestConfigMap( + createEmbeddingsRequestSettingsMap("region", "model", "anthropic", null, null, null, null), + Map.of(), + getAmazonBedrockSecretSettingsMap("access", "secret") + ), + Set.of(), + modelVerificationListener + ); + } + } + + public void testCreateModel_TopKParameter_NotAvailable() throws IOException { + try (var service = createAmazonBedrockService()) { + ActionListener modelVerificationListener = ActionListener.wrap( + model -> fail("Expected exception, but got model: " + model), + exception -> { + assertThat(exception, instanceOf(ElasticsearchStatusException.class)); + assertThat(exception.getMessage(), is("The [top_k] task parameter is not available for provider [amazontitan]")); + } + ); + + service.parseRequestConfig( + "id", + TaskType.COMPLETION, + getRequestConfigMap( + createChatCompletionRequestSettingsMap("region", "model", "amazontitan"), + getChatCompletionTaskSettingsMap(1.0, 0.5, 0.2, 128), + getAmazonBedrockSecretSettingsMap("access", "secret") + ), + Set.of(), + modelVerificationListener + ); + } + } + + public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInConfig() throws IOException { + try (var service = createAmazonBedrockService()) { + var config = getRequestConfigMap( + createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, null, null, null), + Map.of(), + getAmazonBedrockSecretSettingsMap("access", "secret") + ); + + config.put("extra_key", "value"); + + ActionListener modelVerificationListener = ActionListener.wrap( + model -> fail("Expected exception, but got model: " + model), + exception -> { + assertThat(exception, 
instanceOf(ElasticsearchStatusException.class)); + assertThat( + exception.getMessage(), + is("Model configuration contains settings [{extra_key=value}] unknown to the [amazonbedrock] service") + ); + } + ); + + service.parseRequestConfig("id", TaskType.TEXT_EMBEDDING, config, Set.of(), modelVerificationListener); + } + } + + public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInServiceSettingsMap() throws IOException { + try (var service = createAmazonBedrockService()) { + var serviceSettings = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, null, null, null); + serviceSettings.put("extra_key", "value"); + + var config = getRequestConfigMap(serviceSettings, Map.of(), getAmazonBedrockSecretSettingsMap("access", "secret")); + + ActionListener modelVerificationListener = ActionListener.wrap((model) -> { + fail("Expected exception, but got model: " + model); + }, e -> { + assertThat(e, instanceOf(ElasticsearchStatusException.class)); + assertThat( + e.getMessage(), + is("Model configuration contains settings [{extra_key=value}] unknown to the [amazonbedrock] service") + ); + }); + + service.parseRequestConfig("id", TaskType.TEXT_EMBEDDING, config, Set.of(), modelVerificationListener); + } + } + + public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInTaskSettingsMap() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createChatCompletionRequestSettingsMap("region", "model", "anthropic"); + var taskSettingsMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.2, 128); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + taskSettingsMap.put("extra_key", "value"); + + var config = getRequestConfigMap(settingsMap, taskSettingsMap, secretSettingsMap); + + ActionListener modelVerificationListener = ActionListener.wrap((model) -> { + fail("Expected exception, but got model: " + model); + }, e -> { + assertThat(e, instanceOf(ElasticsearchStatusException.class)); + assertThat( + e.getMessage(), + is("Model configuration contains settings [{extra_key=value}] unknown to the [amazonbedrock] service") + ); + }); + + service.parseRequestConfig("id", TaskType.COMPLETION, config, Set.of(), modelVerificationListener); + } + } + + public void testParseRequestConfig_ThrowsWhenAnExtraKeyExistsInSecretSettingsMap() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createChatCompletionRequestSettingsMap("region", "model", "anthropic"); + var taskSettingsMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.2, 128); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + secretSettingsMap.put("extra_key", "value"); + + var config = getRequestConfigMap(settingsMap, taskSettingsMap, secretSettingsMap); + + ActionListener modelVerificationListener = ActionListener.wrap((model) -> { + fail("Expected exception, but got model: " + model); + }, e -> { + assertThat(e, instanceOf(ElasticsearchStatusException.class)); + assertThat( + e.getMessage(), + is("Model configuration contains settings [{extra_key=value}] unknown to the [amazonbedrock] service") + ); + }); + + service.parseRequestConfig("id", TaskType.COMPLETION, config, Set.of(), modelVerificationListener); + } + } + + public void testParseRequestConfig_MovesModel() throws IOException { + try (var service = createAmazonBedrockService()) { + ActionListener modelVerificationListener = ActionListener.wrap(model -> { + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + 
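+ // Descriptive note (added comment): the parsed model should carry region, model id, provider and the access/secret key pair through unchanged; the assertions below verify each of those fields.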
+ var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings(); + assertThat(secretSettings.accessKey.toString(), is("access")); + assertThat(secretSettings.secretKey.toString(), is("secret")); + }, exception -> fail("Unexpected exception: " + exception)); + + service.parseRequestConfig( + "id", + TaskType.TEXT_EMBEDDING, + getRequestConfigMap( + createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, null, null, null), + Map.of(), + getAmazonBedrockSecretSettingsMap("access", "secret") + ), + Set.of(), + modelVerificationListener + ); + } + } + + public void testCreateModel_ForEmbeddingsTask_DimensionsIsNotAllowed() throws IOException { + try (var service = createAmazonBedrockService()) { + ActionListener modelVerificationListener = ActionListener.wrap( + model -> fail("Expected exception, but got model: " + model), + exception -> { + assertThat(exception, instanceOf(ValidationException.class)); + assertThat(exception.getMessage(), containsString("[service_settings] does not allow the setting [dimensions]")); + } + ); + + service.parseRequestConfig( + "id", + TaskType.TEXT_EMBEDDING, + getRequestConfigMap( + createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", 512, null, null, null), + Map.of(), + getAmazonBedrockSecretSettingsMap("access", "secret") + ), + Set.of(), + modelVerificationListener + ); + } + } + + public void testParsePersistedConfigWithSecrets_CreatesAnAmazonBedrockEmbeddingsModel() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + + var model = service.parsePersistedConfigWithSecrets( + "id", + TaskType.TEXT_EMBEDDING, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings(); + assertThat(secretSettings.accessKey.toString(), is("access")); + assertThat(secretSettings.secretKey.toString(), is("secret")); + } + } + + public void testParsePersistedConfigWithSecrets_ThrowsErrorTryingToParseInvalidModel() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createChatCompletionRequestSettingsMap("region", "model", "amazontitan"); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, Map.of(), secretSettingsMap); + + var thrownException = expectThrows( + ElasticsearchStatusException.class, + () -> service.parsePersistedConfigWithSecrets( + "id", + TaskType.SPARSE_EMBEDDING, + persistedConfig.config(), + persistedConfig.secrets() + ) + ); + + assertThat( + thrownException.getMessage(), + is("Failed to parse stored model [id] for 
[amazonbedrock] service, please delete and add the service again") + ); + } + } + + public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExistsInConfig() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + persistedConfig.config().put("extra_key", "value"); + + var model = service.parsePersistedConfigWithSecrets( + "id", + TaskType.TEXT_EMBEDDING, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings(); + assertThat(secretSettings.accessKey.toString(), is("access")); + assertThat(secretSettings.secretKey.toString(), is("secret")); + } + } + + public void testParsePersistedConfigWithSecrets_DoesNotThrowWhenAnExtraKeyExistsInSecretsSettings() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + secretSettingsMap.put("extra_key", "value"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + + var model = service.parsePersistedConfigWithSecrets( + "id", + TaskType.TEXT_EMBEDDING, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings(); + assertThat(secretSettings.accessKey.toString(), is("access")); + assertThat(secretSettings.secretKey.toString(), is("secret")); + } + } + + public void testParsePersistedConfigWithSecrets_NotThrowWhenAnExtraKeyExistsInSecrets() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + persistedConfig.secrets().put("extra_key", "value"); + + var model = service.parsePersistedConfigWithSecrets( + "id", + TaskType.TEXT_EMBEDDING, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + var secretSettings = 
(AmazonBedrockSecretSettings) model.getSecretSettings(); + assertThat(secretSettings.accessKey.toString(), is("access")); + assertThat(secretSettings.secretKey.toString(), is("secret")); + } + } + + public void testParsePersistedConfigWithSecrets_NotThrowWhenAnExtraKeyExistsInServiceSettings() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + settingsMap.put("extra_key", "value"); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + + var model = service.parsePersistedConfigWithSecrets( + "id", + TaskType.TEXT_EMBEDDING, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings(); + assertThat(secretSettings.accessKey.toString(), is("access")); + assertThat(secretSettings.secretKey.toString(), is("secret")); + } + } + + public void testParsePersistedConfigWithSecrets_NotThrowWhenAnExtraKeyExistsInTaskSettings() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createChatCompletionRequestSettingsMap("region", "model", "anthropic"); + var taskSettingsMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.2, 128); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + taskSettingsMap.put("extra_key", "value"); + + var persistedConfig = getPersistedConfigMap(settingsMap, taskSettingsMap, secretSettingsMap); + + var model = service.parsePersistedConfigWithSecrets( + "id", + TaskType.COMPLETION, + persistedConfig.config(), + persistedConfig.secrets() + ); + + assertThat(model, instanceOf(AmazonBedrockChatCompletionModel.class)); + + var settings = (AmazonBedrockChatCompletionServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.ANTHROPIC)); + var taskSettings = (AmazonBedrockChatCompletionTaskSettings) model.getTaskSettings(); + assertThat(taskSettings.temperature(), is(1.0)); + assertThat(taskSettings.topP(), is(0.5)); + assertThat(taskSettings.topK(), is(0.2)); + assertThat(taskSettings.maxNewTokens(), is(128)); + var secretSettings = (AmazonBedrockSecretSettings) model.getSecretSettings(); + assertThat(secretSettings.accessKey.toString(), is("access")); + assertThat(secretSettings.secretKey.toString(), is("secret")); + } + } + + public void testParsePersistedConfig_CreatesAnAmazonBedrockEmbeddingsModel() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + + var model = service.parsePersistedConfig("id", TaskType.TEXT_EMBEDDING, persistedConfig.config()); + + assertThat(model, 
instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + assertNull(model.getSecretSettings()); + } + } + + public void testParsePersistedConfig_CreatesAnAmazonBedrockChatCompletionModel() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createChatCompletionRequestSettingsMap("region", "model", "anthropic"); + var taskSettingsMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.2, 128); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, taskSettingsMap, secretSettingsMap); + var model = service.parsePersistedConfig("id", TaskType.COMPLETION, persistedConfig.config()); + + assertThat(model, instanceOf(AmazonBedrockChatCompletionModel.class)); + + var settings = (AmazonBedrockChatCompletionServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.ANTHROPIC)); + var taskSettings = (AmazonBedrockChatCompletionTaskSettings) model.getTaskSettings(); + assertThat(taskSettings.temperature(), is(1.0)); + assertThat(taskSettings.topP(), is(0.5)); + assertThat(taskSettings.topK(), is(0.2)); + assertThat(taskSettings.maxNewTokens(), is(128)); + assertNull(model.getSecretSettings()); + } + } + + public void testParsePersistedConfig_ThrowsErrorTryingToParseInvalidModel() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + + var thrownException = expectThrows( + ElasticsearchStatusException.class, + () -> service.parsePersistedConfig("id", TaskType.SPARSE_EMBEDDING, persistedConfig.config()) + ); + + assertThat( + thrownException.getMessage(), + is("Failed to parse stored model [id] for [amazonbedrock] service, please delete and add the service again") + ); + } + } + + public void testParsePersistedConfig_DoesNotThrowWhenAnExtraKeyExistsInConfig() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + persistedConfig.config().put("extra_key", "value"); + + var model = service.parsePersistedConfig("id", TaskType.TEXT_EMBEDDING, persistedConfig.config()); + + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + assertNull(model.getSecretSettings()); + } + } + + public void testParsePersistedConfig_NotThrowWhenAnExtraKeyExistsInServiceSettings() throws IOException { + try (var service = 
createAmazonBedrockService()) { + var settingsMap = createEmbeddingsRequestSettingsMap("region", "model", "amazontitan", null, false, null, null); + settingsMap.put("extra_key", "value"); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, new HashMap(Map.of()), secretSettingsMap); + persistedConfig.config().put("extra_key", "value"); + + var model = service.parsePersistedConfig("id", TaskType.TEXT_EMBEDDING, persistedConfig.config()); + + assertThat(model, instanceOf(AmazonBedrockEmbeddingsModel.class)); + + var settings = (AmazonBedrockEmbeddingsServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.AMAZONTITAN)); + assertNull(model.getSecretSettings()); + } + } + + public void testParsePersistedConfig_NotThrowWhenAnExtraKeyExistsInTaskSettings() throws IOException { + try (var service = createAmazonBedrockService()) { + var settingsMap = createChatCompletionRequestSettingsMap("region", "model", "anthropic"); + var taskSettingsMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.2, 128); + taskSettingsMap.put("extra_key", "value"); + var secretSettingsMap = getAmazonBedrockSecretSettingsMap("access", "secret"); + + var persistedConfig = getPersistedConfigMap(settingsMap, taskSettingsMap, secretSettingsMap); + var model = service.parsePersistedConfig("id", TaskType.COMPLETION, persistedConfig.config()); + + assertThat(model, instanceOf(AmazonBedrockChatCompletionModel.class)); + + var settings = (AmazonBedrockChatCompletionServiceSettings) model.getServiceSettings(); + assertThat(settings.region(), is("region")); + assertThat(settings.model(), is("model")); + assertThat(settings.provider(), is(AmazonBedrockProvider.ANTHROPIC)); + var taskSettings = (AmazonBedrockChatCompletionTaskSettings) model.getTaskSettings(); + assertThat(taskSettings.temperature(), is(1.0)); + assertThat(taskSettings.topP(), is(0.5)); + assertThat(taskSettings.topK(), is(0.2)); + assertThat(taskSettings.maxNewTokens(), is(128)); + assertNull(model.getSecretSettings()); + } + } + + public void testInfer_ThrowsErrorWhenModelIsNotAmazonBedrockModel() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + var mockModel = getInvalidModel("model_id", "service_name"); + + try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + mockModel, + null, + List.of(""), + new HashMap<>(), + InputType.INGEST, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var thrownException = expectThrows(ElasticsearchStatusException.class, () -> listener.actionGet(TIMEOUT)); + assertThat( + thrownException.getMessage(), + is("The internal model was invalid, please delete the service [service_name] with id [model_id] and add it again.") + ); + + verify(factory, times(1)).createSender(); + verify(sender, times(1)).start(); + } + verify(sender, times(1)).close(); + verifyNoMoreInteractions(factory); + verifyNoMoreInteractions(sender); + } + + public void 
testInfer_SendsRequest_ForEmbeddingsModel() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + + try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + try (var requestSender = (AmazonBedrockMockRequestSender) amazonBedrockFactory.createSender()) { + var results = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.123F, 0.678F })) + ); + requestSender.enqueue(results); + + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + "access", + "secret" + ); + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + model, + null, + List.of("abc"), + new HashMap<>(), + InputType.INGEST, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var result = listener.actionGet(TIMEOUT); + + assertThat(result.asMap(), Matchers.is(buildExpectationFloat(List.of(new float[] { 0.123F, 0.678F })))); + } + } + } + + public void testInfer_SendsRequest_ForChatCompletionModel() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + + try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + try (var requestSender = (AmazonBedrockMockRequestSender) amazonBedrockFactory.createSender()) { + var mockResults = new ChatCompletionResults(List.of(new ChatCompletionResults.Result("test result"))); + requestSender.enqueue(mockResults); + + var model = AmazonBedrockChatCompletionModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + "access", + "secret" + ); + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + model, + null, + List.of("abc"), + new HashMap<>(), + InputType.INGEST, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var result = listener.actionGet(TIMEOUT); + + assertThat(result.asMap(), Matchers.is(buildExpectationCompletion(List.of("test result")))); + } + } + } + + public void testCheckModelConfig_IncludesMaxTokens_ForEmbeddingsModel() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + + try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + try (var requestSender = (AmazonBedrockMockRequestSender) amazonBedrockFactory.createSender()) { + var results = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.123F, 0.678F })) + ); + requestSender.enqueue(results); + + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", 
+ "model", + AmazonBedrockProvider.AMAZONTITAN, + null, + false, + 100, + null, + null, + "access", + "secret" + ); + + PlainActionFuture listener = new PlainActionFuture<>(); + service.checkModelConfig(model, listener); + var result = listener.actionGet(TIMEOUT); + assertThat( + result, + is( + AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + 2, + false, + 100, + SimilarityMeasure.COSINE, + null, + "access", + "secret" + ) + ) + ); + var inputStrings = requestSender.getInputs(); + + MatcherAssert.assertThat(inputStrings, Matchers.is(List.of("how big"))); + } + } + } + + public void testCheckModelConfig_HasSimilarity_ForEmbeddingsModel() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + + try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + try (var requestSender = (AmazonBedrockMockRequestSender) amazonBedrockFactory.createSender()) { + var results = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.123F, 0.678F })) + ); + requestSender.enqueue(results); + + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + null, + false, + null, + SimilarityMeasure.COSINE, + null, + "access", + "secret" + ); + + PlainActionFuture listener = new PlainActionFuture<>(); + service.checkModelConfig(model, listener); + var result = listener.actionGet(TIMEOUT); + assertThat( + result, + is( + AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + 2, + false, + null, + SimilarityMeasure.COSINE, + null, + "access", + "secret" + ) + ) + ); + var inputStrings = requestSender.getInputs(); + + MatcherAssert.assertThat(inputStrings, Matchers.is(List.of("how big"))); + } + } + } + + public void testCheckModelConfig_ThrowsIfEmbeddingSizeDoesNotMatchValueSetByUser() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + + try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + try (var requestSender = (AmazonBedrockMockRequestSender) amazonBedrockFactory.createSender()) { + var results = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.123F, 0.678F })) + ); + requestSender.enqueue(results); + + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + 3, + true, + null, + null, + null, + "access", + "secret" + ); + + PlainActionFuture listener = new PlainActionFuture<>(); + service.checkModelConfig(model, listener); + + var exception = expectThrows(ElasticsearchStatusException.class, () -> listener.actionGet(TIMEOUT)); + assertThat( + exception.getMessage(), + is( + "The retrieved embeddings 
size [2] does not match the size specified in the settings [3]. " + + "Please recreate the [id] configuration with the correct dimensions" + ) + ); + + var inputStrings = requestSender.getInputs(); + MatcherAssert.assertThat(inputStrings, Matchers.is(List.of("how big"))); + } + } + } + + public void testCheckModelConfig_ReturnsNewModelReference_AndDoesNotSendDimensionsField_WhenNotSetByUser() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + + try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + try (var requestSender = (AmazonBedrockMockRequestSender) amazonBedrockFactory.createSender()) { + var results = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.123F, 0.678F })) + ); + requestSender.enqueue(results); + + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + 100, + false, + null, + SimilarityMeasure.COSINE, + null, + "access", + "secret" + ); + + PlainActionFuture listener = new PlainActionFuture<>(); + service.checkModelConfig(model, listener); + var result = listener.actionGet(TIMEOUT); + assertThat( + result, + is( + AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + 2, + false, + null, + SimilarityMeasure.COSINE, + null, + "access", + "secret" + ) + ) + ); + var inputStrings = requestSender.getInputs(); + + MatcherAssert.assertThat(inputStrings, Matchers.is(List.of("how big"))); + } + } + } + + public void testInfer_UnauthorizedResponse() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + + try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "us-east-1", + "amazon.titan-embed-text-v1", + AmazonBedrockProvider.AMAZONTITAN, + "_INVALID_AWS_ACCESS_KEY_", + "_INVALID_AWS_SECRET_KEY_" + ); + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + model, + null, + List.of("abc"), + new HashMap<>(), + InputType.INGEST, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var exceptionThrown = assertThrows(ElasticsearchException.class, () -> listener.actionGet(TIMEOUT)); + assertThat(exceptionThrown.getCause().getMessage(), containsString("The security token included in the request is invalid")); + } + } + + public void testChunkedInfer_CallsInfer_ConvertsFloatResponse_ForEmbeddings() throws IOException { + var sender = mock(Sender.class); + var factory = mock(HttpRequestSender.Factory.class); + when(factory.createSender()).thenReturn(sender); + + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + + try (var service = new 
AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { + try (var requestSender = (AmazonBedrockMockRequestSender) amazonBedrockFactory.createSender()) { + { + var mockResults1 = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.123F, 0.678F })) + ); + requestSender.enqueue(mockResults1); + } + { + var mockResults2 = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.223F, 0.278F })) + ); + requestSender.enqueue(mockResults2); + } + + var model = AmazonBedrockEmbeddingsModelTests.createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + "access", + "secret" + ); + PlainActionFuture> listener = new PlainActionFuture<>(); + service.chunkedInfer( + model, + List.of("abc", "xyz"), + new HashMap<>(), + InputType.INGEST, + new ChunkingOptions(null, null), + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + var results = listener.actionGet(TIMEOUT); + assertThat(results, hasSize(2)); + { + assertThat(results.get(0), CoreMatchers.instanceOf(InferenceChunkedTextEmbeddingFloatResults.class)); + var floatResult = (InferenceChunkedTextEmbeddingFloatResults) results.get(0); + assertThat(floatResult.chunks(), hasSize(1)); + assertEquals("abc", floatResult.chunks().get(0).matchedText()); + assertArrayEquals(new float[] { 0.123F, 0.678F }, floatResult.chunks().get(0).embedding(), 0.0f); + } + { + assertThat(results.get(1), CoreMatchers.instanceOf(InferenceChunkedTextEmbeddingFloatResults.class)); + var floatResult = (InferenceChunkedTextEmbeddingFloatResults) results.get(1); + assertThat(floatResult.chunks(), hasSize(1)); + assertEquals("xyz", floatResult.chunks().get(0).matchedText()); + assertArrayEquals(new float[] { 0.223F, 0.278F }, floatResult.chunks().get(0).embedding(), 0.0f); + } + } + } + } + + private AmazonBedrockService createAmazonBedrockService() { + var amazonBedrockFactory = new AmazonBedrockMockRequestSender.Factory( + ServiceComponentsTests.createWithSettings(threadPool, Settings.EMPTY), + mockClusterServiceEmpty() + ); + return new AmazonBedrockService(mock(HttpRequestSender.Factory.class), amazonBedrockFactory, createWithEmptySettings(threadPool)); + } + + private Map getRequestConfigMap( + Map serviceSettings, + Map taskSettings, + Map secretSettings + ) { + var builtServiceSettings = new HashMap<>(); + builtServiceSettings.putAll(serviceSettings); + builtServiceSettings.putAll(secretSettings); + + return new HashMap<>( + Map.of(ModelConfigurations.SERVICE_SETTINGS, builtServiceSettings, ModelConfigurations.TASK_SETTINGS, taskSettings) + ); + } + + private Utils.PersistedConfig getPersistedConfigMap( + Map serviceSettings, + Map taskSettings, + Map secretSettings + ) { + + return new Utils.PersistedConfig( + new HashMap<>(Map.of(ModelConfigurations.SERVICE_SETTINGS, serviceSettings, ModelConfigurations.TASK_SETTINGS, taskSettings)), + new HashMap<>(Map.of(ModelSecrets.SECRET_SETTINGS, secretSettings)) + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionModelTests.java new file mode 100644 index 0000000000000..22173943ff432 --- /dev/null +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionModelTests.java @@ -0,0 +1,221 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.completion; + +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockSecretSettings; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import static org.elasticsearch.xpack.inference.services.amazonbedrock.completion.AmazonBedrockChatCompletionTaskSettingsTests.getChatCompletionTaskSettingsMap; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.sameInstance; + +public class AmazonBedrockChatCompletionModelTests extends ESTestCase { + public void testOverrideWith_OverridesWithoutValues() { + var model = createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + 1.0, + 0.5, + 0.6, + 512, + null, + "access_key", + "secret_key" + ); + var requestTaskSettingsMap = getChatCompletionTaskSettingsMap(null, null, null, null); + var overriddenModel = AmazonBedrockChatCompletionModel.of(model, requestTaskSettingsMap); + + assertThat(overriddenModel, sameInstance(model)); + } + + public void testOverrideWith_temperature() { + var model = createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + 1.0, + null, + null, + null, + null, + "access_key", + "secret_key" + ); + var requestTaskSettings = getChatCompletionTaskSettingsMap(0.5, null, null, null); + var overriddenModel = AmazonBedrockChatCompletionModel.of(model, requestTaskSettings); + assertThat( + overriddenModel, + is( + createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + 0.5, + null, + null, + null, + null, + "access_key", + "secret_key" + ) + ) + ); + } + + public void testOverrideWith_topP() { + var model = createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + null, + 0.8, + null, + null, + null, + "access_key", + "secret_key" + ); + var requestTaskSettings = getChatCompletionTaskSettingsMap(null, 0.5, null, null); + var overriddenModel = AmazonBedrockChatCompletionModel.of(model, requestTaskSettings); + assertThat( + overriddenModel, + is( + createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + null, + 0.5, + null, + null, + null, + "access_key", + "secret_key" + ) + ) + ); + } + + public void testOverrideWith_topK() { + var model = createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + null, + null, + 1.0, + null, + null, + "access_key", + "secret_key" + ); + var requestTaskSettings = getChatCompletionTaskSettingsMap(null, null, 0.8, null); + var overriddenModel = AmazonBedrockChatCompletionModel.of(model, requestTaskSettings); + assertThat( + overriddenModel, + is( + createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + null, + null, + 0.8, + null, + null, + "access_key", + "secret_key" + ) + ) + ); + }
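+ // Descriptive note (added comment): each testOverrideWith_* case above and below applies a single request-level task-setting override and expects a model equal to one rebuilt with that value, leaving the remaining task settings null.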
+ + public void testOverrideWith_maxNewTokens() { + var model = createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + null, + null, + null, + 512, + null, + "access_key", + "secret_key" + ); + var requestTaskSettings = getChatCompletionTaskSettingsMap(null, null, null, 128); + var overriddenModel = AmazonBedrockChatCompletionModel.of(model, requestTaskSettings); + assertThat( + overriddenModel, + is( + createModel( + "id", + "region", + "model", + AmazonBedrockProvider.AMAZONTITAN, + null, + null, + null, + 128, + null, + "access_key", + "secret_key" + ) + ) + ); + } + + public static AmazonBedrockChatCompletionModel createModel( + String id, + String region, + String model, + AmazonBedrockProvider provider, + String accessKey, + String secretKey + ) { + return createModel(id, region, model, provider, null, null, null, null, null, accessKey, secretKey); + } + + public static AmazonBedrockChatCompletionModel createModel( + String id, + String region, + String model, + AmazonBedrockProvider provider, + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Double topK, + @Nullable Integer maxNewTokens, + @Nullable RateLimitSettings rateLimitSettings, + String accessKey, + String secretKey + ) { + return new AmazonBedrockChatCompletionModel( + id, + TaskType.COMPLETION, + "amazonbedrock", + new AmazonBedrockChatCompletionServiceSettings(region, model, provider, rateLimitSettings), + new AmazonBedrockChatCompletionTaskSettings(temperature, topP, topK, maxNewTokens), + new AmazonBedrockSecretSettings(new SecureString(accessKey), new SecureString(secretKey)) + ); + } + +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionRequestTaskSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionRequestTaskSettingsTests.java new file mode 100644 index 0000000000000..681088c786b6b --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionRequestTaskSettingsTests.java @@ -0,0 +1,107 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.completion; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.MatcherAssert; + +import java.util.HashMap; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MAX_NEW_TOKENS_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TEMPERATURE_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_K_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_P_FIELD; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockChatCompletionRequestTaskSettingsTests extends ESTestCase { + public void testFromMap_ReturnsEmptySettings_WhenTheMapIsEmpty() { + var settings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of())); + assertThat(settings, is(AmazonBedrockChatCompletionRequestTaskSettings.EMPTY_SETTINGS)); + } + + public void testFromMap_ReturnsEmptySettings_WhenTheMapDoesNotContainTheFields() { + var settings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of("key", "model"))); + assertThat(settings, is(AmazonBedrockChatCompletionRequestTaskSettings.EMPTY_SETTINGS)); + } + + public void testFromMap_ReturnsTemperature() { + var settings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of(TEMPERATURE_FIELD, 0.1))); + assertThat(settings.temperature(), is(0.1)); + } + + public void testFromMap_ReturnsTopP() { + var settings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of(TOP_P_FIELD, 0.1))); + assertThat(settings.topP(), is(0.1)); + } + + public void testFromMap_ReturnsDoSample() { + var settings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of(TOP_K_FIELD, 0.3))); + assertThat(settings.topK(), is(0.3)); + } + + public void testFromMap_ReturnsMaxNewTokens() { + var settings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of(MAX_NEW_TOKENS_FIELD, 512))); + assertThat(settings.maxNewTokens(), is(512)); + } + + public void testFromMap_TemperatureIsInvalidValue_ThrowsValidationException() { + var thrownException = expectThrows( + ValidationException.class, + () -> AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of(TEMPERATURE_FIELD, "invalid"))) + ); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString( + Strings.format("field [temperature] is not of the expected type. The value [invalid] cannot be converted to a [Double]") + ) + ); + } + + public void testFromMap_TopPIsInvalidValue_ThrowsValidationException() { + var thrownException = expectThrows( + ValidationException.class, + () -> AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of(TOP_P_FIELD, "invalid"))) + ); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString( + Strings.format("field [top_p] is not of the expected type. 
The value [invalid] cannot be converted to a [Double]") + ) + ); + } + + public void testFromMap_TopKIsInvalidValue_ThrowsValidationException() { + var thrownException = expectThrows( + ValidationException.class, + () -> AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of(TOP_K_FIELD, "invalid"))) + ); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString("field [top_k] is not of the expected type. The value [invalid] cannot be converted to a [Double]") + ); + } + + public void testFromMap_MaxTokensIsInvalidValue_ThrowsStatusException() { + var thrownException = expectThrows( + ValidationException.class, + () -> AmazonBedrockChatCompletionRequestTaskSettings.fromMap(new HashMap<>(Map.of(MAX_NEW_TOKENS_FIELD, "invalid"))) + ); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString("field [max_new_tokens] is not of the expected type. The value [invalid] cannot be converted to a [Integer]") + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionServiceSettingsTests.java new file mode 100644 index 0000000000000..90868530d8df8 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionServiceSettingsTests.java @@ -0,0 +1,131 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.completion; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.ml.AbstractBWCWireSerializationTestCase; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettingsTests; +import org.hamcrest.CoreMatchers; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MODEL_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.PROVIDER_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.REGION_FIELD; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockChatCompletionServiceSettingsTests extends AbstractBWCWireSerializationTestCase< + AmazonBedrockChatCompletionServiceSettings> { + + public void testFromMap_Request_CreatesSettingsCorrectly() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var serviceSettings = AmazonBedrockChatCompletionServiceSettings.fromMap( + createChatCompletionRequestSettingsMap(region, model, provider), + ConfigurationParseContext.REQUEST + ); + + assertThat( + serviceSettings, + is(new AmazonBedrockChatCompletionServiceSettings(region, model, AmazonBedrockProvider.AMAZONTITAN, null)) + ); + } + + public void testFromMap_RequestWithRateLimit_CreatesSettingsCorrectly() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var settingsMap = createChatCompletionRequestSettingsMap(region, model, provider); + settingsMap.put(RateLimitSettings.FIELD_NAME, new HashMap<>(Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 3))); + + var serviceSettings = AmazonBedrockChatCompletionServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST); + + assertThat( + serviceSettings, + is(new AmazonBedrockChatCompletionServiceSettings(region, model, AmazonBedrockProvider.AMAZONTITAN, new RateLimitSettings(3))) + ); + } + + public void testFromMap_Persistent_CreatesSettingsCorrectly() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var settingsMap = createChatCompletionRequestSettingsMap(region, model, provider); + var serviceSettings = AmazonBedrockChatCompletionServiceSettings.fromMap(settingsMap, ConfigurationParseContext.PERSISTENT); + + assertThat( + serviceSettings, + is(new AmazonBedrockChatCompletionServiceSettings(region, model, AmazonBedrockProvider.AMAZONTITAN, null)) + ); + } + + public void testToXContent_WritesAllValues() throws IOException { + var entity = new AmazonBedrockChatCompletionServiceSettings( + "testregion", + "testmodel", + AmazonBedrockProvider.AMAZONTITAN, + new RateLimitSettings(3) + ); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + entity.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, CoreMatchers.is(""" + 
{"region":"testregion","model":"testmodel","provider":"AMAZONTITAN",""" + """ + "rate_limit":{"requests_per_minute":3}}""")); + } + + public static HashMap createChatCompletionRequestSettingsMap(String region, String model, String provider) { + return new HashMap(Map.of(REGION_FIELD, region, MODEL_FIELD, model, PROVIDER_FIELD, provider)); + } + + @Override + protected AmazonBedrockChatCompletionServiceSettings mutateInstanceForVersion( + AmazonBedrockChatCompletionServiceSettings instance, + TransportVersion version + ) { + return instance; + } + + @Override + protected Writeable.Reader instanceReader() { + return AmazonBedrockChatCompletionServiceSettings::new; + } + + @Override + protected AmazonBedrockChatCompletionServiceSettings createTestInstance() { + return createRandom(); + } + + @Override + protected AmazonBedrockChatCompletionServiceSettings mutateInstance(AmazonBedrockChatCompletionServiceSettings instance) + throws IOException { + return randomValueOtherThan(instance, AmazonBedrockChatCompletionServiceSettingsTests::createRandom); + } + + private static AmazonBedrockChatCompletionServiceSettings createRandom() { + return new AmazonBedrockChatCompletionServiceSettings( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + randomFrom(AmazonBedrockProvider.values()), + RateLimitSettingsTests.createRandom() + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionTaskSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionTaskSettingsTests.java new file mode 100644 index 0000000000000..0d5440c6d2cf8 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/completion/AmazonBedrockChatCompletionTaskSettingsTests.java @@ -0,0 +1,226 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.completion; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.ml.AbstractBWCWireSerializationTestCase; +import org.hamcrest.MatcherAssert; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MAX_NEW_TOKENS_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TEMPERATURE_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_K_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.TOP_P_FIELD; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockChatCompletionTaskSettingsTests extends AbstractBWCWireSerializationTestCase< + AmazonBedrockChatCompletionTaskSettings> { + + public void testFromMap_AllValues() { + var taskMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512); + assertEquals( + new AmazonBedrockChatCompletionTaskSettings(1.0, 0.5, 0.6, 512), + AmazonBedrockChatCompletionTaskSettings.fromMap(taskMap) + ); + } + + public void testFromMap_TemperatureIsInvalidValue_ThrowsValidationException() { + var taskMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512); + taskMap.put(TEMPERATURE_FIELD, "invalid"); + + var thrownException = expectThrows(ValidationException.class, () -> AmazonBedrockChatCompletionTaskSettings.fromMap(taskMap)); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString( + Strings.format("field [temperature] is not of the expected type. The value [invalid] cannot be converted to a [Double]") + ) + ); + } + + public void testFromMap_TopPIsInvalidValue_ThrowsValidationException() { + var taskMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512); + taskMap.put(TOP_P_FIELD, "invalid"); + + var thrownException = expectThrows(ValidationException.class, () -> AmazonBedrockChatCompletionTaskSettings.fromMap(taskMap)); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString( + Strings.format("field [top_p] is not of the expected type. The value [invalid] cannot be converted to a [Double]") + ) + ); + } + + public void testFromMap_TopKIsInvalidValue_ThrowsValidationException() { + var taskMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512); + taskMap.put(TOP_K_FIELD, "invalid"); + + var thrownException = expectThrows(ValidationException.class, () -> AmazonBedrockChatCompletionTaskSettings.fromMap(taskMap)); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString("field [top_k] is not of the expected type. 
The value [invalid] cannot be converted to a [Double]") + ); + } + + public void testFromMap_MaxNewTokensIsInvalidValue_ThrowsValidationException() { + var taskMap = getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512); + taskMap.put(MAX_NEW_TOKENS_FIELD, "invalid"); + + var thrownException = expectThrows(ValidationException.class, () -> AmazonBedrockChatCompletionTaskSettings.fromMap(taskMap)); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString( + Strings.format("field [max_new_tokens] is not of the expected type. The value [invalid] cannot be converted to a [Integer]") + ) + ); + } + + public void testFromMap_WithNoValues_DoesNotThrowException() { + var taskMap = AmazonBedrockChatCompletionTaskSettings.fromMap(new HashMap(Map.of())); + assertNull(taskMap.temperature()); + assertNull(taskMap.topP()); + assertNull(taskMap.topK()); + assertNull(taskMap.maxNewTokens()); + } + + public void testOverrideWith_KeepsOriginalValuesWithOverridesAreNull() { + var settings = AmazonBedrockChatCompletionTaskSettings.fromMap(getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512)); + var overrideSettings = AmazonBedrockChatCompletionTaskSettings.of(settings, AmazonBedrockChatCompletionTaskSettings.EMPTY_SETTINGS); + MatcherAssert.assertThat(overrideSettings, is(settings)); + } + + public void testOverrideWith_UsesTemperatureOverride() { + var settings = AmazonBedrockChatCompletionTaskSettings.fromMap(getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512)); + var overrideSettings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap( + getChatCompletionTaskSettingsMap(0.3, null, null, null) + ); + var overriddenTaskSettings = AmazonBedrockChatCompletionTaskSettings.of(settings, overrideSettings); + MatcherAssert.assertThat(overriddenTaskSettings, is(new AmazonBedrockChatCompletionTaskSettings(0.3, 0.5, 0.6, 512))); + } + + public void testOverrideWith_UsesTopPOverride() { + var settings = AmazonBedrockChatCompletionTaskSettings.fromMap(getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512)); + var overrideSettings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap( + getChatCompletionTaskSettingsMap(null, 0.2, null, null) + ); + var overriddenTaskSettings = AmazonBedrockChatCompletionTaskSettings.of(settings, overrideSettings); + MatcherAssert.assertThat(overriddenTaskSettings, is(new AmazonBedrockChatCompletionTaskSettings(1.0, 0.2, 0.6, 512))); + } + + public void testOverrideWith_UsesDoSampleOverride() { + var settings = AmazonBedrockChatCompletionTaskSettings.fromMap(getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512)); + var overrideSettings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap( + getChatCompletionTaskSettingsMap(null, null, 0.1, null) + ); + var overriddenTaskSettings = AmazonBedrockChatCompletionTaskSettings.of(settings, overrideSettings); + MatcherAssert.assertThat(overriddenTaskSettings, is(new AmazonBedrockChatCompletionTaskSettings(1.0, 0.5, 0.1, 512))); + } + + public void testOverrideWith_UsesMaxNewTokensOverride() { + var settings = AmazonBedrockChatCompletionTaskSettings.fromMap(getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512)); + var overrideSettings = AmazonBedrockChatCompletionRequestTaskSettings.fromMap( + getChatCompletionTaskSettingsMap(null, null, null, 128) + ); + var overriddenTaskSettings = AmazonBedrockChatCompletionTaskSettings.of(settings, overrideSettings); + MatcherAssert.assertThat(overriddenTaskSettings, is(new AmazonBedrockChatCompletionTaskSettings(1.0, 0.5, 0.6, 128))); + } + + public void 
testToXContent_WithoutParameters() throws IOException { + var settings = AmazonBedrockChatCompletionTaskSettings.fromMap(getChatCompletionTaskSettingsMap(null, null, null, null)); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + settings.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, is("{}")); + } + + public void testToXContent_WithParameters() throws IOException { + var settings = AmazonBedrockChatCompletionTaskSettings.fromMap(getChatCompletionTaskSettingsMap(1.0, 0.5, 0.6, 512)); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + settings.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, is(""" + {"temperature":1.0,"top_p":0.5,"top_k":0.6,"max_new_tokens":512}""")); + } + + public static Map getChatCompletionTaskSettingsMap( + @Nullable Double temperature, + @Nullable Double topP, + @Nullable Double topK, + @Nullable Integer maxNewTokens + ) { + var map = new HashMap(); + + if (temperature != null) { + map.put(TEMPERATURE_FIELD, temperature); + } + + if (topP != null) { + map.put(TOP_P_FIELD, topP); + } + + if (topK != null) { + map.put(TOP_K_FIELD, topK); + } + + if (maxNewTokens != null) { + map.put(MAX_NEW_TOKENS_FIELD, maxNewTokens); + } + + return map; + } + + @Override + protected AmazonBedrockChatCompletionTaskSettings mutateInstanceForVersion( + AmazonBedrockChatCompletionTaskSettings instance, + TransportVersion version + ) { + return instance; + } + + @Override + protected Writeable.Reader instanceReader() { + return AmazonBedrockChatCompletionTaskSettings::new; + } + + @Override + protected AmazonBedrockChatCompletionTaskSettings createTestInstance() { + return createRandom(); + } + + @Override + protected AmazonBedrockChatCompletionTaskSettings mutateInstance(AmazonBedrockChatCompletionTaskSettings instance) throws IOException { + return randomValueOtherThan(instance, AmazonBedrockChatCompletionTaskSettingsTests::createRandom); + } + + private static AmazonBedrockChatCompletionTaskSettings createRandom() { + return new AmazonBedrockChatCompletionTaskSettings( + randomFrom(new Double[] { null, randomDouble() }), + randomFrom(new Double[] { null, randomDouble() }), + randomFrom(new Double[] { null, randomDouble() }), + randomFrom(new Integer[] { null, randomNonNegativeInt() }) + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsModelTests.java new file mode 100644 index 0000000000000..711e3cbb5a511 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsModelTests.java @@ -0,0 +1,81 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings; + +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.EmptyTaskSettings; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockSecretSettings; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; + +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; + +public class AmazonBedrockEmbeddingsModelTests extends ESTestCase { + + public void testCreateModel_withTaskSettings_shouldFail() { + var baseModel = createModel("id", "region", "model", AmazonBedrockProvider.AMAZONTITAN, "accesskey", "secretkey"); + var thrownException = assertThrows( + ValidationException.class, + () -> AmazonBedrockEmbeddingsModel.of(baseModel, Map.of("testkey", "testvalue")) + ); + assertThat(thrownException.getMessage(), containsString("Amazon Bedrock embeddings model cannot have task settings")); + } + + // model creation only - no tests to define, but we want to have the public createModel + // method available + + public static AmazonBedrockEmbeddingsModel createModel( + String inferenceId, + String region, + String model, + AmazonBedrockProvider provider, + String accessKey, + String secretKey + ) { + return createModel(inferenceId, region, model, provider, null, false, null, null, new RateLimitSettings(240), accessKey, secretKey); + } + + public static AmazonBedrockEmbeddingsModel createModel( + String inferenceId, + String region, + String model, + AmazonBedrockProvider provider, + @Nullable Integer dimensions, + boolean dimensionsSetByUser, + @Nullable Integer maxTokens, + @Nullable SimilarityMeasure similarity, + RateLimitSettings rateLimitSettings, + String accessKey, + String secretKey + ) { + return new AmazonBedrockEmbeddingsModel( + inferenceId, + TaskType.TEXT_EMBEDDING, + "amazonbedrock", + new AmazonBedrockEmbeddingsServiceSettings( + region, + model, + provider, + dimensions, + dimensionsSetByUser, + maxTokens, + similarity, + rateLimitSettings + ), + new EmptyTaskSettings(), + new AmazonBedrockSecretSettings(new SecureString(accessKey), new SecureString(secretKey)) + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettingsTests.java new file mode 100644 index 0000000000000..a100b89e1db6e --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/embeddings/AmazonBedrockEmbeddingsServiceSettingsTests.java @@ -0,0 +1,404 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.inference.SimilarityMeasure; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.ml.AbstractBWCWireSerializationTestCase; +import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; +import org.elasticsearch.xpack.inference.services.ServiceFields; +import org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockProvider; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; +import org.elasticsearch.xpack.inference.services.settings.RateLimitSettingsTests; +import org.hamcrest.CoreMatchers; +import org.hamcrest.MatcherAssert; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import static org.elasticsearch.xpack.inference.services.ServiceFields.DIMENSIONS; +import static org.elasticsearch.xpack.inference.services.ServiceFields.MAX_INPUT_TOKENS; +import static org.elasticsearch.xpack.inference.services.ServiceFields.SIMILARITY; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.MODEL_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.PROVIDER_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.AmazonBedrockConstants.REGION_FIELD; +import static org.elasticsearch.xpack.inference.services.amazonbedrock.embeddings.AmazonBedrockEmbeddingsServiceSettings.DIMENSIONS_SET_BY_USER; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; + +public class AmazonBedrockEmbeddingsServiceSettingsTests extends AbstractBWCWireSerializationTestCase< + AmazonBedrockEmbeddingsServiceSettings> { + + public void testFromMap_Request_CreatesSettingsCorrectly() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var maxInputTokens = 512; + var serviceSettings = AmazonBedrockEmbeddingsServiceSettings.fromMap( + createEmbeddingsRequestSettingsMap(region, model, provider, null, null, maxInputTokens, SimilarityMeasure.COSINE), + ConfigurationParseContext.REQUEST + ); + + assertThat( + serviceSettings, + is( + new AmazonBedrockEmbeddingsServiceSettings( + region, + model, + AmazonBedrockProvider.AMAZONTITAN, + null, + false, + maxInputTokens, + SimilarityMeasure.COSINE, + null + ) + ) + ); + } + + public void testFromMap_RequestWithRateLimit_CreatesSettingsCorrectly() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var maxInputTokens = 512; + var settingsMap = createEmbeddingsRequestSettingsMap(region, model, provider, null, null, maxInputTokens, SimilarityMeasure.COSINE); + settingsMap.put(RateLimitSettings.FIELD_NAME, new HashMap<>(Map.of(RateLimitSettings.REQUESTS_PER_MINUTE_FIELD, 3))); + + var serviceSettings = AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST); + + assertThat( + serviceSettings, + is( + new AmazonBedrockEmbeddingsServiceSettings( + region, + model, + AmazonBedrockProvider.AMAZONTITAN, + null, + false, + maxInputTokens, + SimilarityMeasure.COSINE, + new 
RateLimitSettings(3) + ) + ) + ); + } + + public void testFromMap_Request_DimensionsSetByUser_IsFalse_WhenDimensionsAreNotPresent() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var maxInputTokens = 512; + var settingsMap = createEmbeddingsRequestSettingsMap(region, model, provider, null, null, maxInputTokens, SimilarityMeasure.COSINE); + var serviceSettings = AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST); + + assertThat( + serviceSettings, + is( + new AmazonBedrockEmbeddingsServiceSettings( + region, + model, + AmazonBedrockProvider.AMAZONTITAN, + null, + false, + maxInputTokens, + SimilarityMeasure.COSINE, + null + ) + ) + ); + } + + public void testFromMap_Request_DimensionsSetByUser_ShouldThrowWhenPresent() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var maxInputTokens = 512; + + var settingsMap = createEmbeddingsRequestSettingsMap(region, model, provider, null, true, maxInputTokens, SimilarityMeasure.COSINE); + + var thrownException = expectThrows( + ValidationException.class, + () -> AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST) + ); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString( + Strings.format("Validation Failed: 1: [service_settings] does not allow the setting [%s];", DIMENSIONS_SET_BY_USER) + ) + ); + } + + public void testFromMap_Request_Dimensions_ShouldThrowWhenPresent() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var dims = 128; + + var settingsMap = createEmbeddingsRequestSettingsMap(region, model, provider, dims, null, null, null); + + var thrownException = expectThrows( + ValidationException.class, + () -> AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST) + ); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString(Strings.format("[service_settings] does not allow the setting [%s]", DIMENSIONS)) + ); + } + + public void testFromMap_Request_MaxTokensShouldBePositiveInteger() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var maxInputTokens = -128; + + var settingsMap = createEmbeddingsRequestSettingsMap(region, model, provider, null, null, maxInputTokens, null); + + var thrownException = expectThrows( + ValidationException.class, + () -> AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST) + ); + + MatcherAssert.assertThat( + thrownException.getMessage(), + containsString(Strings.format("[%s] must be a positive integer", MAX_INPUT_TOKENS)) + ); + } + + public void testFromMap_Persistent_CreatesSettingsCorrectly() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + var dims = 1536; + var maxInputTokens = 512; + + var settingsMap = createEmbeddingsRequestSettingsMap( + region, + model, + provider, + dims, + false, + maxInputTokens, + SimilarityMeasure.COSINE + ); + var serviceSettings = AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.PERSISTENT); + + assertThat( + serviceSettings, + is( + new AmazonBedrockEmbeddingsServiceSettings( + region, + model, + AmazonBedrockProvider.AMAZONTITAN, + dims, + false, + maxInputTokens, + SimilarityMeasure.COSINE, + null + ) + ) + ); + } + + public void testFromMap_PersistentContext_DoesNotThrowException_WhenDimensionsIsNull() { + var region = "region"; + var model = 
"model-id"; + var provider = "amazontitan"; + + var settingsMap = createEmbeddingsRequestSettingsMap(region, model, provider, null, true, null, null); + var serviceSettings = AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.PERSISTENT); + + assertThat( + serviceSettings, + is(new AmazonBedrockEmbeddingsServiceSettings(region, model, AmazonBedrockProvider.AMAZONTITAN, null, true, null, null, null)) + ); + } + + public void testFromMap_PersistentContext_DoesNotThrowException_WhenSimilarityIsPresent() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + + var settingsMap = createEmbeddingsRequestSettingsMap(region, model, provider, null, true, null, SimilarityMeasure.DOT_PRODUCT); + var serviceSettings = AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.PERSISTENT); + + assertThat( + serviceSettings, + is( + new AmazonBedrockEmbeddingsServiceSettings( + region, + model, + AmazonBedrockProvider.AMAZONTITAN, + null, + true, + null, + SimilarityMeasure.DOT_PRODUCT, + null + ) + ) + ); + } + + public void testFromMap_PersistentContext_ThrowsException_WhenDimensionsSetByUserIsNull() { + var region = "region"; + var model = "model-id"; + var provider = "amazontitan"; + + var settingsMap = createEmbeddingsRequestSettingsMap(region, model, provider, 1, null, null, null); + + var exception = expectThrows( + ValidationException.class, + () -> AmazonBedrockEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.PERSISTENT) + ); + + assertThat( + exception.getMessage(), + containsString("Validation Failed: 1: [service_settings] does not contain the required setting [dimensions_set_by_user];") + ); + } + + public void testToXContent_WritesDimensionsSetByUserTrue() throws IOException { + var entity = new AmazonBedrockEmbeddingsServiceSettings( + "testregion", + "testmodel", + AmazonBedrockProvider.AMAZONTITAN, + null, + true, + null, + null, + new RateLimitSettings(2) + ); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + entity.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, CoreMatchers.is(""" + {"region":"testregion","model":"testmodel","provider":"AMAZONTITAN",""" + """ + "rate_limit":{"requests_per_minute":2},"dimensions_set_by_user":true}""")); + } + + public void testToXContent_WritesAllValues() throws IOException { + var entity = new AmazonBedrockEmbeddingsServiceSettings( + "testregion", + "testmodel", + AmazonBedrockProvider.AMAZONTITAN, + 1024, + false, + 512, + null, + new RateLimitSettings(3) + ); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + entity.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, CoreMatchers.is(""" + {"region":"testregion","model":"testmodel","provider":"AMAZONTITAN",""" + """ + "rate_limit":{"requests_per_minute":3},"dimensions":1024,"max_input_tokens":512,"dimensions_set_by_user":false}""")); + } + + public void testToFilteredXContent_WritesAllValues_ExceptDimensionsSetByUser() throws IOException { + var entity = new AmazonBedrockEmbeddingsServiceSettings( + "testregion", + "testmodel", + AmazonBedrockProvider.AMAZONTITAN, + 1024, + false, + 512, + null, + new RateLimitSettings(3) + ); + + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + var filteredXContent = entity.getFilteredXContentObject(); + 
filteredXContent.toXContent(builder, null); + String xContentResult = Strings.toString(builder); + + assertThat(xContentResult, CoreMatchers.is(""" + {"region":"testregion","model":"testmodel","provider":"AMAZONTITAN",""" + """ + "rate_limit":{"requests_per_minute":3},"dimensions":1024,"max_input_tokens":512}""")); + } + + public static HashMap createEmbeddingsRequestSettingsMap( + String region, + String model, + String provider, + @Nullable Integer dimensions, + @Nullable Boolean dimensionsSetByUser, + @Nullable Integer maxTokens, + @Nullable SimilarityMeasure similarityMeasure + ) { + var map = new HashMap(Map.of(REGION_FIELD, region, MODEL_FIELD, model, PROVIDER_FIELD, provider)); + + if (dimensions != null) { + map.put(ServiceFields.DIMENSIONS, dimensions); + } + + if (dimensionsSetByUser != null) { + map.put(DIMENSIONS_SET_BY_USER, dimensionsSetByUser.equals(Boolean.TRUE)); + } + + if (maxTokens != null) { + map.put(ServiceFields.MAX_INPUT_TOKENS, maxTokens); + } + + if (similarityMeasure != null) { + map.put(SIMILARITY, similarityMeasure.toString()); + } + + return map; + } + + @Override + protected AmazonBedrockEmbeddingsServiceSettings mutateInstanceForVersion( + AmazonBedrockEmbeddingsServiceSettings instance, + TransportVersion version + ) { + return instance; + } + + @Override + protected Writeable.Reader instanceReader() { + return AmazonBedrockEmbeddingsServiceSettings::new; + } + + @Override + protected AmazonBedrockEmbeddingsServiceSettings createTestInstance() { + return createRandom(); + } + + @Override + protected AmazonBedrockEmbeddingsServiceSettings mutateInstance(AmazonBedrockEmbeddingsServiceSettings instance) throws IOException { + return randomValueOtherThan(instance, AmazonBedrockEmbeddingsServiceSettingsTests::createRandom); + } + + private static AmazonBedrockEmbeddingsServiceSettings createRandom() { + return new AmazonBedrockEmbeddingsServiceSettings( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + randomFrom(AmazonBedrockProvider.values()), + randomFrom(new Integer[] { null, randomNonNegativeInt() }), + randomBoolean(), + randomFrom(new Integer[] { null, randomNonNegativeInt() }), + randomFrom(new SimilarityMeasure[] { null, randomFrom(SimilarityMeasure.values()) }), + RateLimitSettingsTests.createRandom() + ); + } +} diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml index 2e01e2b9c8d04..6d3c1231440fb 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml @@ -29,6 +29,8 @@ setup: type: text topic: type: keyword + subtopic: + type: keyword - do: index: @@ -37,6 +39,7 @@ setup: body: text: "As seen from Earth, a solar eclipse happens when the Moon is directly between the Earth and the Sun." topic: ["science"] + subtopic: ["technology"] refresh: true - do: @@ -46,6 +49,7 @@ setup: body: text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun." 
topic: ["science"] + subtopic: ["astronomy"] refresh: true - do: @@ -88,3 +92,35 @@ setup: - match: { hits.hits.1._id: "doc_1" } - match: { hits.hits.1._rank: 2 } - close_to: { hits.hits.1._score: { value: 0.2, error: 0.001 } } + +--- +"Simple text similarity rank retriever and filtering": + + - do: + search: + index: test-index + body: + track_total_hits: true + fields: [ "text", "topic" ] + retriever: + text_similarity_reranker: + retriever: + standard: + query: + term: + topic: "science" + filter: + term: + subtopic: "technology" + rank_window_size: 10 + inference_id: my-rerank-model + inference_text: "How often does the moon hide the sun?" + field: text + size: 10 + + - match: { hits.total.value : 1 } + - length: { hits.hits: 1 } + + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.0._rank: 1 } + - close_to: { hits.hits.0._score: { value: 0.2, error: 0.001 } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelAction.java index c4cefc1750c35..cdff8f40e35f5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelAction.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.action; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchSecurityException; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.TransportVersion; @@ -37,6 +38,7 @@ import org.elasticsearch.license.License; import org.elasticsearch.license.LicenseUtils; import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.tasks.Task; @@ -143,61 +145,7 @@ protected void masterOperation( // NOTE: hasModelDefinition is false if we don't parse it. 
But, if the fully parsed model was already provided, continue boolean hasModelDefinition = config.getModelDefinition() != null; if (hasModelDefinition) { - try { - config.getModelDefinition().getTrainedModel().validate(); - } catch (ElasticsearchException ex) { - finalResponseListener.onFailure( - ExceptionsHelper.badRequestException("Definition for [{}] has validation failures.", ex, config.getModelId()) - ); - return; - } - - TrainedModelType trainedModelType = TrainedModelType.typeFromTrainedModel(config.getModelDefinition().getTrainedModel()); - if (trainedModelType == null) { - finalResponseListener.onFailure( - ExceptionsHelper.badRequestException( - "Unknown trained model definition class [{}]", - config.getModelDefinition().getTrainedModel().getName() - ) - ); - return; - } - - if (config.getModelType() == null) { - // Set the model type from the definition - config = new TrainedModelConfig.Builder(config).setModelType(trainedModelType).build(); - } else if (trainedModelType != config.getModelType()) { - finalResponseListener.onFailure( - ExceptionsHelper.badRequestException( - "{} [{}] does not match the model definition type [{}]", - TrainedModelConfig.MODEL_TYPE.getPreferredName(), - config.getModelType(), - trainedModelType - ) - ); - return; - } - - if (config.getInferenceConfig().isTargetTypeSupported(config.getModelDefinition().getTrainedModel().targetType()) == false) { - finalResponseListener.onFailure( - ExceptionsHelper.badRequestException( - "Model [{}] inference config type [{}] does not support definition target type [{}]", - config.getModelId(), - config.getInferenceConfig().getName(), - config.getModelDefinition().getTrainedModel().targetType() - ) - ); - return; - } - - TransportVersion minCompatibilityVersion = config.getModelDefinition().getTrainedModel().getMinimalCompatibilityVersion(); - if (state.getMinTransportVersion().before(minCompatibilityVersion)) { - finalResponseListener.onFailure( - ExceptionsHelper.badRequestException( - "Cannot create model [{}] while cluster upgrade is in progress.", - config.getModelId() - ) - ); + if (validateModelDefinition(config, state, licenseState, finalResponseListener) == false) { return; } } @@ -507,6 +455,85 @@ private void checkTagsAgainstModelIds(List tags, ActionListener li ); } + public static boolean validateModelDefinition( + TrainedModelConfig config, + ClusterState state, + XPackLicenseState licenseState, + ActionListener finalResponseListener + ) { + try { + config.getModelDefinition().getTrainedModel().validate(); + } catch (ElasticsearchException ex) { + finalResponseListener.onFailure( + ExceptionsHelper.badRequestException("Definition for [{}] has validation failures.", ex, config.getModelId()) + ); + return false; + } + + TrainedModelType trainedModelType = TrainedModelType.typeFromTrainedModel(config.getModelDefinition().getTrainedModel()); + if (trainedModelType == null) { + finalResponseListener.onFailure( + ExceptionsHelper.badRequestException( + "Unknown trained model definition class [{}]", + config.getModelDefinition().getTrainedModel().getName() + ) + ); + return false; + } + + var configModelType = config.getModelType(); + if (configModelType == null) { + // Set the model type from the definition + config = new TrainedModelConfig.Builder(config).setModelType(trainedModelType).build(); + } else if (trainedModelType != configModelType) { + finalResponseListener.onFailure( + ExceptionsHelper.badRequestException( + "{} [{}] does not match the model definition type [{}]", + 
TrainedModelConfig.MODEL_TYPE.getPreferredName(), + configModelType, + trainedModelType + ) + ); + return false; + } + + var inferenceConfig = config.getInferenceConfig(); + if (inferenceConfig.isTargetTypeSupported(config.getModelDefinition().getTrainedModel().targetType()) == false) { + finalResponseListener.onFailure( + ExceptionsHelper.badRequestException( + "Model [{}] inference config type [{}] does not support definition target type [{}]", + config.getModelId(), + config.getInferenceConfig().getName(), + config.getModelDefinition().getTrainedModel().targetType() + ) + ); + return false; + } + + var minLicenseSupported = inferenceConfig.getMinLicenseSupportedForAction(RestRequest.Method.PUT); + if (licenseState.isAllowedByLicense(minLicenseSupported) == false) { + finalResponseListener.onFailure( + new ElasticsearchSecurityException( + "Model of type [{}] requires [{}] license level", + RestStatus.FORBIDDEN, + config.getInferenceConfig().getName(), + minLicenseSupported + ) + ); + return false; + } + + TransportVersion minCompatibilityVersion = config.getModelDefinition().getTrainedModel().getMinimalCompatibilityVersion(); + if (state.getMinTransportVersion().before(minCompatibilityVersion)) { + finalResponseListener.onFailure( + ExceptionsHelper.badRequestException("Cannot create model [{}] while cluster upgrade is in progress.", config.getModelId()) + ); + return false; + } + + return true; + } + @Override protected ClusterBlockException checkBlock(Request request, ClusterState state) { return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelActionTests.java index 4a1a654a9a29f..4546058ddda13 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportPutTrainedModelActionTests.java @@ -8,16 +8,21 @@ package org.elasticsearch.xpack.ml.action; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchSecurityException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse; import org.elasticsearch.action.admin.cluster.node.tasks.list.TransportListTasksAction; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.license.License; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.license.internal.XPackLicenseStatus; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; @@ -35,11 +40,13 @@ import org.elasticsearch.xpack.core.ml.inference.MlInferenceNamedXContentProvider; import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfigTests; +import org.elasticsearch.xpack.core.ml.inference.TrainedModelDefinition; import 
org.elasticsearch.xpack.core.ml.inference.TrainedModelInputTests; import org.elasticsearch.xpack.core.ml.inference.TrainedModelType; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ClassificationConfigTests; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.FillMaskConfigTests; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.LearningToRankConfig; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ModelPackageConfig; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ModelPackageConfigTests; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NerConfigTests; @@ -50,6 +57,7 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextEmbeddingConfigTests; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigTests; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfigTests; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.langident.LangIdentNeuralNetwork; import org.junit.After; import org.junit.Before; import org.mockito.ArgumentCaptor; @@ -60,10 +68,12 @@ import java.util.Map; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import static org.elasticsearch.xpack.ml.utils.TaskRetrieverTests.getTaskInfoListOfOne; import static org.elasticsearch.xpack.ml.utils.TaskRetrieverTests.mockClientWithTasksResponse; import static org.elasticsearch.xpack.ml.utils.TaskRetrieverTests.mockListTasksClient; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.same; @@ -73,6 +83,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class TransportPutTrainedModelActionTests extends ESTestCase { private static final TimeValue TIMEOUT = new TimeValue(30, TimeUnit.SECONDS); @@ -273,6 +284,56 @@ public void testVerifyMlNodesAndModelArchitectures_GivenArchitecturesMatch_ThenT ensureNoWarnings(); } + public void testValidateModelDefinition_FailsWhenLicenseIsNotSupported() throws IOException { + ModelPackageConfig packageConfig = ModelPackageConfigTests.randomModulePackageConfig(); + + TrainedModelConfig.Builder trainedModelConfigBuilder = new TrainedModelConfig.Builder().setModelId( + "." 
+ packageConfig.getPackagedModelId() + ).setInput(TrainedModelInputTests.createRandomInput()); + + TransportPutTrainedModelAction.setTrainedModelConfigFieldsFromPackagedModel( + trainedModelConfigBuilder, + packageConfig, + xContentRegistry() + ); + + var mockTrainedModelDefinition = mock(TrainedModelDefinition.class); + when(mockTrainedModelDefinition.getTrainedModel()).thenReturn(mock(LangIdentNeuralNetwork.class)); + var trainedModelConfig = trainedModelConfigBuilder.setLicenseLevel("basic").build(); + + var mockModelInferenceConfig = spy(new LearningToRankConfig(1, List.of(), Map.of())); + when(mockModelInferenceConfig.isTargetTypeSupported(any())).thenReturn(true); + + var mockTrainedModelConfig = spy(trainedModelConfig); + when(mockTrainedModelConfig.getModelType()).thenReturn(TrainedModelType.LANG_IDENT); + when(mockTrainedModelConfig.getModelDefinition()).thenReturn(mockTrainedModelDefinition); + when(mockTrainedModelConfig.getInferenceConfig()).thenReturn(mockModelInferenceConfig); + + ActionListener responseListener = ActionListener.wrap( + response -> fail("Expected exception, but got response: " + response), + exception -> { + assertThat(exception, instanceOf(ElasticsearchSecurityException.class)); + assertThat(exception.getMessage(), is("Model of type [learning_to_rank] requires [ENTERPRISE] license level")); + } + ); + + var mockClusterState = mock(ClusterState.class); + + AtomicInteger currentTime = new AtomicInteger(100); + var mockXPackLicenseStatus = new XPackLicenseStatus(License.OperationMode.BASIC, true, ""); + var mockLicenseState = new XPackLicenseState(currentTime::get, mockXPackLicenseStatus); + + assertThat( + TransportPutTrainedModelAction.validateModelDefinition( + mockTrainedModelConfig, + mockClusterState, + mockLicenseState, + responseListener + ), + is(false) + ); + } + private static void prepareGetTrainedModelResponse(Client client, List trainedModels) { doAnswer(invocationOnMock -> { @SuppressWarnings("unchecked") diff --git a/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java b/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java index 12eeaf8732235..e0433ea6fdd71 100644 --- a/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java +++ b/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java @@ -77,7 +77,7 @@ public class MonitoringTemplateRegistry extends IndexTemplateRegistry { * writes monitoring data in ECS format as of 8.0. These templates define the ECS schema as well as alias fields for the old monitoring * mappings that point to the corresponding ECS fields. 
*/ - public static final int STACK_MONITORING_REGISTRY_VERSION = 8_00_00_99 + 17; + public static final int STACK_MONITORING_REGISTRY_VERSION = 8_00_00_99 + 18; private static final String STACK_MONITORING_REGISTRY_VERSION_VARIABLE = "xpack.stack.monitoring.template.release.version"; private static final String STACK_TEMPLATE_VERSION = "8"; private static final String STACK_TEMPLATE_VERSION_VARIABLE = "xpack.stack.monitoring.template.version"; diff --git a/x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/job/RollupIndexerStateTests.java b/x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/job/RollupIndexerStateTests.java index 105711c4057a6..ad5e6a0cf9b40 100644 --- a/x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/job/RollupIndexerStateTests.java +++ b/x-pack/plugin/rollup/src/test/java/org/elasticsearch/xpack/rollup/job/RollupIndexerStateTests.java @@ -41,6 +41,7 @@ import java.util.function.Function; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.startsWith; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; @@ -556,22 +557,22 @@ public void testMultipleJobTriggering() throws Exception { assertThat(indexer.getState(), equalTo(IndexerState.STARTED)); // This may take more than one attempt due to a cleanup/transition phase // that happens after state change to STARTED (`isJobFinishing`). - assertBusy(() -> indexer.maybeTriggerAsyncJob(System.currentTimeMillis())); + assertBusy(() -> assertTrue(indexer.maybeTriggerAsyncJob(System.currentTimeMillis()))); assertThat(indexer.getState(), equalTo(IndexerState.INDEXING)); assertFalse(indexer.maybeTriggerAsyncJob(System.currentTimeMillis())); assertThat(indexer.getState(), equalTo(IndexerState.INDEXING)); latch.countDown(); assertBusy(() -> assertThat(indexer.getState(), equalTo(IndexerState.STARTED))); - assertThat(indexer.getStats().getNumInvocations(), equalTo((long) i + 1)); assertThat(indexer.getStats().getNumPages(), equalTo((long) i + 1)); } final CountDownLatch latch = indexer.newLatch(); - assertBusy(() -> indexer.maybeTriggerAsyncJob(System.currentTimeMillis())); + assertBusy(() -> assertTrue(indexer.maybeTriggerAsyncJob(System.currentTimeMillis()))); assertThat(indexer.stop(), equalTo(IndexerState.STOPPING)); assertThat(indexer.getState(), Matchers.either(Matchers.is(IndexerState.STOPPING)).or(Matchers.is(IndexerState.STOPPED))); latch.countDown(); assertBusy(() -> assertThat(indexer.getState(), equalTo(IndexerState.STOPPED))); assertTrue(indexer.abort()); + assertThat(indexer.getStats().getNumInvocations(), greaterThanOrEqualTo(6L)); } finally { ThreadPool.terminate(threadPool, 30, TimeUnit.SECONDS); } diff --git a/x-pack/plugin/security/licenses/nimbus-jose-jwt-LICENSE.txt b/x-pack/plugin/security/licenses/nimbus-jose-jwt-LICENSE.txt new file mode 100644 index 0000000000000..d645695673349 --- /dev/null +++ b/x-pack/plugin/security/licenses/nimbus-jose-jwt-LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/x-pack/plugin/core/licenses/nimbus-jose-jwt-NOTICE.txt b/x-pack/plugin/security/licenses/nimbus-jose-jwt-NOTICE.txt similarity index 100% rename from x-pack/plugin/core/licenses/nimbus-jose-jwt-NOTICE.txt rename to x-pack/plugin/security/licenses/nimbus-jose-jwt-NOTICE.txt diff --git a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java index ffa4d1082c7e6..2e92b646a45e4 100644 --- a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java +++ b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java @@ -39,6 +39,9 @@ public class Constants { "cluster:admin/indices/dangling/find", "cluster:admin/indices/dangling/import", "cluster:admin/indices/dangling/list", + "cluster:admin/ingest/geoip/database/delete", + "cluster:admin/ingest/geoip/database/get", + "cluster:admin/ingest/geoip/database/put", "cluster:admin/ingest/pipeline/delete", "cluster:admin/ingest/pipeline/get", "cluster:admin/ingest/pipeline/put", @@ -353,6 +356,7 @@ public class Constants { "cluster:monitor/main", "cluster:monitor/nodes/capabilities", "cluster:monitor/nodes/data_tier_usage", + "cluster:monitor/nodes/features", "cluster:monitor/nodes/hot_threads", "cluster:monitor/nodes/info", "cluster:monitor/nodes/stats", diff --git a/x-pack/plugin/security/qa/security-trial/src/javaRestTest/java/org/elasticsearch/xpack/security/role/BulkPutRoleRestIT.java b/x-pack/plugin/security/qa/security-trial/src/javaRestTest/java/org/elasticsearch/xpack/security/role/BulkPutRoleRestIT.java index 0297abad7a508..88b952f33394e 100644 --- a/x-pack/plugin/security/qa/security-trial/src/javaRestTest/java/org/elasticsearch/xpack/security/role/BulkPutRoleRestIT.java +++ b/x-pack/plugin/security/qa/security-trial/src/javaRestTest/java/org/elasticsearch/xpack/security/role/BulkPutRoleRestIT.java @@ -181,15 +181,74 @@ public void testPutNoValidRoles() throws Exception { public void testBulkUpdates() throws Exception { String request = """ {"roles": {"test1": {"cluster": ["all"],"indices": [{"names": ["*"],"privileges": ["all"]}]}, "test2": - {"cluster": ["all"],"indices": [{"names": ["*"],"privileges": ["read"]}]}, "test3": - {"cluster": ["all"],"indices": [{"names": ["*"],"privileges": ["write"]}]}}}"""; - + {"cluster": ["all"],"indices": [{"names": ["*"],"privileges": ["read"]}], "description": "something"}, "test3": + {"cluster": ["all"],"indices": [{"names": ["*"],"privileges": ["write"]}], "remote_indices":[{"names":["logs-*"], + "privileges":["read"],"clusters":["my_cluster*","other_cluster"]}]}}}"""; { Map<String, Object> responseMap = upsertRoles(request); assertThat(responseMap, not(hasKey("errors"))); List<Map<String, Object>> items = (List<Map<String, Object>>) responseMap.get("created"); assertEquals(3, items.size()); + + fetchRoleAndAssertEqualsExpected( + "test1", + new RoleDescriptor( + "test1", + new String[] { "all" }, + new RoleDescriptor.IndicesPrivileges[] { + RoleDescriptor.IndicesPrivileges.builder().indices("*").privileges("all").build() }, + null, + null, + null, + null, + null, + null, + null, + null, + null + ) + ); + fetchRoleAndAssertEqualsExpected( + "test2", + new RoleDescriptor( + "test2", + new String[] { "all" }, + new RoleDescriptor.IndicesPrivileges[] { +
RoleDescriptor.IndicesPrivileges.builder().indices("*").privileges("read").build() }, + null, + null, + null, + null, + null, + null, + null, + null, + "something" + ) + ); + fetchRoleAndAssertEqualsExpected( + "test3", + new RoleDescriptor( + "test3", + new String[] { "all" }, + new RoleDescriptor.IndicesPrivileges[] { + RoleDescriptor.IndicesPrivileges.builder().indices("*").privileges("write").build() }, + null, + null, + null, + null, + null, + new RoleDescriptor.RemoteIndicesPrivileges[] { + RoleDescriptor.RemoteIndicesPrivileges.builder("my_cluster*", "other_cluster") + .indices("logs-*") + .privileges("read") + .build() }, + null, + null, + null + ) + ); } { Map responseMap = upsertRoles(request); @@ -200,7 +259,7 @@ public void testBulkUpdates() throws Exception { } { request = """ - {"roles": {"test1": {"cluster": ["all"],"indices": [{"names": ["*"],"privileges": ["read"]}]}, "test2": + {"roles": {"test1": {}, "test2": {"cluster": ["all"],"indices": [{"names": ["*"],"privileges": ["all"]}]}, "test3": {"cluster": ["all"],"indices": [{"names": ["*"],"privileges": ["all"]}]}}}"""; @@ -208,6 +267,49 @@ public void testBulkUpdates() throws Exception { assertThat(responseMap, not(hasKey("errors"))); List> items = (List>) responseMap.get("updated"); assertEquals(3, items.size()); + + assertThat(responseMap, not(hasKey("errors"))); + + fetchRoleAndAssertEqualsExpected( + "test1", + new RoleDescriptor("test1", null, null, null, null, null, null, null, null, null, null, null) + ); + fetchRoleAndAssertEqualsExpected( + "test2", + new RoleDescriptor( + "test2", + new String[] { "all" }, + new RoleDescriptor.IndicesPrivileges[] { + RoleDescriptor.IndicesPrivileges.builder().indices("*").privileges("all").build() }, + null, + null, + null, + null, + null, + null, + null, + null, + null + ) + ); + fetchRoleAndAssertEqualsExpected( + "test3", + new RoleDescriptor( + "test3", + new String[] { "all" }, + new RoleDescriptor.IndicesPrivileges[] { + RoleDescriptor.IndicesPrivileges.builder().indices("*").privileges("all").build() }, + null, + null, + null, + null, + null, + null, + null, + null, + null + ) + ); } } } diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityTests.java index 704d8b75d9ed3..c0866fa7ea694 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DocumentLevelSecurityTests.java @@ -1013,6 +1013,10 @@ public void testZeroMinDocAggregation() throws Exception { prepareIndex("test").setId("2").setSource("color", "yellow", "fruit", "banana", "count", -2).setRefreshPolicy(IMMEDIATE).get(); prepareIndex("test").setId("3").setSource("color", "green", "fruit", "grape", "count", -3).setRefreshPolicy(IMMEDIATE).get(); prepareIndex("test").setId("4").setSource("color", "red", "fruit", "grape", "count", -4).setRefreshPolicy(IMMEDIATE).get(); + prepareIndex("test").setId("5") + .setSource("color", new String[] { "green", "black" }, "fruit", "grape", "count", -5) + .setRefreshPolicy(IMMEDIATE) + .get(); indicesAdmin().prepareForceMerge("test").get(); assertResponse( diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmSingleNodeTests.java 
b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmSingleNodeTests.java index 2ced54a513146..435706dce7019 100644 --- a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmSingleNodeTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmSingleNodeTests.java @@ -52,7 +52,6 @@ import org.elasticsearch.xpack.core.security.action.user.AuthenticateResponse; import org.elasticsearch.xpack.core.security.authc.Authentication; import org.elasticsearch.xpack.core.security.authc.Realm; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtAuthenticationToken; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; import org.elasticsearch.xpack.security.LocalStateSecurity; import org.elasticsearch.xpack.security.Security; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/TransportGrantAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/TransportGrantAction.java index 667b513555594..fffcb476abaa4 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/TransportGrantAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/action/TransportGrantAction.java @@ -7,24 +7,33 @@ package org.elasticsearch.xpack.security.action; +import org.elasticsearch.ElasticsearchSecurityException; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.TransportAction; +import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.tasks.Task; import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.security.action.Grant; import org.elasticsearch.xpack.core.security.action.GrantRequest; import org.elasticsearch.xpack.core.security.action.user.AuthenticateAction; import org.elasticsearch.xpack.core.security.action.user.AuthenticateRequest; import org.elasticsearch.xpack.core.security.authc.Authentication; import org.elasticsearch.xpack.core.security.authc.AuthenticationServiceField; import org.elasticsearch.xpack.core.security.authc.AuthenticationToken; +import org.elasticsearch.xpack.core.security.authc.support.BearerToken; +import org.elasticsearch.xpack.core.security.authc.support.UsernamePasswordToken; import org.elasticsearch.xpack.security.authc.AuthenticationService; +import org.elasticsearch.xpack.security.authc.jwt.JwtAuthenticationToken; import org.elasticsearch.xpack.security.authz.AuthorizationService; +import static org.elasticsearch.xpack.core.security.action.Grant.ACCESS_TOKEN_GRANT_TYPE; +import static org.elasticsearch.xpack.core.security.action.Grant.PASSWORD_GRANT_TYPE; + public abstract class TransportGrantAction extends TransportAction< Request, Response> { @@ -50,7 +59,7 @@ public TransportGrantAction( @Override public final void doExecute(Task task, Request request, ActionListener listener) { try (ThreadContext.StoredContext ignore = threadContext.stashContext()) { - final AuthenticationToken authenticationToken = request.getGrant().getAuthenticationToken(); + final AuthenticationToken authenticationToken = 
getAuthenticationToken(request.getGrant()); assert authenticationToken != null : "authentication token must not be null"; final String runAsUsername = request.getGrant().getRunAsUsername(); @@ -109,4 +118,30 @@ protected abstract void doExecuteWithGrantAuthentication( Authentication authentication, ActionListener listener ); + + public static AuthenticationToken getAuthenticationToken(Grant grant) { + assert grant.validate(null) == null : "grant is invalid"; + return switch (grant.getType()) { + case PASSWORD_GRANT_TYPE -> new UsernamePasswordToken(grant.getUsername(), grant.getPassword()); + case ACCESS_TOKEN_GRANT_TYPE -> { + SecureString clientAuthentication = grant.getClientAuthentication() != null + ? grant.getClientAuthentication().value() + : null; + AuthenticationToken token = JwtAuthenticationToken.tryParseJwt(grant.getAccessToken(), clientAuthentication); + if (token != null) { + yield token; + } + if (clientAuthentication != null) { + clientAuthentication.close(); + throw new ElasticsearchSecurityException( + "[client_authentication] not supported with the supplied access_token type", + RestStatus.BAD_REQUEST + ); + } + // here we effectively assume it's an ES access token (from the {@code TokenService}) + yield new BearerToken(grant.getAccessToken()); + } + default -> throw new ElasticsearchSecurityException("the grant type [{}] is not supported", grant.getType()); + }; + } } diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrail.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrail.java index 01104806c4a1c..bc5cc4a5e6b3f 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrail.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrail.java @@ -44,6 +44,7 @@ import org.elasticsearch.xcontent.json.JsonStringEncoder; import org.elasticsearch.xcontent.json.JsonXContent; import org.elasticsearch.xpack.core.security.SecurityContext; +import org.elasticsearch.xpack.core.security.action.ActionTypes; import org.elasticsearch.xpack.core.security.action.Grant; import org.elasticsearch.xpack.core.security.action.apikey.AbstractCreateApiKeyRequest; import org.elasticsearch.xpack.core.security.action.apikey.BaseSingleUpdateApiKeyRequest; @@ -72,6 +73,8 @@ import org.elasticsearch.xpack.core.security.action.profile.SetProfileEnabledRequest; import org.elasticsearch.xpack.core.security.action.profile.UpdateProfileDataAction; import org.elasticsearch.xpack.core.security.action.profile.UpdateProfileDataRequest; +import org.elasticsearch.xpack.core.security.action.role.BulkDeleteRolesRequest; +import org.elasticsearch.xpack.core.security.action.role.BulkPutRolesRequest; import org.elasticsearch.xpack.core.security.action.role.DeleteRoleAction; import org.elasticsearch.xpack.core.security.action.role.DeleteRoleRequest; import org.elasticsearch.xpack.core.security.action.role.PutRoleAction; @@ -291,6 +294,8 @@ public class LoggingAuditTrail implements AuditTrail, ClusterStateListener { PutUserAction.NAME, PutRoleAction.NAME, PutRoleMappingAction.NAME, + ActionTypes.BULK_PUT_ROLES.name(), + ActionTypes.BULK_DELETE_ROLES.name(), TransportSetEnabledAction.TYPE.name(), TransportChangePasswordAction.TYPE.name(), CreateApiKeyAction.NAME, @@ -731,6 +736,11 @@ public void accessGranted( } else if (msg instanceof PutRoleRequest) { assert PutRoleAction.NAME.equals(action); 
securityChangeLogEntryBuilder(requestId).withRequestBody((PutRoleRequest) msg).build(); + } else if (msg instanceof BulkPutRolesRequest bulkPutRolesRequest) { + assert ActionTypes.BULK_PUT_ROLES.name().equals(action); + for (RoleDescriptor roleDescriptor : bulkPutRolesRequest.getRoles()) { + securityChangeLogEntryBuilder(requestId).withRequestBody(roleDescriptor.getName(), roleDescriptor).build(); + } } else if (msg instanceof PutRoleMappingRequest) { assert PutRoleMappingAction.NAME.equals(action); securityChangeLogEntryBuilder(requestId).withRequestBody((PutRoleMappingRequest) msg).build(); @@ -755,6 +765,11 @@ public void accessGranted( } else if (msg instanceof DeleteRoleRequest) { assert DeleteRoleAction.NAME.equals(action); securityChangeLogEntryBuilder(requestId).withRequestBody((DeleteRoleRequest) msg).build(); + } else if (msg instanceof BulkDeleteRolesRequest bulkDeleteRolesRequest) { + assert ActionTypes.BULK_DELETE_ROLES.name().equals(action); + for (String roleName : bulkDeleteRolesRequest.getRoleNames()) { + securityChangeLogEntryBuilder(requestId).withDeleteRole(roleName).build(); + } } else if (msg instanceof DeleteRoleMappingRequest) { assert DeleteRoleMappingAction.NAME.equals(action); securityChangeLogEntryBuilder(requestId).withRequestBody((DeleteRoleMappingRequest) msg).build(); @@ -1160,15 +1175,19 @@ LogEntryBuilder withRequestBody(ChangePasswordRequest changePasswordRequest) thr } LogEntryBuilder withRequestBody(PutRoleRequest putRoleRequest) throws IOException { + return withRequestBody(putRoleRequest.name(), putRoleRequest.roleDescriptor()); + } + + LogEntryBuilder withRequestBody(String roleName, RoleDescriptor roleDescriptor) throws IOException { logEntry.with(EVENT_ACTION_FIELD_NAME, "put_role"); XContentBuilder builder = JsonXContent.contentBuilder().humanReadable(true); builder.startObject() .startObject("role") - .field("name", putRoleRequest.name()) + .field("name", roleName) // the "role_descriptor" nested structure, where the "name" is left out, is closer to the event structure // for creating API Keys .field("role_descriptor"); - withRoleDescriptor(builder, putRoleRequest.roleDescriptor()); + withRoleDescriptor(builder, roleDescriptor); builder.endObject() // role .endObject(); logEntry.with(PUT_CONFIG_FIELD_NAME, Strings.toString(builder)); @@ -1350,7 +1369,7 @@ private static void withRoleDescriptor(XContentBuilder builder, RoleDescriptor r withIndicesPrivileges(builder, indicesPrivileges); } builder.endArray(); - // the toXContent method of the {@code RoleDescriptor.ApplicationResourcePrivileges) does a good job + // the toXContent method of the {@code RoleDescriptor.ApplicationResourcePrivileges} does a good job builder.xContentList(RoleDescriptor.Fields.APPLICATIONS.getPreferredName(), roleDescriptor.getApplicationPrivileges()); builder.array(RoleDescriptor.Fields.RUN_AS.getPreferredName(), roleDescriptor.getRunAs()); if (roleDescriptor.getMetadata() != null && false == roleDescriptor.getMetadata().isEmpty()) { @@ -1401,15 +1420,7 @@ LogEntryBuilder withRequestBody(DeleteUserRequest deleteUserRequest) throws IOEx } LogEntryBuilder withRequestBody(DeleteRoleRequest deleteRoleRequest) throws IOException { - logEntry.with(EVENT_ACTION_FIELD_NAME, "delete_role"); - XContentBuilder builder = JsonXContent.contentBuilder().humanReadable(true); - builder.startObject() - .startObject("role") - .field("name", deleteRoleRequest.name()) - .endObject() // role - .endObject(); - logEntry.with(DELETE_CONFIG_FIELD_NAME, Strings.toString(builder)); - return this; + 
return withDeleteRole(deleteRoleRequest.name()); } LogEntryBuilder withRequestBody(DeleteRoleMappingRequest deleteRoleMappingRequest) throws IOException { @@ -1532,6 +1543,18 @@ LogEntryBuilder withRequestBody(SetProfileEnabledRequest setProfileEnabledReques return this; } + LogEntryBuilder withDeleteRole(String roleName) throws IOException { + logEntry.with(EVENT_ACTION_FIELD_NAME, "delete_role"); + XContentBuilder builder = JsonXContent.contentBuilder().humanReadable(true); + builder.startObject() + .startObject("role") + .field("name", roleName) + .endObject() // role + .endObject(); + logEntry.with(DELETE_CONFIG_FIELD_NAME, Strings.toString(builder)); + return this; + } + static void withGrant(XContentBuilder builder, Grant grant) throws IOException { builder.startObject("grant").field("type", grant.getType()); if (grant.getUsername() != null) { diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/ApiKeyService.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/ApiKeyService.java index aaa1841bd2354..d88577f905e96 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/ApiKeyService.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/ApiKeyService.java @@ -1315,7 +1315,18 @@ void validateApiKeyCredentials( AuthenticationResult.unsuccessful("invalid credentials for API key [" + credentials.getId() + "]", null) ); } - }, listener::onFailure)); + }, exception -> { + // Crypto threadpool queue is full, invalidate this cache entry and make sure nothing is going to wait on it + logger.warn( + Strings.format( + "rejecting possibly valid API key authentication because the [%s] threadpool is full", + SECURITY_CRYPTO_THREAD_POOL_NAME + ) + ); + apiKeyAuthCache.invalidate(credentials.getId(), listenableCacheEntry); + listenableCacheEntry.onFailure(exception); + listener.onFailure(exception); + })); } } else { verifyKeyAgainstHash(apiKeyDoc.hash, credentials, ActionListener.wrap(verified -> { diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwkSetLoader.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwkSetLoader.java index 0266fc7488e29..063cc85ea0187 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwkSetLoader.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwkSetLoader.java @@ -22,7 +22,6 @@ import org.elasticsearch.xpack.core.security.authc.RealmConfig; import org.elasticsearch.xpack.core.security.authc.RealmSettings; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import org.elasticsearch.xpack.core.ssl.SSLService; import java.io.IOException; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwkValidateUtil.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwkValidateUtil.java index cc07b7dfa8381..89391f91a2731 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwkValidateUtil.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwkValidateUtil.java @@ -24,7 +24,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.SettingsException; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import 
org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import java.nio.charset.StandardCharsets; import java.security.PublicKey; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/jwt/JwtAuthenticationToken.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticationToken.java similarity index 98% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/jwt/JwtAuthenticationToken.java rename to x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticationToken.java index ebfaae72b9df2..cfef9aed5967a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/jwt/JwtAuthenticationToken.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticationToken.java @@ -4,7 +4,7 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ -package org.elasticsearch.xpack.core.security.authc.jwt; +package org.elasticsearch.xpack.security.authc.jwt; import com.nimbusds.jwt.JWTClaimsSet; import com.nimbusds.jwt.SignedJWT; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticator.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticator.java index b06aba1c9d87a..2345add07ba51 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticator.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticator.java @@ -19,7 +19,6 @@ import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.xpack.core.security.authc.RealmConfig; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtAuthenticationToken; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; import org.elasticsearch.xpack.core.ssl.SSLService; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealm.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealm.java index 30a7e438e70b0..7613e7b3972af 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealm.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealm.java @@ -31,9 +31,7 @@ import org.elasticsearch.xpack.core.security.authc.Realm; import org.elasticsearch.xpack.core.security.authc.RealmConfig; import org.elasticsearch.xpack.core.security.authc.RealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtAuthenticationToken; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import org.elasticsearch.xpack.core.security.authc.support.CachingRealm; import org.elasticsearch.xpack.core.security.authc.support.UserRoleMapper; import org.elasticsearch.xpack.core.security.support.CacheIteratorHelper; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtSignatureValidator.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtSignatureValidator.java index e183ee7d73ac2..b1ee1b77998ec 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtSignatureValidator.java +++ 
b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtSignatureValidator.java @@ -35,14 +35,13 @@ import org.elasticsearch.xpack.core.security.authc.RealmConfig; import org.elasticsearch.xpack.core.security.authc.RealmSettings; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import org.elasticsearch.xpack.core.ssl.SSLService; import java.util.Arrays; import java.util.List; import java.util.stream.Stream; -import static org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil.toStringRedactSignature; +import static org.elasticsearch.xpack.security.authc.jwt.JwtUtil.toStringRedactSignature; public interface JwtSignatureValidator extends Releasable { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/jwt/JwtUtil.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtil.java similarity index 99% rename from x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/jwt/JwtUtil.java rename to x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtil.java index d70b76f8bc574..928ecd7fa265d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/jwt/JwtUtil.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtil.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.core.security.authc.jwt; +package org.elasticsearch.xpack.security.authc.jwt; import com.nimbusds.jose.JWSObject; import com.nimbusds.jose.jwk.JWK; @@ -47,6 +47,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.xpack.core.security.authc.RealmConfig; import org.elasticsearch.xpack.core.security.authc.RealmSettings; +import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; import org.elasticsearch.xpack.core.ssl.SSLService; import java.io.InputStream; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthenticator.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthenticator.java index e637bda19d886..0f34850b861b7 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthenticator.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthenticator.java @@ -91,9 +91,9 @@ import org.elasticsearch.watcher.ResourceWatcherService; import org.elasticsearch.xpack.core.security.authc.RealmConfig; import org.elasticsearch.xpack.core.security.authc.RealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import org.elasticsearch.xpack.core.security.authc.oidc.OpenIdConnectRealmSettings; import org.elasticsearch.xpack.core.ssl.SSLService; +import org.elasticsearch.xpack.security.authc.jwt.JwtUtil; import java.io.IOException; import java.net.URI; diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStore.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStore.java index adeada6cbf6cf..a2d2b21b489ea 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStore.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStore.java @@ -59,6 +59,7 @@ import 
org.elasticsearch.xpack.core.security.action.role.RoleDescriptorRequestValidator; import org.elasticsearch.xpack.core.security.authz.RoleDescriptor; import org.elasticsearch.xpack.core.security.authz.RoleDescriptor.IndicesPrivileges; +import org.elasticsearch.xpack.core.security.authz.permission.RemoteClusterPermissions; import org.elasticsearch.xpack.core.security.authz.store.RoleRetrievalResult; import org.elasticsearch.xpack.core.security.authz.support.DLSRoleQueryValidator; import org.elasticsearch.xpack.core.security.support.NativeRealmValidationUtil; @@ -607,16 +608,41 @@ private DeleteRequest createRoleDeleteRequest(final String roleName) { return client.prepareDelete(SECURITY_MAIN_ALIAS, getIdForRole(roleName)).request(); } - private XContentBuilder createRoleXContentBuilder(RoleDescriptor role) throws IOException { + // Package private for testing + XContentBuilder createRoleXContentBuilder(RoleDescriptor role) throws IOException { assert NativeRealmValidationUtil.validateRoleName(role.getName(), false) == null : "Role name was invalid or reserved: " + role.getName(); assert false == role.hasRestriction() : "restriction is not supported for native roles"; - return role.toXContent( - jsonBuilder(), - ToXContent.EMPTY_PARAMS, - true, - featureService.clusterHasFeature(clusterService.state(), SECURITY_ROLES_METADATA_FLATTENED) - ); + + XContentBuilder builder = jsonBuilder().startObject(); + role.innerToXContent(builder, ToXContent.EMPTY_PARAMS, true); + + if (featureService.clusterHasFeature(clusterService.state(), SECURITY_ROLES_METADATA_FLATTENED)) { + builder.field(RoleDescriptor.Fields.METADATA_FLATTENED.getPreferredName(), role.getMetadata()); + } + + // When role descriptor XContent is generated for the security index all empty fields need to have default values to make sure + // existing values are overwritten if not present since the request to update could be an UpdateRequest + // (update provided fields in existing document or create document) or IndexRequest (replace and reindex document) + if (role.hasConfigurableClusterPrivileges() == false) { + builder.startObject(RoleDescriptor.Fields.GLOBAL.getPreferredName()).endObject(); + } + + if (role.hasRemoteIndicesPrivileges() == false) { + builder.field(RoleDescriptor.Fields.REMOTE_INDICES.getPreferredName(), RoleDescriptor.RemoteIndicesPrivileges.NONE); + } + + if (role.hasRemoteClusterPermissions() == false + && clusterService.state().getMinTransportVersion().onOrAfter(ROLE_REMOTE_CLUSTER_PRIVS)) { + builder.array(RoleDescriptor.Fields.REMOTE_CLUSTER.getPreferredName(), RemoteClusterPermissions.NONE); + } + if (role.hasDescription() == false + && clusterService.state().getMinTransportVersion().onOrAfter(TransportVersions.SECURITY_ROLE_DESCRIPTION)) { + builder.field(RoleDescriptor.Fields.DESCRIPTION.getPreferredName(), ""); + } + + builder.endObject(); + return builder; } public void usageStats(ActionListener> listener) { diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/SecurityTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/SecurityTests.java index 1aa40a48ecc97..400bc35b93fd5 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/SecurityTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/SecurityTests.java @@ -748,7 +748,7 @@ public void testLicenseUpdateFailureHandlerUpdate() throws Exception { // On trial license, kerberos is allowed and the WWW-Authenticate response header should reflect 
that verifyHasAuthenticationHeaderValue( e, - "Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\"", + "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\"", "Negotiate", "ApiKey" ); @@ -760,7 +760,7 @@ public void testLicenseUpdateFailureHandlerUpdate() throws Exception { request.getHttpRequest(), ActionListener.wrap(result -> { assertTrue(completed.compareAndSet(false, true)); }, e -> { // On basic or gold license, kerberos is not allowed and the WWW-Authenticate response header should also reflect that - verifyHasAuthenticationHeaderValue(e, "Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\"", "ApiKey"); + verifyHasAuthenticationHeaderValue(e, "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\"", "ApiKey"); }) ); if (completed.get()) { diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrailTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrailTests.java index a3292a6ab5f4e..17bad90415e7c 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrailTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/audit/logfile/LoggingAuditTrailTests.java @@ -47,6 +47,7 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.XPackSettings; +import org.elasticsearch.xpack.core.security.action.ActionTypes; import org.elasticsearch.xpack.core.security.action.apikey.ApiKeyTests; import org.elasticsearch.xpack.core.security.action.apikey.BulkUpdateApiKeyAction; import org.elasticsearch.xpack.core.security.action.apikey.BulkUpdateApiKeyRequest; @@ -73,6 +74,8 @@ import org.elasticsearch.xpack.core.security.action.profile.SetProfileEnabledRequest; import org.elasticsearch.xpack.core.security.action.profile.UpdateProfileDataAction; import org.elasticsearch.xpack.core.security.action.profile.UpdateProfileDataRequest; +import org.elasticsearch.xpack.core.security.action.role.BulkDeleteRolesRequest; +import org.elasticsearch.xpack.core.security.action.role.BulkPutRolesRequest; import org.elasticsearch.xpack.core.security.action.role.DeleteRoleAction; import org.elasticsearch.xpack.core.security.action.role.DeleteRoleRequest; import org.elasticsearch.xpack.core.security.action.role.PutRoleAction; @@ -772,20 +775,19 @@ public void testSecurityConfigChangeEventFormattingForRoles() throws IOException auditTrail.accessGranted(requestId, authentication, PutRoleAction.NAME, putRoleRequest, authorizationInfo); output = CapturingLogger.output(logger.getName(), Level.INFO); assertThat(output.size(), is(2)); - String generatedPutRoleAuditEventString = output.get(1); - String expectedPutRoleAuditEventString = Strings.format(""" - "put":{"role":{"name":"%s","role_descriptor":%s}}\ - """, putRoleRequest.name(), auditedRolesMap.get(putRoleRequest.name())); - assertThat(generatedPutRoleAuditEventString, containsString(expectedPutRoleAuditEventString)); - generatedPutRoleAuditEventString = generatedPutRoleAuditEventString.replace(", " + expectedPutRoleAuditEventString, ""); - checkedFields = new HashMap<>(commonFields); - checkedFields.remove(LoggingAuditTrail.ORIGIN_ADDRESS_FIELD_NAME); - checkedFields.remove(LoggingAuditTrail.ORIGIN_TYPE_FIELD_NAME); - checkedFields.put("type", "audit"); - checkedFields.put(LoggingAuditTrail.EVENT_TYPE_FIELD_NAME, "security_config_change"); - 
checkedFields.put(LoggingAuditTrail.EVENT_ACTION_FIELD_NAME, "put_role"); - checkedFields.put(LoggingAuditTrail.REQUEST_ID_FIELD_NAME, requestId); - assertMsg(generatedPutRoleAuditEventString, checkedFields); + assertPutRoleAuditLogLine(putRoleRequest.name(), output.get(1), auditedRolesMap, requestId); + // clear log + CapturingLogger.output(logger.getName(), Level.INFO).clear(); + + BulkPutRolesRequest bulkPutRolesRequest = new BulkPutRolesRequest(allTestRoleDescriptors); + bulkPutRolesRequest.setRefreshPolicy(randomFrom(WriteRequest.RefreshPolicy.values())); + auditTrail.accessGranted(requestId, authentication, ActionTypes.BULK_PUT_ROLES.name(), bulkPutRolesRequest, authorizationInfo); + output = CapturingLogger.output(logger.getName(), Level.INFO); + assertThat(output.size(), is(allTestRoleDescriptors.size() + 1)); + + for (int i = 0; i < allTestRoleDescriptors.size(); i++) { + assertPutRoleAuditLogLine(allTestRoleDescriptors.get(i).getName(), output.get(i + 1), auditedRolesMap, requestId); + } // clear log CapturingLogger.output(logger.getName(), Level.INFO).clear(); @@ -795,25 +797,64 @@ public void testSecurityConfigChangeEventFormattingForRoles() throws IOException auditTrail.accessGranted(requestId, authentication, DeleteRoleAction.NAME, deleteRoleRequest, authorizationInfo); output = CapturingLogger.output(logger.getName(), Level.INFO); assertThat(output.size(), is(2)); - String generatedDeleteRoleAuditEventString = output.get(1); + assertDeleteRoleAuditLogLine(putRoleRequest.name(), output.get(1), requestId); + // clear log + CapturingLogger.output(logger.getName(), Level.INFO).clear(); + + BulkDeleteRolesRequest bulkDeleteRolesRequest = new BulkDeleteRolesRequest( + allTestRoleDescriptors.stream().map(RoleDescriptor::getName).toList() + ); + bulkDeleteRolesRequest.setRefreshPolicy(randomFrom(WriteRequest.RefreshPolicy.values())); + auditTrail.accessGranted( + requestId, + authentication, + ActionTypes.BULK_DELETE_ROLES.name(), + bulkDeleteRolesRequest, + authorizationInfo + ); + output = CapturingLogger.output(logger.getName(), Level.INFO); + assertThat(output.size(), is(allTestRoleDescriptors.size() + 1)); + for (int i = 0; i < allTestRoleDescriptors.size(); i++) { + assertDeleteRoleAuditLogLine(allTestRoleDescriptors.get(i).getName(), output.get(i + 1), requestId); + } + } + + private void assertPutRoleAuditLogLine(String roleName, String logLine, Map expectedLogByRoleName, String requestId) { + String expectedPutRoleAuditEventString = Strings.format(""" + "put":{"role":{"name":"%s","role_descriptor":%s}}\ + """, roleName, expectedLogByRoleName.get(roleName)); + + assertThat(logLine, containsString(expectedPutRoleAuditEventString)); + String reducedLogLine = logLine.replace(", " + expectedPutRoleAuditEventString, ""); + Map checkedFields = new HashMap<>(commonFields); + checkedFields.remove(LoggingAuditTrail.ORIGIN_ADDRESS_FIELD_NAME); + checkedFields.remove(LoggingAuditTrail.ORIGIN_TYPE_FIELD_NAME); + checkedFields.put("type", "audit"); + checkedFields.put(LoggingAuditTrail.EVENT_TYPE_FIELD_NAME, "security_config_change"); + checkedFields.put(LoggingAuditTrail.EVENT_ACTION_FIELD_NAME, "put_role"); + checkedFields.put(LoggingAuditTrail.REQUEST_ID_FIELD_NAME, requestId); + assertMsg(reducedLogLine, checkedFields); + } + + private void assertDeleteRoleAuditLogLine(String roleName, String logLine, String requestId) { StringBuilder deleteRoleStringBuilder = new StringBuilder().append("\"delete\":{\"role\":{\"name\":"); - if (deleteRoleRequest.name() == null) { + if (roleName == 
null) { deleteRoleStringBuilder.append("null"); } else { - deleteRoleStringBuilder.append("\"").append(deleteRoleRequest.name()).append("\""); + deleteRoleStringBuilder.append("\"").append(roleName).append("\""); } deleteRoleStringBuilder.append("}}"); String expectedDeleteRoleAuditEventString = deleteRoleStringBuilder.toString(); - assertThat(generatedDeleteRoleAuditEventString, containsString(expectedDeleteRoleAuditEventString)); - generatedDeleteRoleAuditEventString = generatedDeleteRoleAuditEventString.replace(", " + expectedDeleteRoleAuditEventString, ""); - checkedFields = new HashMap<>(commonFields); + assertThat(logLine, containsString(expectedDeleteRoleAuditEventString)); + String reducedLogLine = logLine.replace(", " + expectedDeleteRoleAuditEventString, ""); + Map checkedFields = new HashMap<>(commonFields); checkedFields.remove(LoggingAuditTrail.ORIGIN_ADDRESS_FIELD_NAME); checkedFields.remove(LoggingAuditTrail.ORIGIN_TYPE_FIELD_NAME); checkedFields.put("type", "audit"); checkedFields.put(LoggingAuditTrail.EVENT_TYPE_FIELD_NAME, "security_config_change"); checkedFields.put(LoggingAuditTrail.EVENT_ACTION_FIELD_NAME, "delete_role"); checkedFields.put(LoggingAuditTrail.REQUEST_ID_FIELD_NAME, requestId); - assertMsg(generatedDeleteRoleAuditEventString, checkedFields); + assertMsg(reducedLogLine, checkedFields); } public void testSecurityConfigChangeEventForCrossClusterApiKeys() throws IOException { @@ -1975,6 +2016,11 @@ public void testSecurityConfigChangedEventSelection() { Tuple actionAndRequest = randomFrom( new Tuple<>(PutUserAction.NAME, new PutUserRequest()), new Tuple<>(PutRoleAction.NAME, new PutRoleRequest()), + new Tuple<>( + ActionTypes.BULK_PUT_ROLES.name(), + new BulkPutRolesRequest(List.of(new RoleDescriptor(randomAlphaOfLength(20), null, null, null))) + ), + new Tuple<>(ActionTypes.BULK_DELETE_ROLES.name(), new BulkDeleteRolesRequest(List.of(randomAlphaOfLength(20)))), new Tuple<>(PutRoleMappingAction.NAME, new PutRoleMappingRequest()), new Tuple<>(TransportSetEnabledAction.TYPE.name(), new SetEnabledRequest()), new Tuple<>(TransportChangePasswordAction.TYPE.name(), new ChangePasswordRequest()), diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/ApiKeyServiceTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/ApiKeyServiceTests.java index 1dce6a038638b..f4d75434b92de 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/ApiKeyServiceTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/ApiKeyServiceTests.java @@ -146,6 +146,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Semaphore; @@ -230,6 +231,9 @@ public class ApiKeyServiceTests extends ESTestCase { "search": [ {"names": ["logs"]} ], "replication": [ {"names": ["archive"]} ] }"""); + + private static final int TEST_THREADPOOL_QUEUE_SIZE = 1000; + private ThreadPool threadPool; private Client client; private SecurityIndexManager securityIndex; @@ -245,7 +249,7 @@ public void createThreadPool() { Settings.EMPTY, SECURITY_CRYPTO_THREAD_POOL_NAME, 1, - 1000, + TEST_THREADPOOL_QUEUE_SIZE, "xpack.security.crypto.thread_pool", EsExecutors.TaskTrackingConfig.DO_NOT_TRACK ) @@ -268,6 +272,90 @@ public void setupMocks() { doAnswer(invocation -> 
Instant.now()).when(clock).instant(); } + public void testFloodThreadpool() throws Exception { + // We're going to be blocking the security-crypto threadpool so we need a new one for the client + ThreadPool clientThreadpool = new TestThreadPool( + this.getTestName(), + new FixedExecutorBuilder( + Settings.EMPTY, + this.getTestName(), + 1, + 100, + "no_settings_used", + EsExecutors.TaskTrackingConfig.DO_NOT_TRACK + ) + ); + try { + when(client.threadPool()).thenReturn(clientThreadpool); + + // setup copied from testAuthenticateWithApiKey + final Settings settings = Settings.builder().put(XPackSettings.API_KEY_SERVICE_ENABLED_SETTING.getKey(), true).build(); + final ApiKeyService service = createApiKeyService(settings); + + final String id = randomAlphaOfLength(12); + final String key = randomAlphaOfLength(16); + + final User user, authUser; + if (randomBoolean()) { + user = new User("hulk", new String[] { "superuser" }, "Bruce Banner", "hulk@test.com", Map.of(), true); + authUser = new User("authenticated_user", "other"); + } else { + user = new User("hulk", new String[] { "superuser" }, "Bruce Banner", "hulk@test.com", Map.of(), true); + authUser = null; + } + final ApiKey.Type type = randomFrom(ApiKey.Type.values()); + final Map<String, Object> metadata = mockKeyDocument(id, key, user, authUser, false, Duration.ofSeconds(3600), null, type); + + // Block the security crypto threadpool + CyclicBarrier barrier = new CyclicBarrier(2); + threadPool.executor(SECURITY_CRYPTO_THREAD_POOL_NAME).execute(() -> safeAwait(barrier)); + // Now fill it up while the one thread is blocked + for (int i = 0; i < TEST_THREADPOOL_QUEUE_SIZE; i++) { + threadPool.executor(SECURITY_CRYPTO_THREAD_POOL_NAME).execute(() -> {}); + } + + // Check that it's full + for (var stat : threadPool.stats().stats()) { + if (stat.name().equals(SECURITY_CRYPTO_THREAD_POOL_NAME)) { + assertThat(stat.queue(), equalTo(TEST_THREADPOOL_QUEUE_SIZE)); + assertThat(stat.rejected(), equalTo(0L)); + } + } + + // now try to auth with an API key + final AuthenticationResult<User> auth = tryAuthenticate(service, id, key, type); + assertThat(auth.getStatus(), is(AuthenticationResult.Status.TERMINATE)); + + // Make sure one was rejected and the queue is still full + for (var stat : threadPool.stats().stats()) { + if (stat.name().equals(SECURITY_CRYPTO_THREAD_POOL_NAME)) { + assertThat(stat.queue(), equalTo(TEST_THREADPOOL_QUEUE_SIZE)); + assertThat(stat.rejected(), equalTo(1L)); + } + } + ListenableFuture<CachedApiKeyHashResult> cachedValue = service.getApiKeyAuthCache().get(id); + assertThat("since the request was rejected, there should be no cache entry for this key", cachedValue, nullValue()); + + // unblock the threadpool + safeAwait(barrier); + + // wait for the threadpool queue to drain & check that the stats are as expected + flushThreadPoolExecutor(threadPool, SECURITY_CRYPTO_THREAD_POOL_NAME); + for (var stat : threadPool.stats().stats()) { + if (stat.name().equals(SECURITY_CRYPTO_THREAD_POOL_NAME)) { + assertThat(stat.rejected(), equalTo(1L)); + assertThat(stat.queue(), equalTo(0)); + } + } + + // try to authenticate again with the same key - if this hangs, check the future caching + final AuthenticationResult<User> shouldSucceed = tryAuthenticate(service, id, key, type); + assertThat(shouldSucceed.getStatus(), is(AuthenticationResult.Status.SUCCESS)); + } finally { + terminate(clientThreadpool); + } + } + public void testCreateApiKeyUsesBulkIndexAction() throws Exception { final Settings settings = Settings.builder().put(XPackSettings.API_KEY_SERVICE_ENABLED_SETTING.getKey(),
true).build(); final ApiKeyService service = createApiKeyService(settings); diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/AuthenticationServiceTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/AuthenticationServiceTests.java index 62b72b4f9750c..85a1dc1aa029d 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/AuthenticationServiceTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/AuthenticationServiceTests.java @@ -1500,7 +1500,7 @@ public void testRealmAuthenticateTerminateAuthenticationProcessWithException() { final boolean throwElasticsearchSecurityException = randomBoolean(); final boolean withAuthenticateHeader = throwElasticsearchSecurityException && randomBoolean(); Exception throwE = new Exception("general authentication error"); - final String basicScheme = "Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\""; + final String basicScheme = "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\""; String selectedScheme = randomFrom(basicScheme, "Negotiate IOJoj"); if (throwElasticsearchSecurityException) { throwE = new ElasticsearchSecurityException("authentication error", RestStatus.UNAUTHORIZED); @@ -1547,7 +1547,7 @@ public void testRealmAuthenticateGracefulTerminateAuthenticationProcess() { when(token.principal()).thenReturn(principal); when(firstRealm.token(threadContext)).thenReturn(token); when(firstRealm.supports(token)).thenReturn(true); - final String basicScheme = "Basic realm=\"" + XPackField.SECURITY + "\" charset=\"UTF-8\""; + final String basicScheme = "Basic realm=\"" + XPackField.SECURITY + "\", charset=\"UTF-8\""; mockAuthenticate(firstRealm, token, null, true); ElasticsearchSecurityException e = expectThrows( diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticatorTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticatorTests.java index 7a44ebae95738..6d4861212e286 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticatorTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtAuthenticatorTests.java @@ -24,7 +24,6 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.security.authc.RealmConfig; import org.elasticsearch.xpack.core.security.authc.RealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtAuthenticationToken; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; import org.elasticsearch.xpack.core.ssl.SSLService; import org.junit.Before; diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtIssuer.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtIssuer.java index 3d4d9eae6acd0..789ac04c40622 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtIssuer.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtIssuer.java @@ -14,7 +14,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import org.elasticsearch.xpack.core.security.user.User; import java.io.Closeable; diff --git 
a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmAuthenticateTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmAuthenticateTests.java index bf6c64242701b..4f7b82a16e8f1 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmAuthenticateTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmAuthenticateTests.java @@ -25,7 +25,6 @@ import org.elasticsearch.xpack.core.security.authc.AuthenticationToken; import org.elasticsearch.xpack.core.security.authc.Realm; import org.elasticsearch.xpack.core.security.authc.RealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtAuthenticationToken; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; import org.elasticsearch.xpack.core.security.user.User; diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmGenerateTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmGenerateTests.java index 7a0e138305b83..8a5daa642002e 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmGenerateTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmGenerateTests.java @@ -23,7 +23,6 @@ import org.elasticsearch.xpack.core.security.authc.RealmConfig; import org.elasticsearch.xpack.core.security.authc.RealmSettings; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import org.elasticsearch.xpack.core.security.authc.support.DelegatedAuthorizationSettings; import org.elasticsearch.xpack.core.security.authc.support.UserRoleMapper; import org.elasticsearch.xpack.core.security.user.User; diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmInspector.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmInspector.java index 40a613a0907c8..7697849179acf 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmInspector.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmInspector.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import org.elasticsearch.xpack.core.security.authc.support.ClaimSetting; import java.net.URI; diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmTestCase.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmTestCase.java index 1bc49cb628464..ffc1fec1f5788 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmTestCase.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtRealmTestCase.java @@ -28,7 +28,6 @@ import org.elasticsearch.xpack.core.security.authc.Realm; import org.elasticsearch.xpack.core.security.authc.RealmConfig; import org.elasticsearch.xpack.core.security.authc.RealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtAuthenticationToken; import 
org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings.ClientAuthenticationType; import org.elasticsearch.xpack.core.security.authc.support.DelegatedAuthorizationSettings; diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtilTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtilTests.java index 7d90dffd7517c..6fab33b4d6adf 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtilTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtilTests.java @@ -10,7 +10,6 @@ import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.common.settings.SettingsException; import org.elasticsearch.xpack.core.security.authc.jwt.JwtRealmSettings; -import org.elasticsearch.xpack.core.security.authc.jwt.JwtUtil; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStoreTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStoreTests.java index a4ee449438fe0..bfa358d0b7d6e 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStoreTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/authz/store/NativeRolesStoreTests.java @@ -55,6 +55,7 @@ import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.NamedXContentRegistry; +import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; @@ -78,6 +79,7 @@ import org.mockito.Mockito; import java.io.IOException; +import java.lang.reflect.Field; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; @@ -138,7 +140,7 @@ private NativeRolesStore createRoleStoreForTest() { private NativeRolesStore createRoleStoreForTest(Settings settings) { new ReservedRolesStore(Set.of("superuser")); - final ClusterService clusterService = mock(ClusterService.class); + final ClusterService clusterService = mockClusterServiceWithMinNodeVersion(TransportVersion.current()); final SecuritySystemIndices systemIndices = new SecuritySystemIndices(settings); final FeatureService featureService = mock(FeatureService.class); systemIndices.init(client, featureService, clusterService); @@ -807,6 +809,62 @@ public void testBulkDeleteReservedRole() { verify(client, times(0)).bulk(any(BulkRequest.class), any()); } + /** + * Make sure all top level fields for a RoleDescriptor have default values to make sure they can be set to empty in an upsert + * call to the roles API + */ + public void testAllTopFieldsHaveEmptyDefaultsForUpsert() throws IOException, IllegalAccessException { + final NativeRolesStore rolesStore = createRoleStoreForTest(); + RoleDescriptor allNullDescriptor = new RoleDescriptor( + "all-null-descriptor", + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ); + + Set fieldsWithoutDefaultValue = Set.of( + RoleDescriptor.Fields.INDEX, + RoleDescriptor.Fields.NAMES, + RoleDescriptor.Fields.ALLOW_RESTRICTED_INDICES, + RoleDescriptor.Fields.RESOURCES, + 
RoleDescriptor.Fields.QUERY, + RoleDescriptor.Fields.PRIVILEGES, + RoleDescriptor.Fields.CLUSTERS, + RoleDescriptor.Fields.APPLICATION, + RoleDescriptor.Fields.FIELD_PERMISSIONS, + RoleDescriptor.Fields.FIELD_PERMISSIONS_2X, + RoleDescriptor.Fields.GRANT_FIELDS, + RoleDescriptor.Fields.EXCEPT_FIELDS, + RoleDescriptor.Fields.METADATA_FLATTENED, + RoleDescriptor.Fields.TRANSIENT_METADATA, + RoleDescriptor.Fields.RESTRICTION, + RoleDescriptor.Fields.WORKFLOWS + ); + + String serializedOutput = Strings.toString(rolesStore.createRoleXContentBuilder(allNullDescriptor)); + Field[] fields = RoleDescriptor.Fields.class.getFields(); + + for (Field field : fields) { + ParseField fieldValue = (ParseField) field.get(null); + if (fieldsWithoutDefaultValue.contains(fieldValue) == false) { + assertThat( + "New RoleDescriptor field without a default value detected. " + + "Set a value or add to excluded list if not expected to be set to empty through role APIs", + serializedOutput, + containsString(fieldValue.getPreferredName()) + ); + } + } + } + private ClusterService mockClusterServiceWithMinNodeVersion(TransportVersion transportVersion) { final ClusterService clusterService = mock(ClusterService.class, Mockito.RETURNS_DEEP_STUBS); when(clusterService.state().getMinTransportVersion()).thenReturn(transportVersion); diff --git a/x-pack/plugin/sql/sql-client/src/test/java/org/elasticsearch/xpack/sql/client/RemoteFailureTests.java b/x-pack/plugin/sql/sql-client/src/test/java/org/elasticsearch/xpack/sql/client/RemoteFailureTests.java index d093332e48422..258a738a076c8 100644 --- a/x-pack/plugin/sql/sql-client/src/test/java/org/elasticsearch/xpack/sql/client/RemoteFailureTests.java +++ b/x-pack/plugin/sql/sql-client/src/test/java/org/elasticsearch/xpack/sql/client/RemoteFailureTests.java @@ -61,7 +61,7 @@ public void testParseMissingAuth() throws IOException { assertEquals("missing authentication token for REST request [/?pretty&error_trace]", failure.reason()); assertThat(failure.remoteTrace(), containsString("DefaultAuthenticationFailureHandler.missingToken")); assertNull(failure.cause()); - assertEquals(singletonMap("WWW-Authenticate", "Basic realm=\"security\" charset=\"UTF-8\""), failure.headers()); + assertEquals(singletonMap("WWW-Authenticate", "Basic realm=\"security\", charset=\"UTF-8\""), failure.headers()); } public void testNoError() { diff --git a/x-pack/plugin/sql/sql-client/src/test/resources/remote_failure/missing_auth.json b/x-pack/plugin/sql/sql-client/src/test/resources/remote_failure/missing_auth.json index 3d2927f85d6b1..d21fece75f7ac 100644 --- a/x-pack/plugin/sql/sql-client/src/test/resources/remote_failure/missing_auth.json +++ b/x-pack/plugin/sql/sql-client/src/test/resources/remote_failure/missing_auth.json @@ -5,7 +5,7 @@ "type" : "security_exception", "reason" : "missing authentication token for REST request [/?pretty&error_trace]", "header" : { - "WWW-Authenticate" : "Basic realm=\"security\" charset=\"UTF-8\"" + "WWW-Authenticate" : "Basic realm=\"security\", charset=\"UTF-8\"" }, "stack_trace" : "ElasticsearchSecurityException[missing authentication token for REST request [/?pretty&error_trace]]\n\tat org.elasticsearch.xpack.security.support.Exceptions.authenticationError(Exceptions.java:36)\n\tat org.elasticsearch.xpack.security.authc.DefaultAuthenticationFailureHandler.missingToken(DefaultAuthenticationFailureHandler.java:69)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$AuditableRestRequest.anonymousAccessDenied(AuthenticationService.java:603)\n\tat 
org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$handleNullToken$17(AuthenticationService.java:357)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.handleNullToken(AuthenticationService.java:362)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.consumeToken(AuthenticationService.java:277)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$extractToken$7(AuthenticationService.java:249)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.extractToken(AuthenticationService.java:266)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$null$0(AuthenticationService.java:201)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:59)\n\tat org.elasticsearch.xpack.security.authc.TokenService.getAndValidateToken(TokenService.java:230)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$authenticateAsync$2(AuthenticationService.java:197)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$lookForExistingAuthentication$4(AuthenticationService.java:228)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lookForExistingAuthentication(AuthenticationService.java:239)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.authenticateAsync(AuthenticationService.java:193)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.access$000(AuthenticationService.java:147)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService.authenticate(AuthenticationService.java:99)\n\tat org.elasticsearch.xpack.security.rest.SecurityRestFilter.handleRequest(SecurityRestFilter.java:69)\n\tat org.elasticsearch.rest.RestController.dispatchRequest(RestController.java:240)\n\tat org.elasticsearch.rest.RestController.tryAllHandlers(RestController.java:336)\n\tat org.elasticsearch.rest.RestController.dispatchRequest(RestController.java:174)\n\tat org.elasticsearch.http.netty4.Netty4HttpServerTransport.dispatchRequest(Netty4HttpServerTransport.java:469)\n\tat org.elasticsearch.http.netty4.Netty4HttpRequestHandler.channelRead0(Netty4HttpRequestHandler.java:80)\n\tat io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat org.elasticsearch.http.netty4.pipelining.HttpPipeliningHandler.channelRead(HttpPipeliningHandler.java:68)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.handler.codec.MessageToMessageCodec.channelRead(MessageToMessageCodec.java:111)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310)\n\tat io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:284)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.channel.ChannelInboundHandlerAdapter.channelRead(ChannelInboundHandlerAdapter.java:86)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1334)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:926)\n\tat io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:134)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:644)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:544)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:498)\n\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:458)\n\tat io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)\n\tat java.lang.Thread.run(Thread.java:748)\n" } @@ -13,7 +13,7 @@ "type" : "security_exception", "reason" : "missing authentication token for REST request [/?pretty&error_trace]", "header" : { - "WWW-Authenticate" : "Basic realm=\"security\" charset=\"UTF-8\"" + "WWW-Authenticate" : "Basic realm=\"security\", charset=\"UTF-8\"" }, "stack_trace" : "ElasticsearchSecurityException[missing authentication token for REST request [/?pretty&error_trace]]\n\tat org.elasticsearch.xpack.security.support.Exceptions.authenticationError(Exceptions.java:36)\n\tat 
org.elasticsearch.xpack.security.authc.DefaultAuthenticationFailureHandler.missingToken(DefaultAuthenticationFailureHandler.java:69)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$AuditableRestRequest.anonymousAccessDenied(AuthenticationService.java:603)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$handleNullToken$17(AuthenticationService.java:357)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.handleNullToken(AuthenticationService.java:362)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.consumeToken(AuthenticationService.java:277)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$extractToken$7(AuthenticationService.java:249)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.extractToken(AuthenticationService.java:266)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$null$0(AuthenticationService.java:201)\n\tat org.elasticsearch.action.ActionListener$1.onResponse(ActionListener.java:59)\n\tat org.elasticsearch.xpack.security.authc.TokenService.getAndValidateToken(TokenService.java:230)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$authenticateAsync$2(AuthenticationService.java:197)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lambda$lookForExistingAuthentication$4(AuthenticationService.java:228)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.lookForExistingAuthentication(AuthenticationService.java:239)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.authenticateAsync(AuthenticationService.java:193)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService$Authenticator.access$000(AuthenticationService.java:147)\n\tat org.elasticsearch.xpack.security.authc.AuthenticationService.authenticate(AuthenticationService.java:99)\n\tat org.elasticsearch.xpack.security.rest.SecurityRestFilter.handleRequest(SecurityRestFilter.java:69)\n\tat org.elasticsearch.rest.RestController.dispatchRequest(RestController.java:240)\n\tat org.elasticsearch.rest.RestController.tryAllHandlers(RestController.java:336)\n\tat org.elasticsearch.rest.RestController.dispatchRequest(RestController.java:174)\n\tat org.elasticsearch.http.netty4.Netty4HttpServerTransport.dispatchRequest(Netty4HttpServerTransport.java:469)\n\tat org.elasticsearch.http.netty4.Netty4HttpRequestHandler.channelRead0(Netty4HttpRequestHandler.java:80)\n\tat io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat org.elasticsearch.http.netty4.pipelining.HttpPipeliningHandler.channelRead(HttpPipeliningHandler.java:68)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat 
io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.handler.codec.MessageToMessageCodec.channelRead(MessageToMessageCodec.java:111)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:310)\n\tat io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:284)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.channel.ChannelInboundHandlerAdapter.channelRead(ChannelInboundHandlerAdapter.java:86)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:340)\n\tat io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1334)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\n\tat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:348)\n\tat io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:926)\n\tat io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:134)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:644)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeysPlain(NioEventLoop.java:544)\n\tat io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:498)\n\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:458)\n\tat io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)\n\tat java.lang.Thread.run(Thread.java:748)\n" }, diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/140_metadata.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/140_metadata.yml index d6c1c6c97944a..33c9cc7558672 100644 --- 
a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/140_metadata.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/140_metadata.yml @@ -5,7 +5,7 @@ setup: - method: POST path: /_query parameters: [method, path, parameters, capabilities] - capabilities: [metadata_fields, metadata_field_ignored] + capabilities: [metadata_fields, metadata_ignored_field] reason: "Ignored metadata field capability required" - do: @@ -140,3 +140,18 @@ setup: - match: {columns.0.name: "count_distinct(_ignored)"} - match: {columns.0.type: "long"} - match: {values.0.0: 2} + +--- +"Count_distinct on _source is a 400 error": + - requires: + capabilities: + - method: POST + path: /_query + parameters: [method, path, parameters, capabilities] + capabilities: [fix_count_distinct_source_error] + reason: "Capability marks fixing this bug" + - do: + catch: bad_request + esql.query: + body: + query: 'FROM test [metadata _source] | STATS COUNT_DISTINCT(_source)' diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml index f3403ca8751c0..92b3f4d1b084d 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml @@ -4,8 +4,8 @@ setup: - method: POST path: /_query parameters: [method, path, parameters, capabilities] - capabilities: [union_types] - reason: "Union types introduced in 8.15.0" + capabilities: [union_types, union_types_remove_fields, casting_operator] + reason: "Union types and casting operator introduced in 8.15.0" test_runner_features: [capabilities, allowed_warnings_regex] - do: @@ -147,6 +147,9 @@ setup: - '{"index": {}}' - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", "event_duration": "3450233", "message": "Connected to 10.1.0.3"}' +############################################################################################################ +# Test a single index as a control of the expected results + --- load single index ip_long: - do: @@ -173,9 +176,6 @@ load single index ip_long: - match: { values.0.3: 1756467 } - match: { values.0.4: "Connected to 10.1.0.1" } -############################################################################################################ -# Test a single index as a control of the expected results - --- load single index keyword_keyword: - do: @@ -202,6 +202,62 @@ load single index keyword_keyword: - match: { values.0.3: "1756467" } - match: { values.0.4: "Connected to 10.1.0.1" } +--- +load single index ip_long and aggregate by client_ip: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_long | STATS count = COUNT(*) BY client_ip::ip | SORT count DESC, `client_ip::ip` ASC' + + - match: { columns.0.name: "count" } + - match: { columns.0.type: "long" } + - match: { columns.1.name: "client_ip::ip" } + - match: { columns.1.type: "ip" } + - length: { values: 4 } + - match: { values.0.0: 4 } + - match: { values.0.1: "172.21.3.15" } + - match: { values.1.0: 1 } + - match: { values.1.1: "172.21.0.5" } + - match: { values.2.0: 1 } + - match: { values.2.1: "172.21.2.113" } + - match: { values.3.0: 1 } + - match: { values.3.1: "172.21.2.162" } + +--- +load single index ip_long and aggregate client_ip my message: + - do: + allowed_warnings_regex: + - "No limit defined, adding default 
limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_long | STATS count = COUNT(client_ip::ip) BY message | SORT count DESC, message ASC' + + - match: { columns.0.name: "count" } + - match: { columns.0.type: "long" } + - match: { columns.1.name: "message" } + - match: { columns.1.type: "keyword" } + - length: { values: 5 } + - match: { values.0.0: 3 } + - match: { values.0.1: "Connection error" } + - match: { values.1.0: 1 } + - match: { values.1.1: "Connected to 10.1.0.1" } + - match: { values.2.0: 1 } + - match: { values.2.1: "Connected to 10.1.0.2" } + - match: { values.3.0: 1 } + - match: { values.3.1: "Connected to 10.1.0.3" } + - match: { values.4.0: 1 } + - match: { values.4.1: "Disconnected" } + +--- +load single index ip_long stats invalid grouping: + - do: + catch: '/Unknown column \[x\]/' + esql.query: + body: + query: 'FROM events_ip_long | STATS count = COUNT(client_ip::ip) BY x' + ############################################################################################################ # Test two indices where the event_duration is mapped as a LONG and as a KEYWORD @@ -240,12 +296,44 @@ load two indices, showing unsupported type and null value for event_duration: --- load two indices with no conversion function, but needs TO_LONG conversion: + - requires: + test_runner_features: [capabilities] + capabilities: + - method: POST + path: /_query + parameters: [] + capabilities: [union_types_fix_rename_resolution] + reason: "Union type resolution fix for rename also allows direct usage of unsupported fields in KEEP" + - do: - catch: '/Cannot use field \[event_duration\] due to ambiguities being mapped as \[2\] incompatible types: \[keyword\] in \[events_ip_keyword\], \[long\] in \[events_ip_long\]/' + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: query: 'FROM events_ip_* METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + - match: { columns.0.name: "_index" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "@timestamp" } + - match: { columns.1.type: "date" } + - match: { columns.2.name: "client_ip" } + - match: { columns.2.type: "ip" } + - match: { columns.3.name: "event_duration" } + - match: { columns.3.type: "unsupported" } + - match: { columns.4.name: "message" } + - match: { columns.4.type: "keyword" } + - length: { values: 14 } + - match: { values.0.0: "events_ip_keyword" } + - match: { values.0.1: "2023-10-23T13:55:01.543Z" } + - match: { values.0.2: "172.21.3.15" } + - match: { values.0.3: null } + - match: { values.0.4: "Connected to 10.1.0.1" } + - match: { values.7.0: "events_ip_long" } + - match: { values.7.1: "2023-10-23T13:55:01.543Z" } + - match: { values.7.2: "172.21.3.15" } + - match: { values.7.3: null } + - match: { values.7.4: "Connected to 10.1.0.1" } + --- load two indices with incorrect conversion function, TO_IP instead of TO_LONG: - do: @@ -394,12 +482,44 @@ load two indices, showing unsupported type and null value for client_ip: --- load two indices with no conversion function, but needs TO_IP conversion: + - requires: + test_runner_features: [capabilities] + capabilities: + - method: POST + path: /_query + parameters: [] + capabilities: [union_types_fix_rename_resolution] + reason: "Union type resolution fix for rename also allows direct usage of unsupported fields in KEEP" + - do: - catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in 
\[events_ip_long\], \[keyword\] in \[events_keyword_long\]/' + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: query: 'FROM events_*_long METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + - match: { columns.0.name: "_index" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "@timestamp" } + - match: { columns.1.type: "date" } + - match: { columns.2.name: "client_ip" } + - match: { columns.2.type: "unsupported" } + - match: { columns.3.name: "event_duration" } + - match: { columns.3.type: "long" } + - match: { columns.4.name: "message" } + - match: { columns.4.type: "keyword" } + - length: { values: 14 } + - match: { values.0.0: "events_ip_long" } + - match: { values.0.1: "2023-10-23T13:55:01.543Z" } + - match: { values.0.2: null } + - match: { values.0.3: 1756467 } + - match: { values.0.4: "Connected to 10.1.0.1" } + - match: { values.7.0: "events_keyword_long" } + - match: { values.7.1: "2023-10-23T13:55:01.543Z" } + - match: { values.7.2: null } + - match: { values.7.3: 1756467 } + - match: { values.7.4: "Connected to 10.1.0.1" } + --- load two indices with incorrect conversion function, TO_LONG instead of TO_IP: - do: @@ -512,25 +632,164 @@ load two indices, convert, rename but not drop ambiguous field client_ip: - match: { values.1.5: "172.21.3.15" } - match: { values.1.6: "172.21.3.15" } +--- +load two indexes and group by converted client_ip: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_*_long | STATS count = COUNT(*) BY client_ip::ip | SORT count DESC, `client_ip::ip` ASC' + + - match: { columns.0.name: "count" } + - match: { columns.0.type: "long" } + - match: { columns.1.name: "client_ip::ip" } + - match: { columns.1.type: "ip" } + - length: { values: 4 } + - match: { values.0.0: 8 } + - match: { values.0.1: "172.21.3.15" } + - match: { values.1.0: 2 } + - match: { values.1.1: "172.21.0.5" } + - match: { values.2.0: 2 } + - match: { values.2.1: "172.21.2.113" } + - match: { values.3.0: 2 } + - match: { values.3.1: "172.21.2.162" } + +--- +load two indexes and aggregate converted client_ip: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_*_long | STATS count = COUNT(client_ip::ip) BY message | SORT count DESC, message ASC' + + - match: { columns.0.name: "count" } + - match: { columns.0.type: "long" } + - match: { columns.1.name: "message" } + - match: { columns.1.type: "keyword" } + - length: { values: 5 } + - match: { values.0.0: 6 } + - match: { values.0.1: "Connection error" } + - match: { values.1.0: 2 } + - match: { values.1.1: "Connected to 10.1.0.1" } + - match: { values.2.0: 2 } + - match: { values.2.1: "Connected to 10.1.0.2" } + - match: { values.3.0: 2 } + - match: { values.3.1: "Connected to 10.1.0.3" } + - match: { values.4.0: 2 } + - match: { values.4.1: "Disconnected" } + +--- +load two indexes, convert client_ip and group by something invalid: + - do: + catch: '/Unknown column \[x\]/' + esql.query: + body: + query: 'FROM events_*_long | STATS count = COUNT(client_ip::ip) BY x' + ############################################################################################################ # Test four indices with both the client_IP (IP and KEYWORD) and event_duration (LONG and KEYWORD) mappings --- load four indices with single conversion function TO_LONG: + - 
requires: + test_runner_features: [capabilities] + capabilities: + - method: POST + path: /_query + parameters: [] + capabilities: [union_types_fix_rename_resolution] + reason: "Union type resolution fix for rename also allows direct usage of unsupported fields in KEEP" + - do: - catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in \[events_ip_keyword, events_ip_long\], \[keyword\] in \[events_keyword_keyword, events_keyword_long\]/' + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: query: 'FROM events_* METADATA _index | EVAL event_duration = TO_LONG(event_duration) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + - match: { columns.0.name: "_index" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "@timestamp" } + - match: { columns.1.type: "date" } + - match: { columns.2.name: "client_ip" } + - match: { columns.2.type: "unsupported" } + - match: { columns.3.name: "event_duration" } + - match: { columns.3.type: "long" } + - match: { columns.4.name: "message" } + - match: { columns.4.type: "keyword" } + - length: { values: 28 } + - match: { values.0.0: "events_ip_keyword" } + - match: { values.0.1: "2023-10-23T13:55:01.543Z" } + - match: { values.0.2: null } + - match: { values.0.3: 1756467 } + - match: { values.0.4: "Connected to 10.1.0.1" } + - match: { values.7.0: "events_ip_long" } + - match: { values.7.1: "2023-10-23T13:55:01.543Z" } + - match: { values.7.2: null } + - match: { values.7.3: 1756467 } + - match: { values.7.4: "Connected to 10.1.0.1" } + - match: { values.14.0: "events_keyword_keyword" } + - match: { values.14.1: "2023-10-23T13:55:01.543Z" } + - match: { values.14.2: null } + - match: { values.14.3: 1756467 } + - match: { values.14.4: "Connected to 10.1.0.1" } + - match: { values.21.0: "events_keyword_long" } + - match: { values.21.1: "2023-10-23T13:55:01.543Z" } + - match: { values.21.2: null } + - match: { values.21.3: 1756467 } + - match: { values.21.4: "Connected to 10.1.0.1" } + --- load four indices with single conversion function TO_IP: + - requires: + test_runner_features: [capabilities] + capabilities: + - method: POST + path: /_query + parameters: [] + capabilities: [union_types_fix_rename_resolution] + reason: "Union type resolution fix for rename also allows direct usage of unsupported fields in KEEP" + - do: - catch: '/Cannot use field \[event_duration\] due to ambiguities being mapped as \[2\] incompatible types: \[keyword\] in \[events_ip_keyword, events_keyword_keyword\], \[long\] in \[events_ip_long, events_keyword_long\]/' + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: query: 'FROM events_* METADATA _index | EVAL client_ip = TO_IP(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + - match: { columns.0.name: "_index" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "@timestamp" } + - match: { columns.1.type: "date" } + - match: { columns.2.name: "client_ip" } + - match: { columns.2.type: "ip" } + - match: { columns.3.name: "event_duration" } + - match: { columns.3.type: "unsupported" } + - match: { columns.4.name: "message" } + - match: { columns.4.type: "keyword" } + - length: { values: 28 } + - match: { values.0.0: "events_ip_keyword" } + - match: { values.0.1: "2023-10-23T13:55:01.543Z" } + - match: { values.0.2: "172.21.3.15" } + - 
match: { values.0.3: null } + - match: { values.0.4: "Connected to 10.1.0.1" } + - match: { values.7.0: "events_ip_long" } + - match: { values.7.1: "2023-10-23T13:55:01.543Z" } + - match: { values.7.2: "172.21.3.15" } + - match: { values.7.3: null } + - match: { values.7.4: "Connected to 10.1.0.1" } + - match: { values.14.0: "events_keyword_keyword" } + - match: { values.14.1: "2023-10-23T13:55:01.543Z" } + - match: { values.14.2: "172.21.3.15" } + - match: { values.14.3: null } + - match: { values.14.4: "Connected to 10.1.0.1" } + - match: { values.21.0: "events_keyword_long" } + - match: { values.21.1: "2023-10-23T13:55:01.543Z" } + - match: { values.21.2: "172.21.3.15" } + - match: { values.21.3: null } + - match: { values.21.4: "Connected to 10.1.0.1" } --- load four indices with multiple conversion functions TO_LONG and TO_IP: - do: diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml index 99bd1d6508895..ccf6512ca1ff7 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml @@ -4,7 +4,7 @@ setup: - method: POST path: /_query parameters: [ method, path, parameters, capabilities ] - capabilities: [ union_types ] + capabilities: [ union_types, union_types_remove_fields ] reason: "Union types introduced in 8.15.0" test_runner_features: [ capabilities, allowed_warnings_regex ] diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml index 8f291600acbf6..642407ac6d45b 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_tsdb.yml @@ -253,12 +253,24 @@ from index pattern unsupported counter: --- from index pattern explicit counter use: + - requires: + test_runner_features: [capabilities] + capabilities: + - method: POST + path: /_query + parameters: [] + capabilities: [union_types_fix_rename_resolution] + reason: "Union type resolution fix for rename also allows direct usage of unsupported fields in KEEP" + - do: - catch: '/Cannot use field \[k8s.pod.network.tx\] due to ambiguities being mapped as different metric types in indices: \[test, test2\]/' + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" esql.query: body: query: 'FROM test* | keep *.tx' - + - match: {columns.0.name: "k8s.pod.network.tx"} + - match: {columns.0.type: "unsupported"} + - length: {values: 10} --- _source: diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rollup/security_tests.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rollup/security_tests.yml deleted file mode 100644 index 5bae9469a3c47..0000000000000 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/rollup/security_tests.yml +++ /dev/null @@ -1,191 +0,0 @@ -setup: - - skip: - features: headers - - - do: - cluster.health: - wait_for_status: yellow - - - do: - indices.create: - index: dummy-rollup-index - body: - settings: - number_of_shards: 1 - number_of_replicas: 0 - mappings: - _meta: - _rollup: - my-id: { } - ---- -teardown: - - do: - security.delete_user: - username: "test_user" - ignore: 404 - - - do: - security.delete_role: - name: "foo_only_access" - 
ignore: 404 - ---- -"Index-based access": - - - do: - security.put_role: - name: "foo_only_access" - body: > - { - "cluster": [ "all" ], - "indices": [ - { "names": ["foo"], "privileges": ["all"] }, - { "names": ["rollup"], "privileges": ["all"] } - ] - } - - - do: - security.put_user: - username: "test_user" - body: > - { - "password" : "x-pack-test-password", - "roles" : [ "foo_only_access" ], - "full_name" : "foo only" - } - - - do: - indices.create: - index: foo - body: - mappings: - properties: - timestamp: - type: date - value_field: - type: integer - - do: - headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser - index: - index: foo - body: - timestamp: 123 - value_field: 1232 - - - do: - indices.create: - index: foobar - body: - mappings: - properties: - timestamp: - type: date - value_field: - type: integer - - do: - headers: - Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser - index: - index: foobar - body: - timestamp: 123 - value_field: 456 - - - do: - headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user - indices.refresh: - index: foo - - # This index pattern will match both indices, but we only have permission to read one - - do: - headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user - rollup.put_job: - id: foo - body: > - { - "index_pattern": "foo*", - "rollup_index": "rollup", - "cron": "*/1 * * * * ?", - "page_size" :10, - "groups" : { - "date_histogram": { - "field": "timestamp", - "interval": "1s" - } - }, - "metrics": [ - { - "field": "value_field", - "metrics": ["min", "max", "sum"] - } - ] - } - - - is_true: acknowledged - - - do: - headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user - rollup.start_job: - id: foo - - is_true: started - - - do: - headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user - indices.refresh: - index: rollup - - # this is a hacky way to sleep for 5s, since we will never have 10 nodes - - do: - catch: request_timeout - cluster.health: - wait_for_nodes: 10 - timeout: "5s" - - match: - timed_out: true - - - do: - headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user - rollup.get_jobs: - id: foo - - match: - jobs.0.stats.documents_processed: 1 - - - do: - headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user - search: - rest_total_hits_as_int: true - index: foo - body: - query: - match_all: {} - - - match: - hits.total: 1 - - - do: - headers: { Authorization: "Basic dGVzdF91c2VyOngtcGFjay10ZXN0LXBhc3N3b3Jk" } # test_user - search: - rest_total_hits_as_int: true - index: rollup - body: - query: - match_all: {} - - - match: - hits.total: 1 - - match: - hits.hits.0._id: "foo$VxMkzTqILshClbtbFi4-rQ" - - match: - hits.hits.0._source: - timestamp.date_histogram.time_zone: "UTC" - timestamp.date_histogram.timestamp: 0 - value_field.max.value: 1232.0 - _rollup.version: 2 - timestamp.date_histogram.interval: "1s" - value_field.sum.value: 1232.0 - value_field.min.value: 1232.0 - timestamp.date_histogram._count: 1 - _rollup.id: "foo" - diff --git a/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/LegacyStackTemplateRegistry.java b/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/LegacyStackTemplateRegistry.java index 4d2789dbb8591..62d22c0c0a9cc 
100644 --- a/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/LegacyStackTemplateRegistry.java +++ b/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/LegacyStackTemplateRegistry.java @@ -54,7 +54,7 @@ public class LegacyStackTemplateRegistry extends IndexTemplateRegistry { private static final Map<String, String> ADDITIONAL_TEMPLATE_VARIABLES = Map.of( "xpack.stack.template.deprecated", "true", - "xpack.stack.template.logs.index.mode", + "xpack.stack.template.logsdb.index.mode", "standard" ); diff --git a/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java b/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java index aa1e8858163a5..6a9936f4f27d3 100644 --- a/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java +++ b/x-pack/plugin/stack/src/main/java/org/elasticsearch/xpack/stack/StackTemplateRegistry.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.features.FeatureService; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.XContentParserConfiguration; @@ -47,7 +48,7 @@ public class StackTemplateRegistry extends IndexTemplateRegistry { // The stack template registry version. This number must be incremented when we make changes // to built-in templates. - public static final int REGISTRY_VERSION = 11; + public static final int REGISTRY_VERSION = 12; public static final String TEMPLATE_VERSION_VARIABLE = "xpack.stack.template.version"; public static final Setting<Boolean> STACK_TEMPLATES_ENABLED = Setting.boolSetting( @@ -58,7 +59,7 @@ public class StackTemplateRegistry extends IndexTemplateRegistry { ); /** - * if index.mode "logs" is applied by default in logs@settings for 'logs-*-*' + * if index.mode "logsdb" is applied by default in logs@settings for 'logs-*-*' */ public static final Setting<Boolean> CLUSTER_LOGSDB_ENABLED = Setting.boolSetting( "cluster.logsdb.enabled", @@ -146,7 +147,7 @@ private Map<String, ComponentTemplate> loadComponentTemplateConfigs(boolean logs ), new IndexTemplateConfig( LOGS_MAPPINGS_COMPONENT_TEMPLATE_NAME, - logsDbEnabled ? "/logs@mappings-logsdb.json" : "/logs@mappings.json", + "/logs@mappings.json", REGISTRY_VERSION, TEMPLATE_VERSION_VARIABLE, ADDITIONAL_TEMPLATE_VARIABLES @@ -166,8 +167,8 @@ private Map<String, ComponentTemplate> loadComponentTemplateConfigs(boolean logs Map.of( "xpack.stack.template.deprecated", "false", - "xpack.stack.template.logs.index.mode", - logsDbEnabled ? "logs" : "standard" + "xpack.stack.template.logsdb.index.mode", + logsDbEnabled ?
IndexMode.LOGSDB.getName() : IndexMode.STANDARD.getName() ) ), new IndexTemplateConfig( diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/trigger/schedule/engine/TickerScheduleTriggerEngine.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/trigger/schedule/engine/TickerScheduleTriggerEngine.java index ba07c3137340d..ced131640f0ee 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/trigger/schedule/engine/TickerScheduleTriggerEngine.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/trigger/schedule/engine/TickerScheduleTriggerEngine.java @@ -34,6 +34,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicBoolean; import static org.elasticsearch.common.settings.Setting.positiveTimeSetting; @@ -50,6 +51,7 @@ public class TickerScheduleTriggerEngine extends ScheduleTriggerEngine { private final TimeValue tickInterval; private final Map<String, ActiveSchedule> schedules = new ConcurrentHashMap<>(); private final Ticker ticker; + private final AtomicBoolean isRunning = new AtomicBoolean(false); public TickerScheduleTriggerEngine(Settings settings, ScheduleRegistry scheduleRegistry, Clock clock) { super(scheduleRegistry, clock); @@ -60,7 +62,8 @@ public TickerScheduleTriggerEngine(Settings settings, ScheduleRegistry scheduleR @Override public synchronized void start(Collection<Watch> jobs) { long startTime = clock.millis(); - logger.info("Watcher starting watches at {}", WatcherDateTimeUtils.dateTimeFormatter.formatMillis(startTime)); + isRunning.set(true); + logger.info("Starting watcher engine at {}", WatcherDateTimeUtils.dateTimeFormatter.formatMillis(startTime)); Map<String, ActiveSchedule> startingSchedules = Maps.newMapWithExpectedSize(jobs.size()); for (Watch job : jobs) { if (job.trigger() instanceof ScheduleTrigger trigger) { @@ -81,17 +84,22 @@ public synchronized void start(Collection<Watch> jobs) { @Override public void stop() { + logger.info("Stopping watcher engine"); + isRunning.set(false); schedules.clear(); ticker.close(); } @Override - public synchronized void pauseExecution() { + public void pauseExecution() { + logger.info("Pausing watcher engine"); + isRunning.set(false); schedules.clear(); } @Override public void add(Watch watch) { + logger.trace("Adding watch [{}] to engine (engine is running: {})", watch.id(), isRunning.get()); assert watch.trigger() instanceof ScheduleTrigger; ScheduleTrigger trigger = (ScheduleTrigger) watch.trigger(); ActiveSchedule currentSchedule = schedules.get(watch.id()); @@ -106,13 +114,25 @@ public void add(Watch watch) { @Override public boolean remove(String jobId) { + logger.debug("Removing watch [{}] from engine (engine is running: {})", jobId, isRunning.get()); return schedules.remove(jobId) != null; } void checkJobs() { + if (isRunning.get() == false) { + logger.debug( + "Watcher not running because the engine is paused. Currently scheduled watches being skipped: {}", + schedules.size() + ); + return; + } long triggeredTime = clock.millis(); List<TriggerEvent> events = new ArrayList<>(); for (ActiveSchedule schedule : schedules.values()) { + if (isRunning.get() == false) { + logger.debug("Watcher paused while running [{}]", schedule.name); + break; + } long scheduledTime = schedule.check(triggeredTime); if (scheduledTime > 0) { ZonedDateTime triggeredDateTime = utcDateTimeAtEpochMillis(triggeredTime);
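The TickerScheduleTriggerEngine hunk above guards each tick with an AtomicBoolean: start() sets the flag, stop() and pauseExecution() clear it, and checkJobs() returns early (and breaks out of its loop) while the flag is false, so a paused engine never fires the watches that are still in its schedule map. The standalone sketch below is an editor's illustration of that pattern only, under the assumption that a Runnable stands in for a scheduled watch; the class name PauseGuardedScheduler and all of its members are hypothetical and are not part of this patch or of the Watcher code.

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;

// Hypothetical, self-contained illustration of the pause-guard pattern:
// a running flag is flipped by start()/pauseExecution(), and the periodic
// check skips every scheduled job while the engine is paused.
public class PauseGuardedScheduler {

    private final Map<String, Runnable> schedules = new ConcurrentHashMap<>();
    private final AtomicBoolean isRunning = new AtomicBoolean(false);

    public void start() {
        isRunning.set(true);
    }

    public void pauseExecution() {
        isRunning.set(false);
        schedules.clear();
    }

    public void add(String id, Runnable job) {
        schedules.put(id, job);
    }

    // Called on every tick; returns how many jobs actually ran.
    public int checkJobs() {
        if (isRunning.get() == false) {
            return 0; // engine is paused: skip everything still scheduled
        }
        int triggered = 0;
        for (Runnable job : schedules.values()) {
            if (isRunning.get() == false) {
                break; // paused mid-iteration: stop triggering further jobs
            }
            job.run();
            triggered++;
        }
        return triggered;
    }

    public static void main(String[] args) {
        PauseGuardedScheduler engine = new PauseGuardedScheduler();
        engine.add("watch-1", () -> System.out.println("watch-1 fired"));
        engine.start();
        System.out.println("jobs run while started: " + engine.checkJobs()); // 1
        engine.pauseExecution();
        System.out.println("jobs run while paused: " + engine.checkJobs());  // 0
    }
}
```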