From fd68b040498d2905856c12d1ccbccc7e08ccd9d5 Mon Sep 17 00:00:00 2001 From: bellengao Date: Thu, 21 Oct 2021 19:16:36 +0800 Subject: [PATCH 01/21] Throw a RepositoryConflictException when trying to put or delete a repository in use (#56070) --- .../migration/migrate_8_0/api.asciidoc | 14 +++- .../blobstore/BlobStoreDynamicSettingsIT.java | 7 +- .../DedicatedClusterSnapshotRestoreIT.java | 2 +- .../snapshots/RepositoriesIT.java | 66 +++++++++++++++++++ .../SharedClusterSnapshotRestoreIT.java | 2 +- .../elasticsearch/ElasticsearchException.java | 7 +- .../repositories/RepositoriesService.java | 16 +++-- .../RepositoryConflictException.java | 47 +++++++++++++ .../cluster/RestDeleteRepositoryAction.java | 13 +++- .../cluster/RestPutRepositoryAction.java | 15 ++++- .../ExceptionSerializationTests.java | 2 + .../rest-api-spec/test/repository.yml | 4 +- ...archableSnapshotsRepositoryIntegTests.java | 9 +-- 13 files changed, 181 insertions(+), 23 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/repositories/RepositoryConflictException.java diff --git a/docs/reference/migration/migrate_8_0/api.asciidoc b/docs/reference/migration/migrate_8_0/api.asciidoc index 13dacdda0dd9e..2c86e0a45442c 100644 --- a/docs/reference/migration/migrate_8_0/api.asciidoc +++ b/docs/reference/migration/migrate_8_0/api.asciidoc @@ -83,10 +83,22 @@ Use {ref}/ml-apis.html#ml-api-datafeed-endpoint[{dfeeds}] instead. [%collapsible] ==== *Details* + -The ability to update a `job_id` in a {dfeed} was deprecated in 7.3.0. and is +The ability to update a `job_id` in a {dfeed} was deprecated in 7.3.0 and is removed in 8.0. *Impact* + It is not possible to move {dfeeds} between {anomaly-jobs}. ==== + +.Create repository and delete repository APIs return a `409` status code when a repository is in use instead of `500`. +[%collapsible] +==== +*Details* + +The {ref}/put-snapshot-repo-api.html[Create or update snapshot repository API] and +{ref}/delete-snapshot-repo-api.html[Delete snapshot repository API] return a `409` +status code when a request attempts to modify an existing repository that is in use, instead of a `500` status code. + +*Impact* + +Update client code that handles creation and deletion of repositories to reflect this change.
+==== // end::notable-breaking-changes[] diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreDynamicSettingsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreDynamicSettingsIT.java index 595a59b7c38f6..d58b97485bd35 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreDynamicSettingsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreDynamicSettingsIT.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.RepositoryConflictException; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; import org.elasticsearch.snapshots.mockstore.MockRepository; import org.elasticsearch.test.ESIntegTestCase; @@ -92,11 +93,11 @@ largeSnapshotPool && randomBoolean() randomBoolean() ); } catch (Exception e) { - final Throwable ise = ExceptionsHelper.unwrap(e, IllegalStateException.class); - assertThat(ise, instanceOf(IllegalStateException.class)); + final Throwable ise = ExceptionsHelper.unwrap(e, RepositoryConflictException.class); + assertThat(ise, instanceOf(RepositoryConflictException.class)); assertEquals( ise.getMessage(), - "trying to modify or unregister repository [test-repo] that is currently used (snapshot is in progress)" + "[test-repo] trying to modify or unregister repository that is currently used (snapshot is in progress)" ); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index 7f7eb35b1ef84..214d55770b044 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -1251,7 +1251,7 @@ public void testConcurrentSnapshotAndRepoDelete() throws Exception { assertThat( e.getMessage(), containsString( - "trying to modify or unregister repository [test-repo] that is currently used (snapshot deletion is in progress)" + "[test-repo] trying to modify or unregister repository that is currently used (snapshot deletion is in progress)" ) ); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositoriesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositoriesIT.java index b6ccc015cf499..489845ef7fd68 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositoriesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositoriesIT.java @@ -8,6 +8,7 @@ package org.elasticsearch.snapshots; import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; import org.elasticsearch.action.admin.cluster.repositories.verify.VerifyRepositoryResponse; import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; @@ -19,8 +20,12 @@ import org.elasticsearch.common.io.FileSystemUtils; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.repositories.RepositoriesService; +import 
org.elasticsearch.repositories.RepositoryConflictException; import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.repositories.RepositoryVerificationException; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.snapshots.mockstore.MockRepository; import org.elasticsearch.test.ESIntegTestCase; import java.nio.file.Path; @@ -237,4 +242,65 @@ public void testRepositoryVerification() { assertThat(ExceptionsHelper.stackTrace(ex), containsString("is not shared")); } } + + public void testRepositoryConflict() throws Exception { + logger.info("--> creating repository"); + final String repo = "test-repo"; + assertAcked( + client().admin() + .cluster() + .preparePutRepository(repo) + .setType("mock") + .setSettings( + Settings.builder() + .put("location", randomRepoPath()) + .put("random", randomAlphaOfLength(10)) + .put("wait_after_unblock", 200) + ) + .get() + ); + + logger.info("--> snapshot"); + final String index = "test-idx"; + assertAcked(prepareCreate(index, 1, Settings.builder().put("number_of_shards", 1).put("number_of_replicas", 0))); + for (int i = 0; i < 10; i++) { + indexDoc(index, Integer.toString(i), "foo", "bar" + i); + } + refresh(); + final String snapshot1 = "test-snap1"; + client().admin().cluster().prepareCreateSnapshot(repo, snapshot1).setWaitForCompletion(true).get(); + String blockedNode = internalCluster().getMasterName(); + ((MockRepository) internalCluster().getInstance(RepositoriesService.class, blockedNode).repository(repo)).blockOnDataFiles(); + logger.info("--> start deletion of snapshot"); + ActionFuture future = client().admin().cluster().prepareDeleteSnapshot(repo, snapshot1).execute(); + logger.info("--> waiting for block to kick in on node [{}]", blockedNode); + waitForBlock(blockedNode, repo); + + logger.info("--> try deleting the repository, should fail because the deletion of the snapshot is in progress"); + RepositoryConflictException e1 = expectThrows( + RepositoryConflictException.class, + () -> client().admin().cluster().prepareDeleteRepository(repo).get() + ); + assertThat(e1.status(), equalTo(RestStatus.CONFLICT)); + assertThat(e1.getMessage(), containsString("trying to modify or unregister repository that is currently used")); + + logger.info("--> try updating the repository, should fail because the deletion of the snapshot is in progress"); + RepositoryConflictException e2 = expectThrows( + RepositoryConflictException.class, + () -> client().admin() + .cluster() + .preparePutRepository(repo) + .setType("mock") + .setSettings(Settings.builder().put("location", randomRepoPath())) + .get() + ); + assertThat(e2.status(), equalTo(RestStatus.CONFLICT)); + assertThat(e2.getMessage(), containsString("trying to modify or unregister repository that is currently used")); + + logger.info("--> unblocking blocked node [{}]", blockedNode); + unblockNode(repo, blockedNode); + + logger.info("--> wait until snapshot deletion is finished"); + assertAcked(future.actionGet()); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 8b06e39dae1ad..5f68fecfda41e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -971,7 +971,7 @@ public void testDeleteRepositoryWhileSnapshotting() throws 
Exception { logger.info("--> in-use repository deletion failed"); assertThat( ex.getMessage(), - equalTo("trying to modify or unregister repository [test-repo] that is currently used (snapshot is in progress)") + equalTo("[test-repo] trying to modify or unregister repository that is currently used (snapshot is in progress)") ); } diff --git a/server/src/main/java/org/elasticsearch/ElasticsearchException.java b/server/src/main/java/org/elasticsearch/ElasticsearchException.java index c5d0938a7bdb2..d8ee54807cbd6 100644 --- a/server/src/main/java/org/elasticsearch/ElasticsearchException.java +++ b/server/src/main/java/org/elasticsearch/ElasticsearchException.java @@ -1043,7 +1043,12 @@ private enum ElasticsearchExceptionHandle { org.elasticsearch.ElasticsearchAuthenticationProcessingError.class, org.elasticsearch.ElasticsearchAuthenticationProcessingError::new, 162, - Version.V_7_16_0); + Version.V_7_16_0), + REPOSITORY_CONFLICT_EXCEPTION( + org.elasticsearch.repositories.RepositoryConflictException.class, + org.elasticsearch.repositories.RepositoryConflictException::new, + 163, + Version.V_8_0_0); final Class exceptionClass; final CheckedFunction constructor; diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index f87cdf9c50a59..c92760f98f275 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -683,14 +683,14 @@ private static void validate(final String repositoryName) { private static void ensureRepositoryNotInUse(ClusterState clusterState, String repository) { final SnapshotsInProgress snapshots = clusterState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); if (snapshots.forRepo(repository).isEmpty() == false) { - throw newRepositoryInUseException(repository, "snapshot is in progress"); + throw newRepositoryConflictException(repository, "snapshot is in progress"); } for (SnapshotDeletionsInProgress.Entry entry : clusterState.custom( SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY ).getEntries()) { if (entry.repository().equals(repository)) { - throw newRepositoryInUseException(repository, "snapshot deletion is in progress"); + throw newRepositoryConflictException(repository, "snapshot deletion is in progress"); } } for (RepositoryCleanupInProgress.Entry entry : clusterState.custom( @@ -698,12 +698,12 @@ private static void ensureRepositoryNotInUse(ClusterState clusterState, String r RepositoryCleanupInProgress.EMPTY ).entries()) { if (entry.repository().equals(repository)) { - throw newRepositoryInUseException(repository, "repository clean up is in progress"); + throw newRepositoryConflictException(repository, "repository clean up is in progress"); } } for (RestoreInProgress.Entry entry : clusterState.custom(RestoreInProgress.TYPE, RestoreInProgress.EMPTY)) { if (repository.equals(entry.snapshot().getRepository())) { - throw newRepositoryInUseException(repository, "snapshot restore is in progress"); + throw newRepositoryConflictException(repository, "snapshot restore is in progress"); } } } @@ -723,7 +723,7 @@ private static void ensureNoSearchableSnapshotsIndicesInUse(ClusterState cluster } } if (indices != null && indices.isEmpty() == false) { - throw newRepositoryInUseException( + throw newRepositoryConflictException( repositoryMetadata.name(), "found " + count @@ -746,8 +746,10 @@ private static boolean 
indexSettingsMatchRepositoryMetadata(Settings indexSettin return false; } - private static IllegalStateException newRepositoryInUseException(String repository, String reason) { - return new IllegalStateException( + private static RepositoryConflictException newRepositoryConflictException(String repository, String reason) { + return new RepositoryConflictException( + repository, + "trying to modify or unregister repository that is currently used (" + reason + ')', "trying to modify or unregister repository [" + repository + "] that is currently used (" + reason + ')' ); } diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoryConflictException.java b/server/src/main/java/org/elasticsearch/repositories/RepositoryConflictException.java new file mode 100644 index 0000000000000..1e51053320924 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoryConflictException.java @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.repositories; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.rest.RestStatus; + +import java.io.IOException; + +/** + * Repository conflict exception + */ +public class RepositoryConflictException extends RepositoryException { + private final String backwardCompatibleMessage; + + public RepositoryConflictException(String repository, String message, String backwardCompatibleMessage) { + super(repository, message); + this.backwardCompatibleMessage = backwardCompatibleMessage; + } + + @Override + public RestStatus status() { + return RestStatus.CONFLICT; + } + + public String getBackwardCompatibleMessage() { + return backwardCompatibleMessage; + } + + public RepositoryConflictException(StreamInput in) throws IOException { + super(in); + this.backwardCompatibleMessage = in.readString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(backwardCompatibleMessage); + } +} diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestDeleteRepositoryAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestDeleteRepositoryAction.java index 1198f58b1c027..3fa52539bddef 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestDeleteRepositoryAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestDeleteRepositoryAction.java @@ -9,7 +9,10 @@ package org.elasticsearch.rest.action.admin.cluster; import org.elasticsearch.action.admin.cluster.repositories.delete.DeleteRepositoryRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.node.NodeClient; +import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.repositories.RepositoryConflictException; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.action.RestToXContentListener; @@ -40,6 +43,14 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC DeleteRepositoryRequest deleteRepositoryRequest = 
deleteRepositoryRequest(request.param("repository")); deleteRepositoryRequest.timeout(request.paramAsTime("timeout", deleteRepositoryRequest.timeout())); deleteRepositoryRequest.masterNodeTimeout(request.paramAsTime("master_timeout", deleteRepositoryRequest.masterNodeTimeout())); - return channel -> client.admin().cluster().deleteRepository(deleteRepositoryRequest, new RestToXContentListener<>(channel)); + return channel -> client.admin().cluster().deleteRepository(deleteRepositoryRequest, + new RestToXContentListener(channel).delegateResponse((delegate, err) -> { + if (request.getRestApiVersion().equals(RestApiVersion.V_7) && err instanceof RepositoryConflictException) { + delegate.onFailure(new IllegalStateException(((RepositoryConflictException) err).getBackwardCompatibleMessage())); + } else { + delegate.onFailure(err); + } + }) + ); } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestPutRepositoryAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestPutRepositoryAction.java index 3f0271282b3b2..5905eef05627c 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestPutRepositoryAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestPutRepositoryAction.java @@ -9,11 +9,14 @@ package org.elasticsearch.rest.action.admin.cluster; import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.node.NodeClient; -import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.core.RestApiVersion; +import org.elasticsearch.repositories.RepositoryConflictException; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.action.RestToXContentListener; +import org.elasticsearch.xcontent.XContentParser; import java.io.IOException; import java.util.List; @@ -48,6 +51,14 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC putRepositoryRequest.verify(request.paramAsBoolean("verify", true)); putRepositoryRequest.masterNodeTimeout(request.paramAsTime("master_timeout", putRepositoryRequest.masterNodeTimeout())); putRepositoryRequest.timeout(request.paramAsTime("timeout", putRepositoryRequest.timeout())); - return channel -> client.admin().cluster().putRepository(putRepositoryRequest, new RestToXContentListener<>(channel)); + return channel -> client.admin().cluster().putRepository(putRepositoryRequest, + new RestToXContentListener(channel).delegateResponse((delegate, err) -> { + if (request.getRestApiVersion().equals(RestApiVersion.V_7) && err instanceof RepositoryConflictException) { + delegate.onFailure(new IllegalStateException(((RepositoryConflictException) err).getBackwardCompatibleMessage())); + } else { + delegate.onFailure(err); + } + }) + ); } } diff --git a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java index 40a1647c85d6a..2a9479e37b140 100644 --- a/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/elasticsearch/ExceptionSerializationTests.java @@ -62,6 +62,7 @@ import org.elasticsearch.indices.recovery.PeerRecoveryNotFound; import org.elasticsearch.indices.recovery.RecoverFilesRecoveryException; import org.elasticsearch.ingest.IngestProcessorException; +import 
org.elasticsearch.repositories.RepositoryConflictException; import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.rest.action.admin.indices.AliasesNotFoundException; @@ -822,6 +823,7 @@ public void testIds() { ids.put(160, NoSeedNodeLeftException.class); ids.put(161, VersionMismatchException.class); ids.put(162, ElasticsearchAuthenticationProcessingError.class); + ids.put(163, RepositoryConflictException.class); Map, Integer> reverse = new HashMap<>(); for (Map.Entry> entry : ids.entrySet()) { diff --git a/x-pack/plugin/searchable-snapshots/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/repository.yml b/x-pack/plugin/searchable-snapshots/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/repository.yml index 57115b8358849..e620301dc3fae 100644 --- a/x-pack/plugin/searchable-snapshots/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/repository.yml +++ b/x-pack/plugin/searchable-snapshots/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/repository.yml @@ -70,9 +70,9 @@ setup: - match: { snapshot.shards.failed: 0 } - match: { snapshot.shards.successful: 1 } - # Returns an illegal state exception + # Returns a repository conflict exception - do: - catch: request + catch: conflict snapshot.delete_repository: repository: repository-fs diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsRepositoryIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsRepositoryIntegTests.java index 1cdc6d228ac57..6dfb07e2db852 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsRepositoryIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsRepositoryIntegTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.Nullable; +import org.elasticsearch.repositories.RepositoryConflictException; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.SnapshotRestoreException; @@ -102,16 +103,16 @@ public void testRepositoryUsedBySearchableSnapshotCanBeUpdatedButNotUnregistered } for (int i = 0; i < nbMountedIndices; i++) { - IllegalStateException exception = expectThrows( - IllegalStateException.class, + RepositoryConflictException exception = expectThrows( + RepositoryConflictException.class, () -> clusterAdmin().prepareDeleteRepository(updatedRepositoryName).get() ); assertThat( exception.getMessage(), containsString( - "trying to modify or unregister repository [" + "[" + updatedRepositoryName - + "] that is currently used (found " + + "] trying to modify or unregister repository that is currently used (found " + (nbMountedIndices - i) + " searchable snapshots indices that use the repository:" ) From 1621c8666569f03b09db60ead75a028fe4a55981 Mon Sep 17 00:00:00 2001 From: Dan Hermann Date: Thu, 21 Oct 2021 06:49:24 -0500 Subject: [PATCH 02/21] Reenable ingest YAML tests --- .../rest-api-spec/test/ingest/10_basic.yml | 24 ++----------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml 
b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml index 57cc4a1aa0deb..dc6e730248860 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/ingest/10_basic.yml @@ -1,9 +1,5 @@ --- "Test basic pipeline crud": - - skip: - version: all - reason: https://github.com/elastic/elasticsearch/issues/79377 - - do: ingest.put_pipeline: id: "my_pipeline" @@ -32,10 +28,6 @@ --- "Test Put Versioned Pipeline": - - skip: - version: all - reason: https://github.com/elastic/elasticsearch/issues/79377 - - do: ingest.put_pipeline: id: "my_pipeline" @@ -126,10 +118,6 @@ id: "my_pipeline" --- "Test Get All Pipelines": - - skip: - version: all - reason: https://github.com/elastic/elasticsearch/issues/79377 - - do: ingest.put_pipeline: id: "first_pipeline" @@ -154,10 +142,6 @@ --- "Test invalid config": - - skip: - version: all - reason: https://github.com/elastic/elasticsearch/issues/79377 - - do: catch: /parse_exception/ ingest.put_pipeline: @@ -172,12 +156,8 @@ --- "Test Get Summarized Pipelines": - skip: - version: all - reason: https://github.com/elastic/elasticsearch/issues/79377 - -# - skip: -# version: " - 7.12.99" -# reason: "summary option added in 7.13" + version: " - 7.12.99" + reason: "summary option added in 7.13" - do: ingest.put_pipeline: From 78fcd0e1b42a883ac0def0c59cf58800390c5e1e Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Thu, 21 Oct 2021 14:54:12 +0200 Subject: [PATCH 03/21] Disable request cache for non-deterministic runtime fields (#75054) This change ensures that a non-deterministic script defined in the runtime mapping is not eligible for the request cache. It also marks the scripts that extract values from source as deterministic. Currently a source-only runtime field defined in a search request is not eligible for the request cache. This commit fixes this discrepancy.
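To make the caching contract concrete before the diff, here is a minimal, self-contained Java sketch of the mechanism this patch introduces. The method names isResultDeterministic(), disableCache(), and applyScriptContext(), and the PARSE_FROM_SOURCE factory, match the diff below; everything else (the class RequestCacheSketch and the simplified ScriptFactory/SearchExecutionContext types) is a hypothetical stand-in for the real Elasticsearch classes, not their actual API surface:

    // Simplified model of the determinism/caching hand-off added in this patch.
    public class RequestCacheSketch {
        interface ScriptFactory {
            // Factories are treated as non-deterministic unless they opt in.
            default boolean isResultDeterministic() { return false; }
        }

        // Source-extracting scripts depend only on the document's _source,
        // so they can safely declare themselves deterministic
        // (as PARSE_FROM_SOURCE does in the diff below).
        static final ScriptFactory PARSE_FROM_SOURCE = new ScriptFactory() {
            @Override
            public boolean isResultDeterministic() { return true; }
        };

        // Stand-in for SearchExecutionContext: disableCache() marks the
        // whole search request as not cacheable.
        static class SearchExecutionContext {
            private boolean cacheable = true;
            void disableCache() { cacheable = false; }
            boolean isCacheable() { return cacheable; }
        }

        // Stand-in for AbstractScriptFieldType#applyScriptContext: every query
        // built against a runtime field funnels through this, so a single
        // non-deterministic script opts the request out of the request cache.
        static void applyScriptContext(SearchExecutionContext context, ScriptFactory factory) {
            if (factory.isResultDeterministic() == false) {
                context.disableCache();
            }
        }

        public static void main(String[] args) {
            SearchExecutionContext ctx = new SearchExecutionContext();
            applyScriptContext(ctx, PARSE_FROM_SOURCE);
            System.out.println(ctx.isCacheable());  // true: source-only fields stay cacheable
            applyScriptContext(ctx, new ScriptFactory() {});  // e.g. a script calling Math.random()
            System.out.println(ctx.isCacheable());  // false: request cache disabled
        }
    }

Note that applyScriptContext in the real change also rejects runtime-field queries when expensive queries are disallowed; the sketch keeps only the caching side.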
--- .../index/mapper/AbstractScriptFieldType.java | 8 +++- .../index/mapper/BooleanScriptFieldType.java | 13 ++--- .../index/mapper/DateScriptFieldType.java | 12 ++--- .../index/mapper/DoubleScriptFieldType.java | 11 +++-- .../index/mapper/GeoPointScriptFieldType.java | 5 +- .../index/mapper/IpScriptFieldType.java | 11 +++-- .../index/mapper/KeywordScriptFieldType.java | 23 ++++----- .../index/mapper/LongScriptFieldType.java | 11 +++-- .../index/query/SearchExecutionContext.java | 8 ++++ .../script/BooleanFieldScript.java | 23 +++++---- .../elasticsearch/script/DateFieldScript.java | 24 +++++----- .../script/DoubleFieldScript.java | 23 +++++---- .../script/GeoPointFieldScript.java | 22 +++++---- .../elasticsearch/script/IpFieldScript.java | 23 +++++---- .../elasticsearch/script/LongFieldScript.java | 23 +++++---- .../script/StringFieldScript.java | 23 +++++---- .../AbstractScriptFieldTypeTestCase.java | 48 ++++++++++++++++--- .../index/mapper/BooleanFieldScriptTests.java | 5 ++ .../index/mapper/DateFieldScriptTests.java | 5 ++ .../index/mapper/DoubleFieldScriptTests.java | 5 ++ .../index/mapper/FieldScriptTestCase.java | 9 +++- .../mapper/GeoPointFieldScriptTests.java | 5 ++ .../index/mapper/IndexFieldMapperTests.java | 10 +--- .../index/mapper/IpFieldScriptTests.java | 5 ++ .../index/mapper/LongFieldScriptTests.java | 5 ++ .../index/mapper/StringFieldScriptTests.java | 5 ++ .../index/mapper/VersionFieldMapperTests.java | 6 --- .../index/mapper/MapperServiceTestCase.java | 46 +++++++++--------- 28 files changed, 260 insertions(+), 157 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java index f57bbb3fc2c2a..3fbf2c5d27557 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java @@ -41,16 +41,19 @@ abstract class AbstractScriptFieldType extends MappedFieldType { protected final Script script; private final Function factory; + private final boolean isResultDeterministic; AbstractScriptFieldType( String name, Function factory, Script script, + boolean isResultDeterministic, Map meta ) { super(name, false, false, false, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta); this.factory = factory; this.script = Objects.requireNonNull(script); + this.isResultDeterministic = isResultDeterministic; } @Override @@ -156,12 +159,15 @@ private String unsupported(String query, String supported) { ); } - protected final void checkAllowExpensiveQueries(SearchExecutionContext context) { + protected final void applyScriptContext(SearchExecutionContext context) { if (context.allowExpensiveQueries() == false) { throw new ElasticsearchException( "queries cannot be executed against runtime fields while [" + ALLOW_EXPENSIVE_QUERIES.getKey() + "] is set to [false]." 
); } + if (isResultDeterministic == false) { + context.disableCache(); + } } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptFieldType.java index e349bbb9fd20d..0f5ee2506cf09 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanScriptFieldType.java @@ -70,7 +70,8 @@ public static RuntimeField sourceOnly(String name) { Script script, Map meta ) { - super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), script, meta); + super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), script, + scriptFactory.isResultDeterministic(), meta); } @Override @@ -107,7 +108,7 @@ public BooleanScriptFieldData.Builder fielddataBuilder(String fullyQualifiedInde @Override public Query existsQuery(SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new BooleanScriptFieldExistsQuery(script, leafFactory(context), name()); } @@ -178,13 +179,13 @@ public Query rangeQuery( @Override public Query termQueryCaseInsensitive(Object value, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new BooleanScriptFieldTermQuery(script, leafFactory(context.lookup()), name(), toBoolean(value, true)); } @Override public Query termQuery(Object value, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new BooleanScriptFieldTermQuery(script, leafFactory(context), name(), toBoolean(value, false)); } @@ -211,11 +212,11 @@ private Query termsQuery(boolean trueAllowed, boolean falseAllowed, SearchExecut // Either true or false return existsQuery(context); } - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new BooleanScriptFieldTermQuery(script, leafFactory(context), name(), true); } if (falseAllowed) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new BooleanScriptFieldTermQuery(script, leafFactory(context), name(), false); } return new MatchNoDocsQuery("neither true nor false allowed"); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/DateScriptFieldType.java index 15cf62882deb4..ccf8bb2fcae4f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateScriptFieldType.java @@ -120,7 +120,7 @@ public static RuntimeField sourceOnly(String name, DateFormatter dateTimeFormatt Map meta ) { super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup, dateTimeFormatter), - script, meta); + script, scriptFactory.isResultDeterministic(), meta); this.dateTimeFormatter = dateTimeFormatter; this.dateMathParser = dateTimeFormatter.toDateMathParser(); } @@ -158,7 +158,7 @@ public DateScriptFieldData.Builder fielddataBuilder(String fullyQualifiedIndexNa @Override public Query distanceFeatureQuery(Object origin, String pivot, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return DateFieldType.handleNow(context, now -> { long originLong = DateFieldType.parseToLong( origin, @@ -181,7 +181,7 @@ public Query distanceFeatureQuery(Object origin, String pivot, 
SearchExecutionCo @Override public Query existsQuery(SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new LongScriptFieldExistsQuery(script, leafFactory(context)::newInstance, name()); } @@ -196,7 +196,7 @@ public Query rangeQuery( SearchExecutionContext context ) { parser = parser == null ? this.dateMathParser : parser; - checkAllowExpensiveQueries(context); + applyScriptContext(context); return DateFieldType.dateRangeQuery( lowerTerm, upperTerm, @@ -214,7 +214,7 @@ public Query rangeQuery( public Query termQuery(Object value, SearchExecutionContext context) { return DateFieldType.handleNow(context, now -> { long l = DateFieldType.parseToLong(value, false, null, this.dateMathParser, now, DateFieldMapper.Resolution.MILLISECONDS); - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new LongScriptFieldTermQuery(script, leafFactory(context)::newInstance, name(), l); }); } @@ -229,7 +229,7 @@ public Query termsQuery(Collection values, SearchExecutionContext context) { for (Object value : values) { terms.add(DateFieldType.parseToLong(value, false, null, this.dateMathParser, now, DateFieldMapper.Resolution.MILLISECONDS)); } - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new LongScriptFieldTermsQuery(script, leafFactory(context)::newInstance, name(), terms); }); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java index 66a90e0fae321..b9cb58442e9e5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java @@ -71,7 +71,8 @@ public static RuntimeField sourceOnly(String name) { Script script, Map meta ) { - super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), script, meta); + super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), + script, scriptFactory.isResultDeterministic(), meta); } @Override @@ -100,7 +101,7 @@ public DoubleScriptFieldData.Builder fielddataBuilder(String fullyQualifiedIndex @Override public Query existsQuery(SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new DoubleScriptFieldExistsQuery(script, leafFactory(context), name()); } @@ -114,7 +115,7 @@ public Query rangeQuery( DateMathParser parser, SearchExecutionContext context ) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return NumberType.doubleRangeQuery( lowerTerm, upperTerm, @@ -126,7 +127,7 @@ public Query rangeQuery( @Override public Query termQuery(Object value, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new DoubleScriptFieldTermQuery(script, leafFactory(context), name(), NumberType.objectToDouble(value)); } @@ -139,7 +140,7 @@ public Query termsQuery(Collection values, SearchExecutionContext context) { for (Object value : values) { terms.add(Double.doubleToLongBits(NumberType.objectToDouble(value))); } - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new DoubleScriptFieldTermsQuery(script, leafFactory(context), name(), terms); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/GeoPointScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/GeoPointScriptFieldType.java 
index d5547dfdded73..c129f95b53165 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/GeoPointScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/GeoPointScriptFieldType.java @@ -65,7 +65,8 @@ GeoPointFieldScript.Factory getCompositeLeafFactory( Script script, Map meta ) { - super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), script, meta); + super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), + script, scriptFactory.isResultDeterministic(), meta); } @Override @@ -100,7 +101,7 @@ public GeoPointScriptFieldData.Builder fielddataBuilder(String fullyQualifiedInd @Override public Query existsQuery(SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new GeoPointScriptFieldExistsQuery(script, leafFactory(context), name()); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/IpScriptFieldType.java index dc70b14a86518..21186406f4efb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpScriptFieldType.java @@ -69,7 +69,8 @@ IpFieldScript.Factory getCompositeLeafFactory(Function meta ) { - super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), script, meta); + super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), + script, scriptFactory.isResultDeterministic(), meta); } @Override @@ -99,7 +100,7 @@ public IpScriptFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName @Override public Query existsQuery(SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new IpScriptFieldExistsQuery(script, leafFactory(context), name()); } @@ -113,7 +114,7 @@ public Query rangeQuery( DateMathParser parser, SearchExecutionContext context ) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return IpFieldMapper.IpFieldType.rangeQuery( lowerTerm, upperTerm, @@ -131,7 +132,7 @@ public Query rangeQuery( @Override public Query termQuery(Object value, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); if (value instanceof InetAddress) { return inetAddressQuery((InetAddress) value, context); } @@ -149,7 +150,7 @@ private Query inetAddressQuery(InetAddress address, SearchExecutionContext conte @Override public Query termsQuery(Collection values, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); BytesRefHash terms = new BytesRefHash(values.size(), BigArrays.NON_RECYCLING_INSTANCE); List cidrQueries = null; for (Object value : values) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java index 13726b6a67666..7f71c55d2cadc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordScriptFieldType.java @@ -77,7 +77,8 @@ public KeywordScriptFieldType( Script script, Map meta ) { - super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), script, meta); + super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), 
searchLookup), + script, scriptFactory.isResultDeterministic(), meta); } @Override @@ -102,7 +103,7 @@ public StringScriptFieldData.Builder fielddataBuilder(String fullyQualifiedIndex @Override public Query existsQuery(SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new StringScriptFieldExistsQuery(script, leafFactory(context), name()); } @@ -115,7 +116,7 @@ public Query fuzzyQuery( boolean transpositions, SearchExecutionContext context ) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return StringScriptFieldFuzzyQuery.build( script, leafFactory(context), @@ -129,7 +130,7 @@ public Query fuzzyQuery( @Override public Query prefixQuery(String value, RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new StringScriptFieldPrefixQuery(script, leafFactory(context), name(), value, caseInsensitive); } @@ -143,7 +144,7 @@ public Query rangeQuery( DateMathParser parser, SearchExecutionContext context ) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new StringScriptFieldRangeQuery( script, leafFactory(context), @@ -164,7 +165,7 @@ public Query regexpQuery( RewriteMethod method, SearchExecutionContext context ) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); if (matchFlags != 0) { throw new IllegalArgumentException("Match flags not yet implemented [" + matchFlags + "]"); } @@ -181,7 +182,7 @@ public Query regexpQuery( @Override public Query termQueryCaseInsensitive(Object value, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new StringScriptFieldTermQuery( script, leafFactory(context), @@ -193,7 +194,7 @@ public Query termQueryCaseInsensitive(Object value, SearchExecutionContext conte @Override public Query termQuery(Object value, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new StringScriptFieldTermQuery( script, leafFactory(context), @@ -205,20 +206,20 @@ public Query termQuery(Object value, SearchExecutionContext context) { @Override public Query termsQuery(Collection values, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); Set terms = values.stream().map(v -> BytesRefs.toString(Objects.requireNonNull(v))).collect(toSet()); return new StringScriptFieldTermsQuery(script, leafFactory(context), name(), terms); } @Override public Query wildcardQuery(String value, RewriteMethod method, boolean caseInsensitive, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new StringScriptFieldWildcardQuery(script, leafFactory(context), name(), value, caseInsensitive); } @Override public Query normalizedWildcardQuery(String value, RewriteMethod method, SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new StringScriptFieldWildcardQuery(script, leafFactory(context), name(), value, false); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java index 8c14a660a32c3..b3bfdbadd986d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/LongScriptFieldType.java @@ -68,7 
+68,8 @@ public LongScriptFieldType( Script script, Map meta ) { - super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), script, meta); + super(name, searchLookup -> scriptFactory.newFactory(name, script.getParams(), searchLookup), + script, scriptFactory.isResultDeterministic(), meta); } @Override @@ -97,7 +98,7 @@ public LongScriptFieldData.Builder fielddataBuilder(String fullyQualifiedIndexNa @Override public Query existsQuery(SearchExecutionContext context) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new LongScriptFieldExistsQuery(script, leafFactory(context)::newInstance, name()); } @@ -111,7 +112,7 @@ public Query rangeQuery( DateMathParser parser, SearchExecutionContext context ) { - checkAllowExpensiveQueries(context); + applyScriptContext(context); return NumberType.longRangeQuery( lowerTerm, upperTerm, @@ -126,7 +127,7 @@ public Query termQuery(Object value, SearchExecutionContext context) { if (NumberType.hasDecimalPart(value)) { return Queries.newMatchNoDocsQuery("Value [" + value + "] has a decimal part"); } - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new LongScriptFieldTermQuery(script, leafFactory(context)::newInstance, name(), NumberType.objectToLong(value, true)); } @@ -145,7 +146,7 @@ public Query termsQuery(Collection values, SearchExecutionContext context) { if (terms.isEmpty()) { return Queries.newMatchNoDocsQuery("All values have a decimal part"); } - checkAllowExpensiveQueries(context); + applyScriptContext(context); return new LongScriptFieldTermsQuery(script, leafFactory(context)::newInstance, name(), terms); } } diff --git a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java index 10ce5568317ab..b79e958a72a72 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java @@ -538,6 +538,14 @@ public final void freezeContext() { this.frozen.set(Boolean.TRUE); } + /** + * Marks this context as not cacheable. + * This method fails if {@link #freezeContext()} is called before on this context. + */ + public void disableCache() { + failIfFrozen(); + } + /** * This method fails if {@link #freezeContext()} is called before on this * context. This is used to seal. 
diff --git a/server/src/main/java/org/elasticsearch/script/BooleanFieldScript.java b/server/src/main/java/org/elasticsearch/script/BooleanFieldScript.java index fe2f8e6a6338e..5591ff6d796fd 100644 --- a/server/src/main/java/org/elasticsearch/script/BooleanFieldScript.java +++ b/server/src/main/java/org/elasticsearch/script/BooleanFieldScript.java @@ -20,17 +20,20 @@ public abstract class BooleanFieldScript extends AbstractFieldScript { public static final ScriptContext CONTEXT = newContext("boolean_field", Factory.class); - public static final BooleanFieldScript.Factory PARSE_FROM_SOURCE - = (field, params, lookup) -> (BooleanFieldScript.LeafFactory) ctx -> new BooleanFieldScript - ( - field, - params, - lookup, - ctx - ) { + public static final Factory PARSE_FROM_SOURCE = new Factory() { @Override - public void execute() { - emitFromSource(); + public LeafFactory newFactory(String field, Map params, SearchLookup lookup) { + return ctx -> new BooleanFieldScript(field, params, lookup, ctx) { + @Override + public void execute() { + emitFromSource(); + } + }; + } + + @Override + public boolean isResultDeterministic() { + return true; } }; diff --git a/server/src/main/java/org/elasticsearch/script/DateFieldScript.java b/server/src/main/java/org/elasticsearch/script/DateFieldScript.java index 7cda02b1e85d9..c8006eb486922 100644 --- a/server/src/main/java/org/elasticsearch/script/DateFieldScript.java +++ b/server/src/main/java/org/elasticsearch/script/DateFieldScript.java @@ -19,18 +19,20 @@ public abstract class DateFieldScript extends AbstractLongFieldScript { public static final ScriptContext CONTEXT = newContext("date_field", Factory.class); - public static final DateFieldScript.Factory PARSE_FROM_SOURCE - = (field, params, lookup, formatter) -> (DateFieldScript.LeafFactory) ctx -> new DateFieldScript - ( - field, - params, - lookup, - formatter, - ctx - ) { + public static final Factory PARSE_FROM_SOURCE = new Factory() { @Override - public void execute() { - emitFromSource(); + public LeafFactory newFactory(String field, Map params, SearchLookup lookup, DateFormatter formatter) { + return ctx -> new DateFieldScript(field, params, lookup, formatter, ctx) { + @Override + public void execute() { + emitFromSource(); + } + }; + } + + @Override + public boolean isResultDeterministic() { + return true; } }; diff --git a/server/src/main/java/org/elasticsearch/script/DoubleFieldScript.java b/server/src/main/java/org/elasticsearch/script/DoubleFieldScript.java index 5dc0725f4b3d8..651a1ac3a1f4a 100644 --- a/server/src/main/java/org/elasticsearch/script/DoubleFieldScript.java +++ b/server/src/main/java/org/elasticsearch/script/DoubleFieldScript.java @@ -19,17 +19,20 @@ public abstract class DoubleFieldScript extends AbstractFieldScript { public static final ScriptContext CONTEXT = newContext("double_field", Factory.class); - public static final DoubleFieldScript.Factory PARSE_FROM_SOURCE - = (field, params, lookup) -> (DoubleFieldScript.LeafFactory) ctx -> new DoubleFieldScript - ( - field, - params, - lookup, - ctx - ) { + public static final Factory PARSE_FROM_SOURCE = new Factory() { @Override - public void execute() { - emitFromSource(); + public LeafFactory newFactory(String field, Map params, SearchLookup lookup) { + return ctx -> new DoubleFieldScript(field, params, lookup, ctx) { + @Override + public void execute() { + emitFromSource(); + } + }; + } + + @Override + public boolean isResultDeterministic() { + return true; } }; diff --git 
a/server/src/main/java/org/elasticsearch/script/GeoPointFieldScript.java b/server/src/main/java/org/elasticsearch/script/GeoPointFieldScript.java index 6a8aba788c957..404e60ce691e9 100644 --- a/server/src/main/java/org/elasticsearch/script/GeoPointFieldScript.java +++ b/server/src/main/java/org/elasticsearch/script/GeoPointFieldScript.java @@ -32,18 +32,20 @@ public abstract class GeoPointFieldScript extends AbstractLongFieldScript { public static final ScriptContext CONTEXT = newContext("geo_point_field", Factory.class); - public static final GeoPointFieldScript.Factory PARSE_FROM_SOURCE - = (field, params, lookup) -> (GeoPointFieldScript.LeafFactory) ctx -> new GeoPointFieldScript - ( - field, - params, - lookup, - ctx - ) { + public static final Factory PARSE_FROM_SOURCE = new Factory() { + @Override + public LeafFactory newFactory(String field, Map params, SearchLookup lookup) { + return ctx -> new GeoPointFieldScript(field, params, lookup, ctx) { + @Override + public void execute() { + emitFromSource(); + } + }; + } @Override - public void execute() { - emitFromSource(); + public boolean isResultDeterministic() { + return true; } }; diff --git a/server/src/main/java/org/elasticsearch/script/IpFieldScript.java b/server/src/main/java/org/elasticsearch/script/IpFieldScript.java index cc36dc8888d31..cf49b7eaed23b 100644 --- a/server/src/main/java/org/elasticsearch/script/IpFieldScript.java +++ b/server/src/main/java/org/elasticsearch/script/IpFieldScript.java @@ -40,17 +40,20 @@ public abstract class IpFieldScript extends AbstractFieldScript { public static final ScriptContext CONTEXT = newContext("ip_field", Factory.class); - public static final IpFieldScript.Factory PARSE_FROM_SOURCE - = (field, params, lookup) -> (IpFieldScript.LeafFactory) ctx -> new IpFieldScript - ( - field, - params, - lookup, - ctx - ) { + public static final Factory PARSE_FROM_SOURCE = new Factory() { @Override - public void execute() { - emitFromSource(); + public LeafFactory newFactory(String field, Map params, SearchLookup lookup) { + return ctx -> new IpFieldScript(field, params, lookup, ctx) { + @Override + public void execute() { + emitFromSource(); + } + }; + } + + @Override + public boolean isResultDeterministic() { + return true; } }; diff --git a/server/src/main/java/org/elasticsearch/script/LongFieldScript.java b/server/src/main/java/org/elasticsearch/script/LongFieldScript.java index cf58561d12155..db1a27a07db35 100644 --- a/server/src/main/java/org/elasticsearch/script/LongFieldScript.java +++ b/server/src/main/java/org/elasticsearch/script/LongFieldScript.java @@ -18,17 +18,20 @@ public abstract class LongFieldScript extends AbstractLongFieldScript { public static final ScriptContext CONTEXT = newContext("long_field", Factory.class); - public static final LongFieldScript.Factory PARSE_FROM_SOURCE - = (field, params, lookup) -> (LongFieldScript.LeafFactory) ctx -> new LongFieldScript - ( - field, - params, - lookup, - ctx - ) { + public static final Factory PARSE_FROM_SOURCE = new Factory() { @Override - public void execute() { - emitFromSource(); + public LeafFactory newFactory(String field, Map params, SearchLookup lookup) { + return ctx -> new LongFieldScript(field, params, lookup, ctx) { + @Override + public void execute() { + emitFromSource(); + } + }; + } + + @Override + public boolean isResultDeterministic() { + return true; } }; diff --git a/server/src/main/java/org/elasticsearch/script/StringFieldScript.java b/server/src/main/java/org/elasticsearch/script/StringFieldScript.java index 
112193e8a74f1..0907234e5a119 100644 --- a/server/src/main/java/org/elasticsearch/script/StringFieldScript.java +++ b/server/src/main/java/org/elasticsearch/script/StringFieldScript.java @@ -26,17 +26,20 @@ public abstract class StringFieldScript extends AbstractFieldScript { public static final ScriptContext CONTEXT = newContext("keyword_field", Factory.class); - public static final StringFieldScript.Factory PARSE_FROM_SOURCE - = (field, params, lookup) -> (StringFieldScript.LeafFactory) ctx -> new StringFieldScript - ( - field, - params, - lookup, - ctx - ) { + public static final StringFieldScript.Factory PARSE_FROM_SOURCE = new Factory() { @Override - public void execute() { - emitFromSource(); + public LeafFactory newFactory(String field, Map params, SearchLookup lookup) { + return ctx -> new StringFieldScript(field, params, lookup, ctx) { + @Override + public void execute() { + emitFromSource(); + } + }; + } + + @Override + public boolean isResultDeterministic() { + return true; } }; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java b/server/src/test/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java index cedc12608fc4a..4b01d85bfcbad 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java @@ -274,6 +274,39 @@ public void testSpanPrefixQueryIsError() { assertQueryOnlyOnText("span prefix", () -> simpleMappedFieldType().spanPrefixQuery(null, null, null)); } + public final void testCacheable() throws IOException { + XContentBuilder mapping = runtimeMapping(b -> { + b.startObject("field") + .field("type", typeName()) + .startObject("script") + .field("source", "dummy_source") + .field("lang", "test") + .endObject() + .endObject() + .startObject("field_source") + .field("type", typeName()) + .startObject("script") + .field("source", "deterministic_source") + .field("lang", "test") + .endObject() + .endObject(); + }); + + MapperService mapperService = createMapperService(mapping); + + { + SearchExecutionContext c = createSearchExecutionContext(mapperService); + c.getFieldType("field").existsQuery(c); + assertFalse(c.isCacheable()); + } + + { + SearchExecutionContext c = createSearchExecutionContext(mapperService); + c.getFieldType("field_source").existsQuery(c); + assertTrue(c.isCacheable()); + } + } + private void assertQueryOnlyOnText(String queryName, ThrowingRunnable buildQuery) { Exception e = expectThrows(IllegalArgumentException.class, buildQuery); assertThat( @@ -313,26 +346,27 @@ protected final void minimalMapping(XContentBuilder b) throws IOException { @Override @SuppressWarnings("unchecked") protected T compileScript(Script script, ScriptContext context) { + boolean deterministicSource = "deterministic_source".equals(script.getIdOrCode()); if (context == BooleanFieldScript.CONTEXT) { - return (T) BooleanFieldScriptTests.DUMMY; + return deterministicSource ? (T) BooleanFieldScript.PARSE_FROM_SOURCE : (T) BooleanFieldScriptTests.DUMMY; } if (context == DateFieldScript.CONTEXT) { - return (T) DateFieldScriptTests.DUMMY; + return deterministicSource ? (T) DateFieldScript.PARSE_FROM_SOURCE : (T) DateFieldScriptTests.DUMMY; } if (context == DoubleFieldScript.CONTEXT) { - return (T) DoubleFieldScriptTests.DUMMY; + return deterministicSource ? 
(T) DoubleFieldScript.PARSE_FROM_SOURCE : (T) DoubleFieldScriptTests.DUMMY; } if (context == IpFieldScript.CONTEXT) { - return (T) IpFieldScriptTests.DUMMY; + return deterministicSource ? (T) IpFieldScript.PARSE_FROM_SOURCE : (T) IpFieldScriptTests.DUMMY; } if (context == LongFieldScript.CONTEXT) { - return (T) LongFieldScriptTests.DUMMY; + return deterministicSource ? (T) LongFieldScript.PARSE_FROM_SOURCE : (T) LongFieldScriptTests.DUMMY; } if (context == StringFieldScript.CONTEXT) { - return (T) StringFieldScriptTests.DUMMY; + return deterministicSource ? (T) StringFieldScript.PARSE_FROM_SOURCE : (T) StringFieldScriptTests.DUMMY; } if (context == GeoPointFieldScript.CONTEXT) { - return (T) GeoPointFieldScriptTests.DUMMY; + return deterministicSource ? (T) GeoPointFieldScript.PARSE_FROM_SOURCE : (T) GeoPointFieldScriptTests.DUMMY; } throw new IllegalArgumentException("Unsupported context: " + context); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldScriptTests.java b/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldScriptTests.java index 4281ab937929c..5d938636c3ed1 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldScriptTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldScriptTests.java @@ -45,6 +45,11 @@ protected BooleanFieldScript.Factory dummyScript() { return DUMMY; } + @Override + protected BooleanFieldScript.Factory fromSource() { + return BooleanFieldScript.PARSE_FROM_SOURCE; + } + public void testTooManyValues() throws IOException { try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{}")))); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldScriptTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldScriptTests.java index aa1ee07bb2d63..f09a5ee026b46 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldScriptTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldScriptTests.java @@ -49,6 +49,11 @@ protected DateFieldScript.Factory dummyScript() { return DUMMY; } + @Override + protected DateFieldScript.Factory fromSource() { + return DateFieldScript.PARSE_FROM_SOURCE; + } + public void testTooManyValues() throws IOException { try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{}")))); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DoubleFieldScriptTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DoubleFieldScriptTests.java index 8b737e513ba38..ac73aec558faf 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DoubleFieldScriptTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DoubleFieldScriptTests.java @@ -47,6 +47,11 @@ protected DoubleFieldScript.Factory dummyScript() { return DUMMY; } + @Override + protected DoubleFieldScript.Factory fromSource() { + return DoubleFieldScript.PARSE_FROM_SOURCE; + } + public void testTooManyValues() throws IOException { try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{}")))); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/FieldScriptTestCase.java 
b/server/src/test/java/org/elasticsearch/index/mapper/FieldScriptTestCase.java index ecc2340326eb2..b04d6fd2028b4 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/FieldScriptTestCase.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/FieldScriptTestCase.java @@ -10,6 +10,7 @@ import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptContext; +import org.elasticsearch.script.ScriptFactory; import org.elasticsearch.script.ScriptService; import org.elasticsearch.script.ScriptType; import org.elasticsearch.test.ESTestCase; @@ -17,11 +18,17 @@ import java.io.IOException; import java.util.Map; -public abstract class FieldScriptTestCase extends ESTestCase { +public abstract class FieldScriptTestCase extends ESTestCase { protected abstract ScriptContext context(); protected abstract T dummyScript(); + protected abstract T fromSource(); + + public final void testFromSourceIsDeterministic() { + assertTrue(fromSource().isResultDeterministic()); + } + public final void testRateLimitingDisabled() throws IOException { try (ScriptService scriptService = TestScriptEngine.scriptService(context(), dummyScript())) { for (int i = 0; i < 1000; i++) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldScriptTests.java b/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldScriptTests.java index 5921cbd8bafae..51474cc2f2a16 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldScriptTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldScriptTests.java @@ -47,6 +47,11 @@ protected GeoPointFieldScript.Factory dummyScript() { return DUMMY; } + @Override + protected GeoPointFieldScript.Factory fromSource() { + return GeoPointFieldScript.PARSE_FROM_SOURCE; + } + public void testTooManyValues() throws IOException { try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{}")))); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IndexFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IndexFieldMapperTests.java index 830b5a4dba2f3..342d7bba12021 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IndexFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IndexFieldMapperTests.java @@ -10,10 +10,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; -import org.elasticsearch.index.Index; -import org.elasticsearch.index.fielddata.IndexFieldDataCache; import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; @@ -24,7 +21,6 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.nullValue; -import static org.mockito.Mockito.when; public class IndexFieldMapperTests extends MapperServiceTestCase { @@ -45,7 +41,7 @@ public void testFetchFieldValue() throws IOException { MapperService mapperService = createMapperService( fieldMapping(b -> b.field("type", "keyword")) ); - String index = randomAlphaOfLength(12); + String index = mapperService.index().getName(); withLuceneIndex(mapperService, iw -> { SourceToParse source = source(index, "id", b -> b.field("field", "value"), "", Map.of()); 
iw.addDocument(mapperService.documentMapper().parse(source).rootDoc()); @@ -53,10 +49,6 @@ public void testFetchFieldValue() throws IOException { IndexFieldMapper.IndexFieldType ft = (IndexFieldMapper.IndexFieldType) mapperService.fieldType("_index"); SearchLookup lookup = new SearchLookup(mapperService::fieldType, fieldDataLookup()); SearchExecutionContext searchExecutionContext = createSearchExecutionContext(mapperService); - when(searchExecutionContext.getForField(ft)).thenReturn( - ft.fielddataBuilder(index, () -> lookup).build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService()) - ); - when(searchExecutionContext.getFullyQualifiedIndex()).thenReturn(new Index(index, "indexUUid")); ValueFetcher valueFetcher = ft.valueFetcher(searchExecutionContext, null); IndexSearcher searcher = newSearcher(iw); LeafReaderContext context = searcher.getIndexReader().leaves().get(0); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldScriptTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldScriptTests.java index 0da0bae7df6ef..6c74cec1a3cc3 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldScriptTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldScriptTests.java @@ -47,6 +47,11 @@ protected IpFieldScript.Factory dummyScript() { return DUMMY; } + @Override + protected IpFieldScript.Factory fromSource() { + return IpFieldScript.PARSE_FROM_SOURCE; + } + public void testTooManyValues() throws IOException { try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{}")))); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/LongFieldScriptTests.java b/server/src/test/java/org/elasticsearch/index/mapper/LongFieldScriptTests.java index 30bb4807400ed..8ee06710c644e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/LongFieldScriptTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/LongFieldScriptTests.java @@ -47,6 +47,11 @@ protected LongFieldScript.Factory dummyScript() { return DUMMY; } + @Override + protected LongFieldScript.Factory fromSource() { + return LongFieldScript.PARSE_FROM_SOURCE; + } + public void testTooManyValues() throws IOException { try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{}")))); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/StringFieldScriptTests.java b/server/src/test/java/org/elasticsearch/index/mapper/StringFieldScriptTests.java index 210dc2c3d9b5e..c1d30b3036da3 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/StringFieldScriptTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/StringFieldScriptTests.java @@ -47,6 +47,11 @@ protected StringFieldScript.Factory dummyScript() { return DUMMY; } + @Override + protected StringFieldScript.Factory fromSource() { + return StringFieldScript.PARSE_FROM_SOURCE; + } + public void testTooManyValues() throws IOException { try (Directory directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), directory)) { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{}")))); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/VersionFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/VersionFieldMapperTests.java index 
5f32fa48c0ab4..4c7f15d34951d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/VersionFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/VersionFieldMapperTests.java @@ -13,9 +13,7 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.IndexSearcher; -import org.elasticsearch.index.fielddata.IndexFieldDataCache; import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; @@ -23,7 +21,6 @@ import java.util.List; import static org.hamcrest.Matchers.containsString; -import static org.mockito.Mockito.when; public class VersionFieldMapperTests extends MapperServiceTestCase { @@ -59,9 +56,6 @@ public void testFetchFieldValue() throws IOException { VersionFieldMapper.VersionFieldType ft = (VersionFieldMapper.VersionFieldType) mapperService.fieldType("_version"); SearchLookup lookup = new SearchLookup(mapperService::fieldType, fieldDataLookup()); SearchExecutionContext searchExecutionContext = createSearchExecutionContext(mapperService); - when(searchExecutionContext.getForField(ft)).thenReturn( - ft.fielddataBuilder("test", () -> lookup).build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService()) - ); ValueFetcher valueFetcher = ft.valueFetcher(searchExecutionContext, null); IndexSearcher searcher = newSearcher(iw); LeafReaderContext context = searcher.getIndexReader().leaves().get(0); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 644e4d86e6769..e626254b11a17 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -67,6 +67,7 @@ import java.io.IOException; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; @@ -78,10 +79,7 @@ import static java.util.Collections.emptyList; import static java.util.stream.Collectors.toList; -import static org.mockito.Matchers.anyObject; -import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; /** * Test case that lets you easilly build {@link MapperService} based on some @@ -537,26 +535,30 @@ protected final void withAggregationContext( } protected SearchExecutionContext createSearchExecutionContext(MapperService mapperService) { - SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class); - when(searchExecutionContext.getFieldType(anyString())).thenAnswer(inv -> mapperService.fieldType(inv.getArguments()[0].toString())); - when(searchExecutionContext.isFieldMapped(anyString())) - .thenAnswer(inv -> mapperService.fieldType(inv.getArguments()[0].toString()) != null); - when(searchExecutionContext.getIndexAnalyzers()).thenReturn(mapperService.getIndexAnalyzers()); - when(searchExecutionContext.getIndexSettings()).thenReturn(mapperService.getIndexSettings()); - when(searchExecutionContext.getObjectMapper(anyString())).thenAnswer( - inv -> mapperService.mappingLookup().objectMappers().get(inv.getArguments()[0].toString())); - when(searchExecutionContext.getMatchingFieldNames(anyObject())).thenAnswer( - inv -> 
mapperService.mappingLookup().getMatchingFieldNames(inv.getArguments()[0].toString())
+        final SimilarityService similarityService = new SimilarityService(mapperService.getIndexSettings(), null, Map.of());
+        final long nowInMillis = randomNonNegativeLong();
+        return new SearchExecutionContext(
+            0,
+            0,
+            mapperService.getIndexSettings(),
+            null,
+            (ft, idxName, lookup) ->
+                ft.fielddataBuilder(idxName, lookup).build(new IndexFieldDataCache.None(), new NoneCircuitBreakerService()),
+            mapperService,
+            mapperService.mappingLookup(),
+            similarityService,
+            null,
+            xContentRegistry(),
+            writableRegistry(),
+            null,
+            null,
+            () -> nowInMillis,
+            null,
+            null,
+            () -> true,
+            null,
+            Collections.emptyMap()
+        );
-        when(searchExecutionContext.allowExpensiveQueries()).thenReturn(true);
-        when(searchExecutionContext.lookup()).thenReturn(new SearchLookup(mapperService::fieldType, (ft, s) -> {
-            throw new UnsupportedOperationException("search lookup not available");
-        }));
-
-        SimilarityService similarityService = new SimilarityService(mapperService.getIndexSettings(), null, Map.of());
-        when(searchExecutionContext.getDefaultSimilarity()).thenReturn(similarityService.getDefaultSimilarity());
-
-        return searchExecutionContext;
     }

     protected BiFunction<MappedFieldType, Supplier<SearchLookup>, IndexFieldData<?>> fieldDataLookup() {

From 808b70d2f906892e0b27fe62367723b70a48c8ae Mon Sep 17 00:00:00 2001
From: Stuart Tettemer
Date: Thu, 21 Oct 2021 07:57:27 -0500
Subject: [PATCH 04/21] Script: Restore the scripting general cache (#79453)

Deprecate the script context cache in favor of the general cache.

Users should use the following settings:

* `script.max_compilations_rate` to set the max compilation rate for user scripts such as filter scripts. Certain script contexts that submit scripts outside of the control of the user are exempted from this rate limit. Examples include runtime fields, ingest and watcher.
* `script.cache.max_size` to set the max size of the cache.
* `script.cache.expire` to set the expiration time for entries in the cache.

What's deprecated?

* `script.max_compilations_rate: use-context`. This special setting value was used to turn on the script context-specific caches.
* `script.context.$CONTEXT.cache_max_size`, use `script.cache.max_size` instead.
* `script.context.$CONTEXT.cache_expire`, use `script.cache.expire` instead.
* `script.context.$CONTEXT.max_compilations_rate`, use `script.max_compilations_rate` instead.

The default cache size was increased from `100` to `3000`, which was approximately the max cache size when using context-specific caches. The default compilation rate limit was increased from `75/5m` to `150/5m` to account for increasing uses of scripts.

System script contexts can now opt out of compilation rate limiting using a flag rather than a sentinel rate limit value.
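For illustration, a minimal sketch (not part of the original patch) of tuning the restored general cache through the dynamic cluster settings API; all three settings are registered as dynamic in this change, and the values shown are arbitrary examples rather than recommendations (by default `script.cache.expire` is `0ms`, meaning no time-based expiry):

[source,js]
----
PUT _cluster/settings
{
  "persistent": {
    "script.max_compilations_rate": "300/5m",
    "script.cache.max_size": 5000,
    "script.cache.expire": "10m"
  }
}
----

Setting `script.max_compilations_rate` back to the sentinel value `use-context` re-enables the deprecated per-context caches and emits a deprecation warning.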
7.16: Script: Deprecate script context cache #79508 Refs: #62899 7.16: Script: Opt-out system contexts from script compilation rate limit #79459 Refs: #62899 --- .../modules/indices/circuit_breaker.asciidoc | 6 +- docs/reference/scripting/using.asciidoc | 12 +- .../admin/cluster/node/stats/NodeStats.java | 15 + .../common/settings/ClusterSettings.java | 3 + .../org/elasticsearch/node/NodeService.java | 1 + .../script/AbstractFieldScript.java | 2 +- .../script/IngestConditionalScript.java | 2 +- .../elasticsearch/script/IngestScript.java | 2 +- .../org/elasticsearch/script/ScriptCache.java | 17 +- .../script/ScriptCacheStats.java | 149 ++++++++ .../elasticsearch/script/ScriptContext.java | 14 +- .../elasticsearch/script/ScriptMetrics.java | 7 +- .../elasticsearch/script/ScriptService.java | 164 +++++++- .../org/elasticsearch/script/ScriptStats.java | 33 +- .../elasticsearch/script/TemplateScript.java | 2 +- .../cluster/node/stats/NodeStatsTests.java | 32 +- .../elasticsearch/cluster/DiskUsageTests.java | 18 +- .../script/ScriptCacheTests.java | 45 ++- .../script/ScriptServiceTests.java | 359 ++++++++++++++++-- .../MockInternalClusterInfoService.java | 3 +- .../test/InternalTestCluster.java | 9 +- .../AutoscalingMemoryInfoServiceTests.java | 1 + .../xpack/deprecation/DeprecationChecks.java | 6 +- .../deprecation/NodeDeprecationChecks.java | 77 ++++ .../NodeDeprecationChecksTests.java | 110 ++++++ ...chineLearningInfoTransportActionTests.java | 2 +- ...sportGetTrainedModelsStatsActionTests.java | 2 +- .../node/NodeStatsMonitoringDocTests.java | 3 +- .../elasticsearch/xpack/watcher/Watcher.java | 3 +- .../condition/WatcherConditionScript.java | 3 +- .../script/WatcherTransformScript.java | 3 +- 31 files changed, 989 insertions(+), 116 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/script/ScriptCacheStats.java diff --git a/docs/reference/modules/indices/circuit_breaker.asciidoc b/docs/reference/modules/indices/circuit_breaker.asciidoc index b128ce5dfb371..161f3a8876f18 100644 --- a/docs/reference/modules/indices/circuit_breaker.asciidoc +++ b/docs/reference/modules/indices/circuit_breaker.asciidoc @@ -126,11 +126,11 @@ within a period of time. See the "prefer-parameters" section of the <> documentation for more information. -`script.context.$CONTEXT.max_compilations_rate`:: +`script.max_compilations_rate`:: (<>) Limit for the number of unique dynamic scripts within a certain interval - that are allowed to be compiled for a given context. Defaults to `75/5m`, - meaning 75 every 5 minutes. + that are allowed to be compiled. Defaults to `150/5m`, + meaning 150 every 5 minutes. [[regex-circuit-breaker]] [discrete] diff --git a/docs/reference/scripting/using.asciidoc b/docs/reference/scripting/using.asciidoc index f49be226c2d37..e1eaeaaedad93 100644 --- a/docs/reference/scripting/using.asciidoc +++ b/docs/reference/scripting/using.asciidoc @@ -120,12 +120,8 @@ the `multiplier` parameter without {es} recompiling the script. } ---- -For most contexts, you can compile up to 75 scripts per 5 minutes by default. -For ingest contexts, the default script compilation rate is unlimited. You -can change these settings dynamically by setting -`script.context.$CONTEXT.max_compilations_rate`. For example, the following -setting limits script compilation to 100 scripts every 10 minutes for the -{painless}/painless-field-context.html[field context]: +You can compile up to 150 scripts per 5 minutes by default. +For ingest contexts, the default script compilation rate is unlimited. 
[source,js] ---- @@ -406,8 +402,8 @@ small. All scripts are cached by default so that they only need to be recompiled when updates occur. By default, scripts do not have a time-based expiration. -You can change this behavior by using the `script.context.$CONTEXT.cache_expire` setting. -Use the `script.context.$CONTEXT.cache_max_size` setting to configure the size of the cache. +You can change this behavior by using the `script.cache.expire` setting. +Use the `script.cache.max_size` setting to configure the size of the cache. NOTE: The size of scripts is limited to 65,535 bytes. Set the value of `script.max_size_in_bytes` to increase that soft limit. If your scripts are really large, then consider using a diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStats.java index 7c17367f9ecbe..855ed0c8676c2 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStats.java @@ -27,6 +27,7 @@ import org.elasticsearch.monitor.os.OsStats; import org.elasticsearch.monitor.process.ProcessStats; import org.elasticsearch.node.AdaptiveSelectionStats; +import org.elasticsearch.script.ScriptCacheStats; import org.elasticsearch.script.ScriptStats; import org.elasticsearch.threadpool.ThreadPoolStats; import org.elasticsearch.transport.TransportStats; @@ -71,6 +72,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private ScriptStats scriptStats; + @Nullable + private ScriptCacheStats scriptCacheStats; + @Nullable private DiscoveryStats discoveryStats; @@ -98,6 +102,7 @@ public NodeStats(StreamInput in) throws IOException { http = in.readOptionalWriteable(HttpStats::new); breaker = in.readOptionalWriteable(AllCircuitBreakerStats::new); scriptStats = in.readOptionalWriteable(ScriptStats::new); + scriptCacheStats = scriptStats != null ? 
scriptStats.toScriptCacheStats() : null; discoveryStats = in.readOptionalWriteable(DiscoveryStats::new); ingestStats = in.readOptionalWriteable(IngestStats::new); adaptiveSelectionStats = in.readOptionalWriteable(AdaptiveSelectionStats::new); @@ -112,6 +117,7 @@ public NodeStats(DiscoveryNode node, long timestamp, @Nullable NodeIndicesStats @Nullable DiscoveryStats discoveryStats, @Nullable IngestStats ingestStats, @Nullable AdaptiveSelectionStats adaptiveSelectionStats, + @Nullable ScriptCacheStats scriptCacheStats, @Nullable IndexingPressureStats indexingPressureStats) { super(node); this.timestamp = timestamp; @@ -128,6 +134,7 @@ public NodeStats(DiscoveryNode node, long timestamp, @Nullable NodeIndicesStats this.discoveryStats = discoveryStats; this.ingestStats = ingestStats; this.adaptiveSelectionStats = adaptiveSelectionStats; + this.scriptCacheStats = scriptCacheStats; this.indexingPressureStats = indexingPressureStats; } @@ -223,6 +230,11 @@ public AdaptiveSelectionStats getAdaptiveSelectionStats() { return adaptiveSelectionStats; } + @Nullable + public ScriptCacheStats getScriptCacheStats() { + return scriptCacheStats; + } + @Nullable public IndexingPressureStats getIndexingPressureStats() { return indexingPressureStats; @@ -314,6 +326,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getAdaptiveSelectionStats() != null) { getAdaptiveSelectionStats().toXContent(builder, params); } + if (getScriptCacheStats() != null) { + getScriptCacheStats().toXContent(builder, params); + } if (getIndexingPressureStats() != null) { getIndexingPressureStats().toXContent(builder, params); } diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index a1f2606782f19..9993eaa344650 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -372,6 +372,9 @@ public void apply(Settings value, Settings current, Settings previous) { ScriptService.SCRIPT_CACHE_SIZE_SETTING, ScriptService.SCRIPT_CACHE_EXPIRE_SETTING, ScriptService.SCRIPT_DISABLE_MAX_COMPILATIONS_RATE_SETTING, + ScriptService.SCRIPT_GENERAL_CACHE_EXPIRE_SETTING, + ScriptService.SCRIPT_GENERAL_CACHE_SIZE_SETTING, + ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING, ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING, ScriptService.SCRIPT_MAX_SIZE_IN_BYTES, ScriptService.TYPES_ALLOWED_SETTING, diff --git a/server/src/main/java/org/elasticsearch/node/NodeService.java b/server/src/main/java/org/elasticsearch/node/NodeService.java index 590a64ccd7b60..305d0aa995835 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeService.java +++ b/server/src/main/java/org/elasticsearch/node/NodeService.java @@ -118,6 +118,7 @@ public NodeStats stats(CommonStatsFlags indices, boolean os, boolean process, bo discoveryStats ? coordinator.stats() : null, ingest ? ingestService.stats() : null, adaptiveSelection ? responseCollectorService.getAdaptiveStats(searchTransportService.getPendingSearchRequests()) : null, + scriptCache ? scriptService.cacheStats() : null, indexingPressure ? 
this.indexingPressure.stats() : null); } diff --git a/server/src/main/java/org/elasticsearch/script/AbstractFieldScript.java b/server/src/main/java/org/elasticsearch/script/AbstractFieldScript.java index 2792ce48b94f2..7cd479a388289 100644 --- a/server/src/main/java/org/elasticsearch/script/AbstractFieldScript.java +++ b/server/src/main/java/org/elasticsearch/script/AbstractFieldScript.java @@ -52,7 +52,7 @@ static ScriptContext newContext(String name, Class factoryClass) { * source of runaway script compilations. We think folks will * mostly reuse scripts though. */ - ScriptCache.UNLIMITED_COMPILATION_RATE.asTuple(), + false, /* * Disable runtime fields scripts from being allowed * to be stored as part of the script meta data. diff --git a/server/src/main/java/org/elasticsearch/script/IngestConditionalScript.java b/server/src/main/java/org/elasticsearch/script/IngestConditionalScript.java index 430f6c22a116f..caa6cbbe0164b 100644 --- a/server/src/main/java/org/elasticsearch/script/IngestConditionalScript.java +++ b/server/src/main/java/org/elasticsearch/script/IngestConditionalScript.java @@ -21,7 +21,7 @@ public abstract class IngestConditionalScript { /** The context used to compile {@link IngestConditionalScript} factories. */ public static final ScriptContext CONTEXT = new ScriptContext<>("processor_conditional", Factory.class, - 200, TimeValue.timeValueMillis(0), ScriptCache.UNLIMITED_COMPILATION_RATE.asTuple(), true); + 200, TimeValue.timeValueMillis(0), false, true); /** The generic runtime parameters for the script. */ private final Map params; diff --git a/server/src/main/java/org/elasticsearch/script/IngestScript.java b/server/src/main/java/org/elasticsearch/script/IngestScript.java index a0fa0d9bbdde8..bb444b132d1d0 100644 --- a/server/src/main/java/org/elasticsearch/script/IngestScript.java +++ b/server/src/main/java/org/elasticsearch/script/IngestScript.java @@ -22,7 +22,7 @@ public abstract class IngestScript { /** The context used to compile {@link IngestScript} factories. */ public static final ScriptContext CONTEXT = new ScriptContext<>("ingest", Factory.class, - 200, TimeValue.timeValueMillis(0), ScriptCache.UNLIMITED_COMPILATION_RATE.asTuple(), true); + 200, TimeValue.timeValueMillis(0), false, true); /** The generic runtime parameters for the script. 
*/ private final Map params; diff --git a/server/src/main/java/org/elasticsearch/script/ScriptCache.java b/server/src/main/java/org/elasticsearch/script/ScriptCache.java index 622e9910ac372..b2fcdc9f24572 100644 --- a/server/src/main/java/org/elasticsearch/script/ScriptCache.java +++ b/server/src/main/java/org/elasticsearch/script/ScriptCache.java @@ -44,12 +44,7 @@ public class ScriptCache { private final double compilesAllowedPerNano; private final String contextRateSetting; - ScriptCache( - int cacheMaxSize, - TimeValue cacheExpire, - CompilationRate maxCompilationRate, - String contextRateSetting - ) { + ScriptCache(int cacheMaxSize, TimeValue cacheExpire, CompilationRate maxCompilationRate, String contextRateSetting) { this.cacheSize = cacheMaxSize; this.cacheExpire = cacheExpire; this.contextRateSetting = contextRateSetting; @@ -94,8 +89,10 @@ FactoryType compile( logger.trace("context [{}]: compiling script, type: [{}], lang: [{}], options: [{}]", context.name, type, lang, options); } - // Check whether too many compilations have happened - checkCompilationLimit(); + if (context.compilationRateLimited) { + // Check whether too many compilations have happened + checkCompilationLimit(); + } Object compiledScript = scriptEngine.compile(id, idOrCode, context, options); // Since the cache key is the script content itself we don't need to // invalidate/check the cache if an indexed script changes. @@ -121,6 +118,10 @@ static void rethrow(Throwable t) throws T { throw (T) t; } + public ScriptStats stats() { + return scriptMetrics.stats(); + } + public ScriptContextStats stats(String context) { return scriptMetrics.stats(context); } diff --git a/server/src/main/java/org/elasticsearch/script/ScriptCacheStats.java b/server/src/main/java/org/elasticsearch/script/ScriptCacheStats.java new file mode 100644 index 0000000000000..28183c1d46308 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/script/ScriptCacheStats.java @@ -0,0 +1,149 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.script; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.xcontent.ToXContentFragment; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +// This class is deprecated in favor of ScriptStats and ScriptContextStats +public class ScriptCacheStats implements Writeable, ToXContentFragment { + private final Map context; + private final ScriptStats general; + + public ScriptCacheStats(Map context) { + this.context = Collections.unmodifiableMap(context); + this.general = null; + } + + public ScriptCacheStats(ScriptStats general) { + this.general = Objects.requireNonNull(general); + this.context = null; + } + + public ScriptCacheStats(StreamInput in) throws IOException { + boolean isContext = in.readBoolean(); + if (isContext == false) { + general = new ScriptStats(in); + context = null; + return; + } + + general = null; + int size = in.readInt(); + Map context = new HashMap<>(size); + for (int i=0; i < size; i++) { + String name = in.readString(); + context.put(name, new ScriptStats(in)); + } + this.context = Collections.unmodifiableMap(context); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + if (general != null) { + out.writeBoolean(false); + general.writeTo(out); + return; + } + + out.writeBoolean(true); + out.writeInt(context.size()); + for (String name: context.keySet().stream().sorted().collect(Collectors.toList())) { + out.writeString(name); + context.get(name).writeTo(out); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(Fields.SCRIPT_CACHE_STATS); + builder.startObject(Fields.SUM); + if (general != null) { + builder.field(ScriptStats.Fields.COMPILATIONS, general.getCompilations()); + builder.field(ScriptStats.Fields.CACHE_EVICTIONS, general.getCacheEvictions()); + builder.field(ScriptStats.Fields.COMPILATION_LIMIT_TRIGGERED, general.getCompilationLimitTriggered()); + builder.endObject().endObject(); + return builder; + } + + ScriptStats sum = sum(); + builder.field(ScriptStats.Fields.COMPILATIONS, sum.getCompilations()); + builder.field(ScriptStats.Fields.CACHE_EVICTIONS, sum.getCacheEvictions()); + builder.field(ScriptStats.Fields.COMPILATION_LIMIT_TRIGGERED, sum.getCompilationLimitTriggered()); + builder.endObject(); + + builder.startArray(Fields.CONTEXTS); + for (String name: context.keySet().stream().sorted().collect(Collectors.toList())) { + ScriptStats stats = context.get(name); + builder.startObject(); + builder.field(Fields.CONTEXT, name); + builder.field(ScriptStats.Fields.COMPILATIONS, stats.getCompilations()); + builder.field(ScriptStats.Fields.CACHE_EVICTIONS, stats.getCacheEvictions()); + builder.field(ScriptStats.Fields.COMPILATION_LIMIT_TRIGGERED, stats.getCompilationLimitTriggered()); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + + return builder; + } + + /** + * Get the context specific stats, null if using general cache + */ + public Map getContextStats() { + return context; + } + + /** + * Get the general stats, null if using context cache + */ + public ScriptStats getGeneralStats() { + return general; + } + + /** + * The sum of all script stats, either the general 
stats or the sum of all stats of the context stats. + */ + public ScriptStats sum() { + if (general != null) { + return general; + } + long compilations = 0; + long cacheEvictions = 0; + long compilationLimitTriggered = 0; + for (ScriptStats stat: context.values()) { + compilations += stat.getCompilations(); + cacheEvictions += stat.getCacheEvictions(); + compilationLimitTriggered += stat.getCompilationLimitTriggered(); + } + return new ScriptStats( + compilations, + cacheEvictions, + compilationLimitTriggered + ); + } + + static final class Fields { + static final String SCRIPT_CACHE_STATS = "script_cache"; + static final String CONTEXT = "context"; + static final String SUM = "sum"; + static final String CONTEXTS = "contexts"; + } +} diff --git a/server/src/main/java/org/elasticsearch/script/ScriptContext.java b/server/src/main/java/org/elasticsearch/script/ScriptContext.java index eb158f444096c..1e84d36b08b13 100644 --- a/server/src/main/java/org/elasticsearch/script/ScriptContext.java +++ b/server/src/main/java/org/elasticsearch/script/ScriptContext.java @@ -47,6 +47,8 @@ * be {@code boolean needs_score()}. */ public final class ScriptContext { + /** The default compilation rate limit for contexts with compilation rate limiting enabled */ + public static final Tuple DEFAULT_COMPILATION_RATE_LIMIT = new Tuple<>(150, TimeValue.timeValueMinutes(5)); /** A unique identifier for this context. */ public final String name; @@ -66,15 +68,15 @@ public final class ScriptContext { /** The default expiration of a script in the cache for the context, if not overridden */ public final TimeValue cacheExpireDefault; - /** The default max compilation rate for scripts in this context. Script compilation is throttled if this is exceeded */ - public final Tuple maxCompilationRateDefault; + /** Is compilation rate limiting enabled for this context? */ + public final boolean compilationRateLimited; /** Determines if the script can be stored as part of the cluster state. */ public final boolean allowStoredScript; /** Construct a context with the related instance and compiled classes with caller provided cache defaults */ public ScriptContext(String name, Class factoryClazz, int cacheSizeDefault, TimeValue cacheExpireDefault, - Tuple maxCompilationRateDefault, boolean allowStoredScript) { + boolean compilationRateLimited, boolean allowStoredScript) { this.name = name; this.factoryClazz = factoryClazz; Method newInstanceMethod = findMethod("FactoryType", factoryClazz, "newInstance"); @@ -98,15 +100,15 @@ public ScriptContext(String name, Class factoryClazz, int cacheSize this.cacheSizeDefault = cacheSizeDefault; this.cacheExpireDefault = cacheExpireDefault; - this.maxCompilationRateDefault = maxCompilationRateDefault; + this.compilationRateLimited = compilationRateLimited; this.allowStoredScript = allowStoredScript; } /** Construct a context with the related instance and compiled classes with defaults for cacheSizeDefault, cacheExpireDefault and - * maxCompilationRateDefault and allow scripts of this context to be stored scripts */ + * compilationRateLimited and allow scripts of this context to be stored scripts */ public ScriptContext(String name, Class factoryClazz) { // cache size default, cache expire default, max compilation rate are defaults from ScriptService. 
- this(name, factoryClazz, 100, TimeValue.timeValueMillis(0), new Tuple<>(75, TimeValue.timeValueMinutes(5)), true); + this(name, factoryClazz, 100, TimeValue.timeValueMillis(0), true, true); } /** Returns a method with the given name, or throws an exception if multiple are found. */ diff --git a/server/src/main/java/org/elasticsearch/script/ScriptMetrics.java b/server/src/main/java/org/elasticsearch/script/ScriptMetrics.java index f853305c3cd45..c4d26bc861c8c 100644 --- a/server/src/main/java/org/elasticsearch/script/ScriptMetrics.java +++ b/server/src/main/java/org/elasticsearch/script/ScriptMetrics.java @@ -27,6 +27,10 @@ public void onCompilationLimit() { compilationLimitTriggered.inc(); } + public ScriptStats stats() { + return new ScriptStats(compilationsMetric.count(), cacheEvictionsMetric.count(), compilationLimitTriggered.count()); + } + public ScriptContextStats stats(String context) { return new ScriptContextStats( context, @@ -36,4 +40,5 @@ public ScriptContextStats stats(String context) { new ScriptContextStats.TimeSeries(), new ScriptContextStats.TimeSeries() ); - }} + } +} diff --git a/server/src/main/java/org/elasticsearch/script/ScriptService.java b/server/src/main/java/org/elasticsearch/script/ScriptService.java index d0e9f65425498..104d37467c1cd 100644 --- a/server/src/main/java/org/elasticsearch/script/ScriptService.java +++ b/server/src/main/java/org/elasticsearch/script/ScriptService.java @@ -23,6 +23,8 @@ import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; @@ -34,6 +36,7 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -49,11 +52,31 @@ public class ScriptService implements Closeable, ClusterStateApplier, ScriptCompiler { private static final Logger logger = LogManager.getLogger(ScriptService.class); + private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(ScriptService.class); static final String DISABLE_DYNAMIC_SCRIPTING_SETTING = "script.disable_dynamic"; + // Special setting value for SCRIPT_GENERAL_MAX_COMPILATIONS_RATE to indicate the script service should use context + // specific caches + static final ScriptCache.CompilationRate USE_CONTEXT_RATE_VALUE = new ScriptCache.CompilationRate(-1, TimeValue.MINUS_ONE); + static final String USE_CONTEXT_RATE_KEY = "use-context"; + + public static final Setting SCRIPT_GENERAL_CACHE_SIZE_SETTING = + Setting.intSetting("script.cache.max_size", 3000, 0, Property.Dynamic, Property.NodeScope); + public static final Setting SCRIPT_GENERAL_CACHE_EXPIRE_SETTING = + Setting.positiveTimeSetting("script.cache.expire", TimeValue.timeValueMillis(0), Property.Dynamic, Property.NodeScope); public static final Setting SCRIPT_MAX_SIZE_IN_BYTES = Setting.intSetting("script.max_size_in_bytes", 65535, 0, Property.Dynamic, Property.NodeScope); + public static final Setting SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING = + new Setting<>("script.max_compilations_rate", "150/5m", + (String value) -> value.equals(USE_CONTEXT_RATE_KEY) ? 
USE_CONTEXT_RATE_VALUE: new ScriptCache.CompilationRate(value), + Property.Dynamic, Property.NodeScope); + + public static final String USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE = "[" + USE_CONTEXT_RATE_KEY + "] is deprecated for the setting [" + + SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey() + "] as system scripts are now exempt from the rate limit. " + + "Set to a value such as [150/5m] (a rate of 150 compilations per five minutes) to rate limit user scripts in case the " + + "script cache [" + SCRIPT_GENERAL_CACHE_SIZE_SETTING.getKey() + "] is undersized causing script compilation thrashing."; + // Per-context settings static final String CONTEXT_PREFIX = "script.context."; @@ -62,11 +85,15 @@ public class ScriptService implements Closeable, ClusterStateApplier, ScriptComp public static final Setting.AffixSetting SCRIPT_CACHE_SIZE_SETTING = Setting.affixKeySetting(CONTEXT_PREFIX, - "cache_max_size", key -> Setting.intSetting(key, 0, Property.NodeScope, Property.Dynamic)); + "cache_max_size", + key -> Setting.intSetting(key, SCRIPT_GENERAL_CACHE_SIZE_SETTING, 0, + Property.NodeScope, Property.Dynamic, Property.Deprecated)); public static final Setting.AffixSetting SCRIPT_CACHE_EXPIRE_SETTING = Setting.affixKeySetting(CONTEXT_PREFIX, - "cache_expire", key -> Setting.positiveTimeSetting(key, TimeValue.timeValueMillis(0), Property.NodeScope, Property.Dynamic)); + "cache_expire", + key -> Setting.positiveTimeSetting(key, SCRIPT_GENERAL_CACHE_EXPIRE_SETTING, TimeValue.timeValueMillis(0), + Property.NodeScope, Property.Dynamic, Property.Deprecated)); // Unlimited compilation rate for context-specific script caches static final String UNLIMITED_COMPILATION_RATE_KEY = "unlimited"; @@ -76,8 +103,8 @@ public class ScriptService implements Closeable, ClusterStateApplier, ScriptComp "max_compilations_rate", key -> new Setting(key, "75/5m", (String value) -> value.equals(UNLIMITED_COMPILATION_RATE_KEY) ? ScriptCache.UNLIMITED_COMPILATION_RATE: - new ScriptCache.CompilationRate(value), - Property.NodeScope, Property.Dynamic)); + new ScriptCache.CompilationRate(value), + Property.NodeScope, Property.Dynamic, Property.Deprecated)); private static final ScriptCache.CompilationRate SCRIPT_COMPILATION_RATE_ZERO = new ScriptCache.CompilationRate(0, TimeValue.ZERO); @@ -185,7 +212,11 @@ public ScriptService(Settings settings, Map engines, Map context: contexts.values()) { clusterSettings.addSettingsUpdateConsumer( (settings) -> cacheHolder.get().set(context.name, contextCache(settings, context)), - List.of(SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(context.name), - SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace(context.name), - SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(context.name) + Arrays.asList(SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(context.name), + SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace(context.name), + SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(context.name), + SCRIPT_GENERAL_CACHE_EXPIRE_SETTING, + // general settings used for fallbacks + SCRIPT_GENERAL_CACHE_SIZE_SETTING ) ); } + + // Handle all settings for context and general caches, this flips between general and context caches. 
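+        // validateCacheSettings runs as this consumer's validator, so invalid mixes of general and
+        // context-specific settings are rejected before the cache holder is swapped.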
+ clusterSettings.addSettingsUpdateConsumer( + this::setCacheHolder, + Arrays.asList(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING, + SCRIPT_GENERAL_CACHE_EXPIRE_SETTING, + SCRIPT_GENERAL_CACHE_SIZE_SETTING, + SCRIPT_MAX_COMPILATIONS_RATE_SETTING, + SCRIPT_DISABLE_MAX_COMPILATIONS_RATE_SETTING, + SCRIPT_CACHE_EXPIRE_SETTING, + SCRIPT_CACHE_SIZE_SETTING), + this::validateCacheSettings + ); } /** @@ -215,8 +262,13 @@ void registerClusterSettingsListeners(ClusterSettings clusterSettings) { * when using the general cache. */ void validateCacheSettings(Settings settings) { - List> affixes = List.of(SCRIPT_MAX_COMPILATIONS_RATE_SETTING, SCRIPT_CACHE_EXPIRE_SETTING, - SCRIPT_CACHE_SIZE_SETTING); + boolean useContext = SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.get(settings).equals(USE_CONTEXT_RATE_VALUE); + if (useContext) { + deprecationLogger.warn(DeprecationCategory.SCRIPTING, "scripting-context-cache", + USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE); + } + List> affixes = Arrays.asList(SCRIPT_MAX_COMPILATIONS_RATE_SETTING, SCRIPT_CACHE_EXPIRE_SETTING, + SCRIPT_CACHE_SIZE_SETTING); List customRates = new ArrayList<>(); List keys = new ArrayList<>(); for (Setting.AffixSetting affix: affixes) { @@ -231,6 +283,11 @@ void validateCacheSettings(Settings settings) { } } } + if (useContext == false && keys.isEmpty() == false) { + keys.sort(Comparator.naturalOrder()); + throw new IllegalArgumentException("Context cache settings [" + String.join(", ", keys) + "] requires [" + + SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey() + "] to be [" + USE_CONTEXT_RATE_KEY + "]"); + } if (SCRIPT_DISABLE_MAX_COMPILATIONS_RATE_SETTING.get(settings)) { if (customRates.size() > 0) { customRates.sort(Comparator.naturalOrder()); @@ -238,6 +295,12 @@ void validateCacheSettings(Settings settings) { String.join(", ", customRates) + "] if compile rates disabled via [" + SCRIPT_DISABLE_MAX_COMPILATIONS_RATE_SETTING.getKey() + "]"); } + if (useContext == false && SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.exists(settings)) { + throw new IllegalArgumentException("Cannot set custom general compilation rates [" + + SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey() + "] to [" + + SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.get(settings) + "] if compile rates disabled via [" + + SCRIPT_DISABLE_MAX_COMPILATIONS_RATE_SETTING.getKey() + "]"); + } } } @@ -489,11 +552,50 @@ public ScriptStats stats() { return cacheHolder.get().stats(); } + public ScriptCacheStats cacheStats() { + return cacheHolder.get().cacheStats(); + } + @Override public void applyClusterState(ClusterChangedEvent event) { clusterState = event.state(); } + void setCacheHolder(Settings settings) { + CacheHolder current = cacheHolder.get(); + boolean useContext = SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.get(settings).equals(USE_CONTEXT_RATE_VALUE); + + if (current == null) { + if (useContext) { + cacheHolder.set(contextCacheHolder(settings)); + } else { + cacheHolder.set(generalCacheHolder(settings)); + } + return; + } + + // Update + if (useContext) { + if (current.general != null) { + // Flipping to context specific + cacheHolder.set(contextCacheHolder(settings)); + } + } else if (current.general == null) { + // Flipping to general + cacheHolder.set(generalCacheHolder(settings)); + } else if (current.general.rate.equals(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.get(settings)) == false || + current.general.cacheExpire.equals(SCRIPT_GENERAL_CACHE_EXPIRE_SETTING.get(settings)) == false || + current.general.cacheSize != 
SCRIPT_GENERAL_CACHE_SIZE_SETTING.get(settings)) { + // General compilation rate, cache expiration or cache size changed + cacheHolder.set(generalCacheHolder(settings)); + } + } + + CacheHolder generalCacheHolder(Settings settings) { + return new CacheHolder(SCRIPT_GENERAL_CACHE_SIZE_SETTING.get(settings), SCRIPT_GENERAL_CACHE_EXPIRE_SETTING.get(settings), + SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.get(settings), SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey()); + } + CacheHolder contextCacheHolder(Settings settings) { Map contextCache = new HashMap<>(contexts.size()); contexts.forEach((k, v) -> contextCache.put(k, contextCache(settings, v))); @@ -510,13 +612,15 @@ ScriptCache contextCache(Settings settings, ScriptContext context) { Setting rateSetting = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(context.name); - ScriptCache.CompilationRate rate = null; - if (SCRIPT_DISABLE_MAX_COMPILATIONS_RATE_SETTING.get(settings) || compilationLimitsEnabled() == false) { + ScriptCache.CompilationRate rate; + if (SCRIPT_DISABLE_MAX_COMPILATIONS_RATE_SETTING.get(settings) + || compilationLimitsEnabled() == false + || context.compilationRateLimited == false) { rate = SCRIPT_COMPILATION_RATE_ZERO; } else if (rateSetting.existsOrFallbackExists(settings)) { rate = rateSetting.get(settings); } else { - rate = new ScriptCache.CompilationRate(context.maxCompilationRateDefault); + rate = new ScriptCache.CompilationRate(ScriptContext.DEFAULT_COMPILATION_RATE_LIMIT); } return new ScriptCache(cacheSize, cacheExpire, rate, rateSetting.getKey()); @@ -528,12 +632,19 @@ ScriptCache contextCache(Settings settings, ScriptContext context) { * 2) context mode, if the context script cache is configured. There is no general cache in this case. */ static class CacheHolder { + final ScriptCache general; final Map> contextCache; + CacheHolder(int cacheMaxSize, TimeValue cacheExpire, ScriptCache.CompilationRate maxCompilationRate, String contextRateSetting) { + contextCache = null; + general = new ScriptCache(cacheMaxSize, cacheExpire, maxCompilationRate, contextRateSetting); + } + CacheHolder(Map context) { Map> refs = new HashMap<>(context.size()); context.forEach((k, v) -> refs.put(k, new AtomicReference<>(v))); contextCache = Collections.unmodifiableMap(refs); + general = null; } /** @@ -541,6 +652,9 @@ static class CacheHolder { * the given context. Returns null in context mode if the requested context does not exist. 
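* In general mode the single shared cache is returned regardless of the requested context name; the name only matters in context mode.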
*/ ScriptCache get(String context) { + if (general != null) { + return general; + } AtomicReference ref = contextCache.get(context); if (ref == null) { return null; @@ -549,17 +663,35 @@ ScriptCache get(String context) { } ScriptStats stats() { - List stats = new ArrayList<>(contextCache.size()); + if (general != null) { + return general.stats(); + } + List contextStats = new ArrayList<>(contextCache.size()); for (Map.Entry> entry : contextCache.entrySet()) { - stats.add(entry.getValue().get().stats(entry.getKey())); + ScriptCache cache = entry.getValue().get(); + contextStats.add(cache.stats(entry.getKey())); + } + return new ScriptStats(contextStats); + } + + ScriptCacheStats cacheStats() { + if (general != null) { + return new ScriptCacheStats(general.stats()); } - return new ScriptStats(stats); + Map context = new HashMap<>(contextCache.size()); + for (String name: contextCache.keySet()) { + context.put(name, contextCache.get(name).get().stats()); + } + return new ScriptCacheStats(context); } /** * Update a single context cache if we're in the context cache mode otherwise no-op. */ void set(String name, ScriptCache cache) { + if (general != null) { + return; + } AtomicReference ref = contextCache.get(name); assert ref != null : "expected script cache to exist for context [" + name + "]"; ScriptCache oldCache = ref.get(); diff --git a/server/src/main/java/org/elasticsearch/script/ScriptStats.java b/server/src/main/java/org/elasticsearch/script/ScriptStats.java index 55ba2a847f7bb..2b8925cb10cee 100644 --- a/server/src/main/java/org/elasticsearch/script/ScriptStats.java +++ b/server/src/main/java/org/elasticsearch/script/ScriptStats.java @@ -15,8 +15,11 @@ import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.List; -import java.util.stream.Collectors; +import java.util.Map; public class ScriptStats implements Writeable, ToXContentFragment { private final List contextStats; @@ -24,9 +27,11 @@ public class ScriptStats implements Writeable, ToXContentFragment { private final long cacheEvictions; private final long compilationLimitTriggered; - public ScriptStats(List contextStats) { - this.contextStats = contextStats.stream().sorted().collect(Collectors.toUnmodifiableList()); + ArrayList ctxStats = new ArrayList<>(contextStats.size()); + ctxStats.addAll(contextStats); + ctxStats.sort(ScriptContextStats::compareTo); + this.contextStats = Collections.unmodifiableList(ctxStats); long compilations = 0; long cacheEvictions = 0; long compilationLimitTriggered = 0; @@ -40,6 +45,17 @@ public ScriptStats(List contextStats) { this.compilationLimitTriggered = compilationLimitTriggered; } + public ScriptStats(long compilations, long cacheEvictions, long compilationLimitTriggered) { + this.contextStats = Collections.emptyList(); + this.compilations = compilations; + this.cacheEvictions = cacheEvictions; + this.compilationLimitTriggered = compilationLimitTriggered; + } + + public ScriptStats(ScriptContextStats context) { + this(context.getCompilations(), context.getCacheEvictions(), context.getCompilationLimitTriggered()); + } + public ScriptStats(StreamInput in) throws IOException { compilations = in.readVLong(); cacheEvictions = in.readVLong(); @@ -71,6 +87,17 @@ public long getCompilationLimitTriggered() { return compilationLimitTriggered; } + public ScriptCacheStats toScriptCacheStats() { + if (contextStats.isEmpty()) { + return new ScriptCacheStats(this); + } + Map contexts 
= new HashMap<>(contextStats.size()); + for (ScriptContextStats contextStats : contextStats) { + contexts.put(contextStats.getContext(), new ScriptStats(contextStats)); + } + return new ScriptCacheStats(contexts); + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.SCRIPT_STATS); diff --git a/server/src/main/java/org/elasticsearch/script/TemplateScript.java b/server/src/main/java/org/elasticsearch/script/TemplateScript.java index d1777316434cf..7356be0bbdc32 100644 --- a/server/src/main/java/org/elasticsearch/script/TemplateScript.java +++ b/server/src/main/java/org/elasticsearch/script/TemplateScript.java @@ -42,5 +42,5 @@ public interface Factory { // rate limiting. MustacheScriptEngine explicitly checks for TemplateScript. Rather than complicating the implementation there by // creating a new Script class (as would be customary), this context is used to avoid the default rate limit. public static final ScriptContext INGEST_CONTEXT = new ScriptContext<>("ingest_template", Factory.class, - 200, TimeValue.timeValueMillis(0), ScriptCache.UNLIMITED_COMPILATION_RATE.asTuple(), true); + 200, TimeValue.timeValueMillis(0), false, true); } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index 4272dfa77f2cc..39e197d7a0f96 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.monitor.process.ProcessStats; import org.elasticsearch.node.AdaptiveSelectionStats; import org.elasticsearch.node.ResponseCollectorService; +import org.elasticsearch.script.ScriptCacheStats; import org.elasticsearch.script.ScriptContextStats; import org.elasticsearch.script.ScriptStats; import org.elasticsearch.test.ESTestCase; @@ -418,6 +419,34 @@ public void testSerialization() throws IOException { assertEquals(aStats.responseTime, bStats.responseTime, 0.01); }); } + ScriptCacheStats scriptCacheStats = nodeStats.getScriptCacheStats(); + ScriptCacheStats deserializedScriptCacheStats = deserializedNodeStats.getScriptCacheStats(); + if (scriptCacheStats == null) { + assertNull(deserializedScriptCacheStats); + } else if (deserializedScriptCacheStats.getContextStats() != null) { + Map deserialized = deserializedScriptCacheStats.getContextStats(); + long evictions = 0; + long limited = 0; + long compilations = 0; + Map stats = scriptCacheStats.getContextStats(); + for (String context: stats.keySet()) { + ScriptStats deserStats = deserialized.get(context); + ScriptStats generatedStats = stats.get(context); + + evictions += generatedStats.getCacheEvictions(); + assertEquals(generatedStats.getCacheEvictions(), deserStats.getCacheEvictions()); + + limited += generatedStats.getCompilationLimitTriggered(); + assertEquals(generatedStats.getCompilationLimitTriggered(), deserStats.getCompilationLimitTriggered()); + + compilations += generatedStats.getCompilations(); + assertEquals(generatedStats.getCompilations(), deserStats.getCompilations()); + } + ScriptStats sum = deserializedScriptCacheStats.sum(); + assertEquals(evictions, sum.getCacheEvictions()); + assertEquals(limited, sum.getCompilationLimitTriggered()); + assertEquals(compilations, sum.getCompilations()); + } } } } @@ -688,10 +717,11 @@ public static NodeStats 
createNodeStats() { } adaptiveSelectionStats = new AdaptiveSelectionStats(nodeConnections, nodeStats); } + ScriptCacheStats scriptCacheStats = scriptStats != null ? scriptStats.toScriptCacheStats() : null; //TODO NodeIndicesStats are not tested here, way too complicated to create, also they need to be migrated to Writeable yet return new NodeStats(node, randomNonNegativeLong(), null, osStats, processStats, jvmStats, threadPoolStats, fsInfo, transportStats, httpStats, allCircuitBreakerStats, scriptStats, discoveryStats, - ingestStats, adaptiveSelectionStats, null); + ingestStats, adaptiveSelectionStats, scriptCacheStats, null); } private static ScriptContextStats.TimeSeries randomTimeSeries() { diff --git a/server/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java b/server/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java index 61466be75cf99..dc89a392c1e25 100644 --- a/server/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/DiskUsageTests.java @@ -151,11 +151,14 @@ public void testFillDiskUsage() { }; List nodeStats = Arrays.asList( new NodeStats(new DiscoveryNode("node_1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), 0, - null,null,null,null,null,new FsInfo(0, null, node1FSInfo), null,null,null,null,null, null, null, null), + null,null,null,null,null,new FsInfo(0, null, node1FSInfo), null,null,null,null,null, null, null, + null, null), new NodeStats(new DiscoveryNode("node_2", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), 0, - null,null,null,null,null, new FsInfo(0, null, node2FSInfo), null,null,null,null,null, null, null, null), + null,null,null,null,null, new FsInfo(0, null, node2FSInfo), null,null,null,null,null, null, null, + null, null), new NodeStats(new DiscoveryNode("node_3", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), 0, - null,null,null,null,null, new FsInfo(0, null, node3FSInfo), null,null,null,null,null, null, null, null) + null,null,null,null,null, new FsInfo(0, null, node3FSInfo), null,null,null,null,null, null, null, + null, null) ); InternalClusterInfoService.fillDiskUsagePerNode(nodeStats, newLeastAvaiableUsages, newMostAvaiableUsages); DiskUsage leastNode_1 = newLeastAvaiableUsages.get("node_1"); @@ -192,11 +195,14 @@ public void testFillDiskUsageSomeInvalidValues() { }; List nodeStats = Arrays.asList( new NodeStats(new DiscoveryNode("node_1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), 0, - null,null,null,null,null,new FsInfo(0, null, node1FSInfo), null,null,null,null,null, null, null, null), + null,null,null,null,null,new FsInfo(0, null, node1FSInfo), null,null,null,null,null, null, null, + null, null), new NodeStats(new DiscoveryNode("node_2", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), 0, - null,null,null,null,null, new FsInfo(0, null, node2FSInfo), null,null,null,null,null, null, null, null), + null,null,null,null,null, new FsInfo(0, null, node2FSInfo), null,null,null,null,null, null, null, + null, null), new NodeStats(new DiscoveryNode("node_3", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT), 0, - null,null,null,null,null, new FsInfo(0, null, node3FSInfo), null,null,null,null,null, null, null, null) + null,null,null,null,null, new FsInfo(0, null, node3FSInfo), null,null,null,null,null, null, null, + null, null) ); InternalClusterInfoService.fillDiskUsagePerNode(nodeStats, newLeastAvailableUsages, 
newMostAvailableUsages); DiskUsage leastNode_1 = newLeastAvailableUsages.get("node_1"); diff --git a/server/src/test/java/org/elasticsearch/script/ScriptCacheTests.java b/server/src/test/java/org/elasticsearch/script/ScriptCacheTests.java index 6ee7149a460ec..efcbf07cd3f76 100644 --- a/server/src/test/java/org/elasticsearch/script/ScriptCacheTests.java +++ b/server/src/test/java/org/elasticsearch/script/ScriptCacheTests.java @@ -21,7 +21,7 @@ public class ScriptCacheTests extends ESTestCase { public void testCompilationCircuitBreaking() throws Exception { String context = randomFrom( ScriptModule.CORE_CONTEXTS.values().stream().filter( - c -> c.maxCompilationRateDefault.equals(ScriptCache.UNLIMITED_COMPILATION_RATE) == false + c -> c.compilationRateLimited ).collect(Collectors.toList()) ).name; final TimeValue expire = ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace(context).get(Settings.EMPTY); @@ -55,10 +55,37 @@ public void testCompilationCircuitBreaking() throws Exception { } } + public void testGeneralCompilationCircuitBreaking() throws Exception { + final TimeValue expire = ScriptService.SCRIPT_GENERAL_CACHE_EXPIRE_SETTING.get(Settings.EMPTY); + final Integer size = ScriptService.SCRIPT_GENERAL_CACHE_SIZE_SETTING.get(Settings.EMPTY); + String settingName = ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(); + ScriptCache cache = new ScriptCache(size, expire, new ScriptCache.CompilationRate(1, TimeValue.timeValueMinutes(1)), settingName); + cache.checkCompilationLimit(); // should pass + expectThrows(CircuitBreakingException.class, cache::checkCompilationLimit); + cache = new ScriptCache(size, expire, new ScriptCache.CompilationRate(2, TimeValue.timeValueMinutes(1)), settingName); + cache.checkCompilationLimit(); // should pass + cache.checkCompilationLimit(); // should pass + expectThrows(CircuitBreakingException.class, cache::checkCompilationLimit); + int count = randomIntBetween(5, 50); + cache = new ScriptCache(size, expire, new ScriptCache.CompilationRate(count, TimeValue.timeValueMinutes(1)), settingName); + for (int i = 0; i < count; i++) { + cache.checkCompilationLimit(); // should pass + } + expectThrows(CircuitBreakingException.class, cache::checkCompilationLimit); + cache = new ScriptCache(size, expire, new ScriptCache.CompilationRate(0, TimeValue.timeValueMinutes(1)), settingName); + expectThrows(CircuitBreakingException.class, cache::checkCompilationLimit); + cache = new ScriptCache(size, expire, + new ScriptCache.CompilationRate(Integer.MAX_VALUE, TimeValue.timeValueMinutes(1)), settingName); + int largeLimit = randomIntBetween(1000, 10000); + for (int i = 0; i < largeLimit; i++) { + cache.checkCompilationLimit(); + } + } + public void testUnlimitedCompilationRate() { String context = randomFrom( ScriptModule.CORE_CONTEXTS.values().stream().filter( - c -> c.maxCompilationRateDefault.equals(ScriptCache.UNLIMITED_COMPILATION_RATE) == false + c -> c.compilationRateLimited ).collect(Collectors.toList()) ).name; final Integer size = ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(context).get(Settings.EMPTY); @@ -73,4 +100,18 @@ public void testUnlimitedCompilationRate() { assertEquals(initialState.availableTokens, currentState.availableTokens, 0.0); // delta of 0.0 because it should never change } } + + public void testGeneralUnlimitedCompilationRate() { + final Integer size = ScriptService.SCRIPT_GENERAL_CACHE_SIZE_SETTING.get(Settings.EMPTY); + final TimeValue expire = 
ScriptService.SCRIPT_GENERAL_CACHE_EXPIRE_SETTING.get(Settings.EMPTY); + String settingName = ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(); + ScriptCache cache = new ScriptCache(size, expire, ScriptCache.UNLIMITED_COMPILATION_RATE, settingName); + ScriptCache.TokenBucketState initialState = cache.tokenBucketState.get(); + for(int i=0; i < 3000; i++) { + cache.checkCompilationLimit(); + ScriptCache.TokenBucketState currentState = cache.tokenBucketState.get(); + assertEquals(initialState.lastInlineCompileTime, currentState.lastInlineCompileTime); + assertEquals(initialState.availableTokens, currentState.availableTokens, 0.0); // delta of 0.0 because it should never change + } + } } diff --git a/server/src/test/java/org/elasticsearch/script/ScriptServiceTests.java b/server/src/test/java/org/elasticsearch/script/ScriptServiceTests.java index 1ea9780c762a6..cdca6be6797b8 100644 --- a/server/src/test/java/org/elasticsearch/script/ScriptServiceTests.java +++ b/server/src/test/java/org/elasticsearch/script/ScriptServiceTests.java @@ -7,6 +7,7 @@ */ package org.elasticsearch.script; +import org.apache.logging.log4j.Level; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest; import org.elasticsearch.cluster.ClusterName; @@ -16,6 +17,7 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.xcontent.XContentFactory; @@ -36,7 +38,12 @@ import static org.elasticsearch.script.ScriptService.SCRIPT_CACHE_EXPIRE_SETTING; import static org.elasticsearch.script.ScriptService.SCRIPT_CACHE_SIZE_SETTING; +import static org.elasticsearch.script.ScriptService.SCRIPT_GENERAL_CACHE_EXPIRE_SETTING; +import static org.elasticsearch.script.ScriptService.SCRIPT_GENERAL_CACHE_SIZE_SETTING; +import static org.elasticsearch.script.ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING; import static org.elasticsearch.script.ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING; +import static org.elasticsearch.script.ScriptService.USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE; +import static org.elasticsearch.script.ScriptService.USE_CONTEXT_RATE_KEY; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; @@ -50,6 +57,7 @@ public class ScriptServiceTests extends ESTestCase { private ScriptService scriptService; private Settings baseSettings; private ClusterSettings clusterSettings; + private Map> rateLimitedContexts; @Before public void setup() throws IOException { @@ -68,6 +76,7 @@ public void setup() throws IOException { engines.put(scriptEngine.getType(), scriptEngine); engines.put("test", new MockScriptEngine("test", scripts, Collections.emptyMap())); logger.info("--> setup script service"); + rateLimitedContexts = compilationRateLimitedContexts(); } private void buildScriptService(Settings additionalSettings) throws IOException { @@ -204,7 +213,6 @@ public void testCompileCountedInCompilationStats() throws IOException { scriptService.compile(new Script(ScriptType.INLINE, "test", "1+1", Collections.emptyMap()), randomFrom(contexts.values())); assertEquals(1L, scriptService.stats().getCompilations()); } - public void 
testMultipleCompilationsCountedInCompilationStats() throws IOException { buildScriptService(Settings.EMPTY); int numberOfCompilations = randomIntBetween(1, 20); @@ -215,7 +223,7 @@ public void testMultipleCompilationsCountedInCompilationStats() throws IOExcepti assertEquals(numberOfCompilations, scriptService.stats().getCompilations()); } - public void testCompilationStatsOnCacheHit() throws IOException { + public void testCompilationGeneralStatsOnCacheHit() throws IOException { buildScriptService(Settings.EMPTY); Script script = new Script(ScriptType.INLINE, "test", "1+1", Collections.emptyMap()); ScriptContext context = randomFrom(contexts.values()); @@ -224,41 +232,106 @@ public void testCompilationStatsOnCacheHit() throws IOException { assertEquals(1L, scriptService.stats().getCompilations()); } - public void testIndexedScriptCountedInCompilationStats() throws IOException { + public void testIndexedScriptCountedInGeneralCompilationStats() throws IOException { + buildScriptService(Settings.EMPTY); + ScriptContext ctx = randomFrom(contexts.values()); + scriptService.compile(new Script(ScriptType.STORED, null, "script", Collections.emptyMap()), ctx); + assertEquals(1L, scriptService.stats().getCompilations()); + } + + public void testContextCompilationStatsOnCacheHit() throws IOException { + buildScriptService(Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), USE_CONTEXT_RATE_KEY) + .build()); + Script script = new Script(ScriptType.INLINE, "test", "1+1", Collections.emptyMap()); + ScriptContext context = randomFrom(contexts.values()); + scriptService.compile(script, context); + scriptService.compile(script, context); + assertEquals(1L, scriptService.stats().getCompilations()); + assertWarnings(true, new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); + } + + public void testGeneralCompilationStatsOnCacheHit() throws IOException { + Settings.Builder builder = Settings.builder() + .put(SCRIPT_GENERAL_CACHE_SIZE_SETTING.getKey(), 1) + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "2/1m"); + buildScriptService(builder.build()); + Script script = new Script(ScriptType.INLINE, "test", "1+1", Collections.emptyMap()); + ScriptContext context = randomFrom(contexts.values()); + scriptService.compile(script, context); + scriptService.compile(script, context); + assertEquals(1L, scriptService.stats().getCompilations()); + } + + public void testGeneralIndexedScriptCountedInCompilationStats() throws IOException { buildScriptService(Settings.EMPTY); ScriptContext ctx = randomFrom(contexts.values()); scriptService.compile(new Script(ScriptType.STORED, null, "script", Collections.emptyMap()), ctx); assertEquals(1L, scriptService.stats().getCompilations()); - assertEquals(1L, getByContext(scriptService.stats(), ctx.name).getCompilations()); + } + + public void testContextIndexedScriptCountedInCompilationStats() throws IOException { + buildScriptService(Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), USE_CONTEXT_RATE_KEY) + .build()); + ScriptContext ctx = randomFrom(contexts.values()); + scriptService.compile(new Script(ScriptType.STORED, null, "script", Collections.emptyMap()), ctx); + assertEquals(1L, scriptService.stats().getCompilations()); + assertEquals(1L, scriptService.cacheStats().getContextStats().get(ctx.name).getCompilations()); + assertWarnings(true, new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); } public void testCacheEvictionCountedInCacheEvictionsStats() 
throws IOException { ScriptContext context = randomFrom(contexts.values()); + Setting contextCacheSizeSetting = SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(context.name); buildScriptService(Settings.builder() - .put(SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(context.name).getKey(), 1) + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), USE_CONTEXT_RATE_KEY) + .put(contextCacheSizeSetting.getKey(), 1) .build() ); scriptService.compile(new Script(ScriptType.INLINE, "test", "1+1", Collections.emptyMap()), context); scriptService.compile(new Script(ScriptType.INLINE, "test", "2+2", Collections.emptyMap()), context); assertEquals(2L, scriptService.stats().getCompilations()); assertEquals(1L, scriptService.stats().getCacheEvictions()); + assertSettingDeprecationsAndWarnings(new Setting[]{contextCacheSizeSetting}, + new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); + } + + public void testGeneralCacheEvictionCountedInCacheEvictionsStats() throws IOException { + Settings.Builder builder = Settings.builder(); + builder.put(SCRIPT_GENERAL_CACHE_SIZE_SETTING.getKey(), 1); + builder.put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "10/1m"); + buildScriptService(builder.build()); + scriptService.compile(new Script(ScriptType.INLINE, "test", "1+1", Collections.emptyMap()), randomFrom(contexts.values())); + scriptService.compile(new Script(ScriptType.INLINE, "test", "2+2", Collections.emptyMap()), randomFrom(contexts.values())); + assertEquals(2L, scriptService.stats().getCompilations()); + assertEquals(2L, scriptService.cacheStats().getGeneralStats().getCompilations()); + assertEquals(1L, scriptService.stats().getCacheEvictions()); + assertEquals(1L, scriptService.cacheStats().getGeneralStats().getCacheEvictions()); } public void testContextCacheStats() throws IOException { - ScriptContext contextA = randomFrom(contexts.values()); + ScriptContext contextA = randomFrom(rateLimitedContexts.values()); String aRate = "2/10m"; - ScriptContext contextB = randomValueOtherThan(contextA, () -> randomFrom(contexts.values())); + ScriptContext contextB = randomValueOtherThan(contextA, () -> randomFrom(rateLimitedContexts.values())); String bRate = "3/10m"; BiFunction msg = (rate, ctx) -> ( "[script] Too many dynamic script compilations within, max: [" + rate + "]; please use indexed, or scripts with parameters instead; this limit can be changed by the [script.context." 
+ ctx + ".max_compilations_rate] setting" ); + Setting cacheSizeA = SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(contextA.name); + Setting compilationRateA = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(contextA.name); + + Setting cacheSizeB = SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(contextB.name); + Setting compilationRateB = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(contextB.name); + buildScriptService(Settings.builder() - .put(SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(contextA.name).getKey(), 1) - .put(SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(contextA.name).getKey(), aRate) - .put(SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(contextB.name).getKey(), 2) - .put(SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(contextB.name).getKey(), bRate) + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), USE_CONTEXT_RATE_KEY) + .put(cacheSizeA.getKey(), 1) + .put(compilationRateA.getKey(), aRate) + .put(cacheSizeB.getKey(), 2) + .put(compilationRateB.getKey(), bRate) .build()); // Context A @@ -295,6 +368,9 @@ public void testContextCacheStats() throws IOException { assertEquals(5L, scriptService.stats().getCompilations()); assertEquals(2L, scriptService.stats().getCacheEvictions()); assertEquals(3L, scriptService.stats().getCompilationLimitTriggered()); + + assertSettingDeprecationsAndWarnings(new Setting[]{cacheSizeA, compilationRateA, cacheSizeB, compilationRateB}, + new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); } private ScriptContextStats getByContext(ScriptStats stats, String context) { @@ -366,17 +442,126 @@ public void testMaxSizeLimit() throws Exception { iae.getMessage()); } + public void testConflictContextSettings() throws IOException { + IllegalArgumentException illegal = expectThrows(IllegalArgumentException.class, () -> { + buildScriptService(Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "10/1m") + .put(ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace("field").getKey(), 123).build()); + }); + assertEquals("Context cache settings [script.context.field.cache_max_size] requires " + + "[script.max_compilations_rate] to be [use-context]", + illegal.getMessage() + ); + + illegal = expectThrows(IllegalArgumentException.class, () -> { + buildScriptService(Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "10/1m") + .put(ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace("ingest").getKey(), "5m").build()); + }); + + assertEquals("Context cache settings [script.context.ingest.cache_expire] requires " + + "[script.max_compilations_rate] to be [use-context]", + illegal.getMessage() + ); + + illegal = expectThrows(IllegalArgumentException.class, () -> { + buildScriptService(Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "10/1m") + .put(ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace("score").getKey(), "50/5m").build()); + }); + + assertEquals("Context cache settings [script.context.score.max_compilations_rate] requires " + + "[script.max_compilations_rate] to be [use-context]", + illegal.getMessage() + ); + + Setting ingestExpire = ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace("ingest"); + Setting fieldSize = ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace("field"); + Setting 
scoreCompilation = ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace("score"); + + buildScriptService( + Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), USE_CONTEXT_RATE_KEY) + .put(ingestExpire.getKey(), "5m") + .put(fieldSize.getKey(), 123) + .put(scoreCompilation.getKey(), "50/5m") + .build()); + assertSettingDeprecationsAndWarnings(new Setting[]{ingestExpire, fieldSize, scoreCompilation}, + new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); + } + + public void testFallbackContextSettings() { + int cacheSizeBackup = randomIntBetween(0, 1024); + int cacheSizeFoo = randomValueOtherThan(cacheSizeBackup, () -> randomIntBetween(0, 1024)); + + String cacheExpireBackup = randomTimeValue(1, 1000, "h"); + TimeValue cacheExpireBackupParsed = TimeValue.parseTimeValue(cacheExpireBackup, ""); + String cacheExpireFoo = randomValueOtherThan(cacheExpireBackup, () -> randomTimeValue(1, 1000, "h")); + TimeValue cacheExpireFooParsed = TimeValue.parseTimeValue(cacheExpireFoo, ""); + + Setting cacheSizeSetting = ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace("foo"); + Setting cacheExpireSetting = ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace("foo"); + Settings s = Settings.builder() + .put(SCRIPT_GENERAL_CACHE_SIZE_SETTING.getKey(), cacheSizeBackup) + .put(cacheSizeSetting.getKey(), cacheSizeFoo) + .put(SCRIPT_GENERAL_CACHE_EXPIRE_SETTING.getKey(), cacheExpireBackup) + .put(cacheExpireSetting.getKey(), cacheExpireFoo) + .build(); + + assertEquals(cacheSizeFoo, ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace("foo").get(s).intValue()); + assertEquals(cacheSizeBackup, ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace("bar").get(s).intValue()); + + assertEquals(cacheExpireFooParsed, ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace("foo").get(s)); + assertEquals(cacheExpireBackupParsed, ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace("bar").get(s)); + assertSettingDeprecationsAndWarnings(new Setting[]{cacheExpireSetting, cacheExpireSetting}); + } + + public void testUseContextSettingValue() { + Setting contextMaxCompilationRate = ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace("foo"); + Settings s = Settings.builder() + .put(ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), ScriptService.USE_CONTEXT_RATE_KEY) + .put(contextMaxCompilationRate.getKey(), + ScriptService.USE_CONTEXT_RATE_KEY) + .build(); + + assertEquals(ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.get(s), ScriptService.USE_CONTEXT_RATE_VALUE); + + IllegalArgumentException illegal = expectThrows(IllegalArgumentException.class, () -> { + ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getAsMap(s); + }); + + assertEquals("parameter must contain a positive integer and a timevalue, i.e. 
10/1m, but was [use-context]", illegal.getMessage()); + assertSettingDeprecationsAndWarnings(new Setting[]{contextMaxCompilationRate}); + } + + public void testCacheHolderGeneralConstructor() throws IOException { + String compilationRate = "77/5m"; + buildScriptService( + Settings.builder().put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), compilationRate).build() + ); + + ScriptService.CacheHolder holder = scriptService.cacheHolder.get(); + + assertNotNull(holder.general); + assertNull(holder.contextCache); + assertEquals(holder.general.rate, new ScriptCache.CompilationRate(compilationRate)); + } + public void testCacheHolderContextConstructor() throws IOException { - String a = randomFrom(contexts.keySet()); - String b = randomValueOtherThan(a, () -> randomFrom(contexts.keySet())); + String a = randomFrom(rateLimitedContexts.keySet()); + String b = randomValueOtherThan(a, () -> randomFrom(rateLimitedContexts.keySet())); String aCompilationRate = "77/5m"; String bCompilationRate = "78/6m"; + Setting aSetting = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(a); + Setting bSetting = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(b); buildScriptService(Settings.builder() - .put(SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(a).getKey(), aCompilationRate) - .put(SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(b).getKey(), bCompilationRate) + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), USE_CONTEXT_RATE_KEY) + .put(aSetting.getKey(), aCompilationRate) + .put(bSetting.getKey(), bCompilationRate) .build()); + assertNull(scriptService.cacheHolder.get().general); assertNotNull(scriptService.cacheHolder.get().contextCache); assertEquals(contexts.keySet(), scriptService.cacheHolder.get().contextCache.keySet()); @@ -384,11 +569,35 @@ public void testCacheHolderContextConstructor() throws IOException { scriptService.cacheHolder.get().contextCache.get(a).get().rate); assertEquals(new ScriptCache.CompilationRate(bCompilationRate), scriptService.cacheHolder.get().contextCache.get(b).get().rate); + assertSettingDeprecationsAndWarnings(new Setting[]{aSetting, bSetting}, + new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); + } + + public void testCompilationRateUnlimitedContextOnly() throws IOException { + IllegalArgumentException illegal = expectThrows(IllegalArgumentException.class, () -> { + buildScriptService(Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), ScriptService.UNLIMITED_COMPILATION_RATE_KEY) + .build()); + }); + assertEquals("parameter must contain a positive integer and a timevalue, i.e. 10/1m, but was [unlimited]", illegal.getMessage()); + + + Setting field = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace("field"); + Setting ingest = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace("ingest"); + // Should not throw. 
+ buildScriptService(Settings.builder() + .put(field.getKey(), ScriptService.UNLIMITED_COMPILATION_RATE_KEY) + .put(ingest.getKey(), ScriptService.UNLIMITED_COMPILATION_RATE_KEY) + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), ScriptService.USE_CONTEXT_RATE_KEY) + .build()); + assertSettingDeprecationsAndWarnings(new Setting[]{field, ingest}, + new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); } public void testDisableCompilationRateSetting() throws IOException { IllegalArgumentException illegal = expectThrows(IllegalArgumentException.class, () -> { buildScriptService(Settings.builder() + .put("script.max_compilations_rate", "use-context") .put("script.context.ingest.max_compilations_rate", "76/10m") .put("script.context.field.max_compilations_rate", "77/10m") .put("script.disable_max_compilations_rate", true) @@ -399,32 +608,61 @@ public void testDisableCompilationRateSetting() throws IOException { "[script.disable_max_compilations_rate]", illegal.getMessage()); + illegal = expectThrows(IllegalArgumentException.class, () -> { + buildScriptService(Settings.builder() + .put("script.disable_max_compilations_rate", true) + .put("script.max_compilations_rate", "76/10m") + .build()); + }); + assertEquals("Cannot set custom general compilation rates [script.max_compilations_rate] " + + "to [76/10m] if compile rates disabled via [script.disable_max_compilations_rate]", + illegal.getMessage()); + buildScriptService(Settings.builder() .put("script.disable_max_compilations_rate", true) .build()); + assertSettingDeprecationsAndWarnings(new Setting[]{ + SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace("field"), + SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace("ingest")}, + new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); } public void testCacheHolderChangeSettings() throws IOException { - Set contextNames = contexts.keySet(); + Set contextNames = rateLimitedContexts.keySet(); String a = randomFrom(contextNames); String aRate = "77/5m"; String b = randomValueOtherThan(a, () -> randomFrom(contextNames)); String bRate = "78/6m"; String c = randomValueOtherThanMany(s -> a.equals(s) || b.equals(s), () -> randomFrom(contextNames)); + String compilationRate = "77/5m"; + ScriptCache.CompilationRate generalRate = new ScriptCache.CompilationRate(compilationRate); - buildScriptService(Settings.EMPTY); - - Settings settings = Settings.builder() - .put(SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(a).getKey(), aRate) - .put(SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(b).getKey(), bRate) - .put(SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(c).getKey(), - ScriptService.UNLIMITED_COMPILATION_RATE_KEY) + Settings s = Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), compilationRate) .build(); + buildScriptService(s); + + assertNotNull(scriptService.cacheHolder.get().general); + // Set should not throw when using general cache + scriptService.cacheHolder.get().set(c, scriptService.contextCache(s, contexts.get(c))); + assertNull(scriptService.cacheHolder.get().contextCache); + assertEquals(generalRate, scriptService.cacheHolder.get().general.rate); + + Setting compilationRateA = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(a); + Setting compilationRateB = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(b); + Setting compilationRateC = 
SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(c); + + scriptService.setCacheHolder(Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), USE_CONTEXT_RATE_KEY) + .put(compilationRateA.getKey(), aRate) + .put(compilationRateB.getKey(), bRate) + .put(compilationRateC.getKey(), ScriptService.UNLIMITED_COMPILATION_RATE_KEY) + .build() + ); + + assertNull(scriptService.cacheHolder.get().general); assertNotNull(scriptService.cacheHolder.get().contextCache); - scriptService.cacheHolder.get().set(a, scriptService.contextCache(settings, contexts.get(a))); - scriptService.cacheHolder.get().set(b, scriptService.contextCache(settings, contexts.get(b))); - scriptService.cacheHolder.get().set(c, scriptService.contextCache(settings, contexts.get(c))); // get of missing context should be null assertNull(scriptService.cacheHolder.get().get( randomValueOtherThanMany(contexts.keySet()::contains, () -> randomAlphaOfLength(8))) @@ -443,6 +681,27 @@ public void testCacheHolderChangeSettings() throws IOException { contexts.get(b))); assertEquals(new ScriptCache.CompilationRate(aRate), scriptService.cacheHolder.get().contextCache.get(b).get().rate); + + scriptService.setCacheHolder(s); + assertNotNull(scriptService.cacheHolder.get().general); + assertNull(scriptService.cacheHolder.get().contextCache); + assertEquals(generalRate, scriptService.cacheHolder.get().general.rate); + + scriptService.setCacheHolder( + Settings.builder().put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), bRate).build() + ); + + assertNotNull(scriptService.cacheHolder.get().general); + assertNull(scriptService.cacheHolder.get().contextCache); + assertEquals(new ScriptCache.CompilationRate(bRate), scriptService.cacheHolder.get().general.rate); + + ScriptService.CacheHolder holder = scriptService.cacheHolder.get(); + scriptService.setCacheHolder( + Settings.builder().put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), bRate).build() + ); + assertEquals(holder, scriptService.cacheHolder.get()); + + assertSettingDeprecationsAndWarnings(new Setting[]{compilationRateA, compilationRateB, compilationRateC}); } public void testFallbackToContextDefaults() throws IOException { @@ -451,20 +710,23 @@ public void testFallbackToContextDefaults() throws IOException { int contextCacheSize = randomIntBetween(1, 1024); TimeValue contextExpire = TimeValue.timeValueMinutes(randomIntBetween(10, 200)); - buildScriptService(Settings.EMPTY); + buildScriptService( + Settings.builder().put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "75/5m").build() + ); - String name = "ingest"; + String name = "score"; + Setting cacheSizeContextSetting = SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(name); + Setting cacheExpireContextSetting = SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace(name); + Setting compilationRateContextSetting = SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(name); // Use context specific - scriptService.cacheHolder.get().set( - name, - scriptService.contextCache(Settings.builder() - .put(SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(name).getKey(), contextCacheSize) - .put(SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace(name).getKey(), contextExpire) - .put(SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(name).getKey(), contextRateStr) - .build(), - contexts.get(name) - )); + scriptService.setCacheHolder(Settings.builder() + .put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), 
USE_CONTEXT_RATE_KEY) + .put(cacheSizeContextSetting.getKey(), contextCacheSize) + .put(cacheExpireContextSetting.getKey(), contextExpire) + .put(compilationRateContextSetting.getKey(), contextRateStr) + .build() + ); ScriptService.CacheHolder holder = scriptService.cacheHolder.get(); assertNotNull(holder.contextCache); @@ -475,18 +737,31 @@ public void testFallbackToContextDefaults() throws IOException { assertEquals(contextCacheSize, holder.contextCache.get(name).get().cacheSize); assertEquals(contextExpire, holder.contextCache.get(name).get().cacheExpire); - ScriptContext ingest = contexts.get(name); + ScriptContext score = contexts.get(name); // Fallback to context defaults - buildScriptService(Settings.EMPTY); + buildScriptService(Settings.builder().put(SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), USE_CONTEXT_RATE_KEY).build()); holder = scriptService.cacheHolder.get(); assertNotNull(holder.contextCache); assertNotNull(holder.contextCache.get(name)); assertNotNull(holder.contextCache.get(name).get()); - assertEquals(ingest.maxCompilationRateDefault, holder.contextCache.get(name).get().rate.asTuple()); - assertEquals(ingest.cacheSizeDefault, holder.contextCache.get(name).get().cacheSize); - assertEquals(ingest.cacheExpireDefault, holder.contextCache.get(name).get().cacheExpire); + assertEquals(ScriptContext.DEFAULT_COMPILATION_RATE_LIMIT, holder.contextCache.get(name).get().rate.asTuple()); + assertEquals(score.cacheSizeDefault, holder.contextCache.get(name).get().cacheSize); + assertEquals(score.cacheExpireDefault, holder.contextCache.get(name).get().cacheExpire); + + assertSettingDeprecationsAndWarnings(new Setting[]{cacheSizeContextSetting, cacheExpireContextSetting, + compilationRateContextSetting}, new DeprecationWarning(Level.WARN, USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE)); + } + + protected HashMap> compilationRateLimitedContexts() { + HashMap> rateLimited = new HashMap<>(); + for (Map.Entry> entry: contexts.entrySet()) { + if (entry.getValue().compilationRateLimited) { + rateLimited.put(entry.getKey(), entry.getValue()); + } + } + return rateLimited; } private void assertCompileRejected(String lang, String script, ScriptType scriptType, ScriptContext scriptContext) { diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/elasticsearch/cluster/MockInternalClusterInfoService.java index 100b6611226ed..fc71839b93209 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/elasticsearch/cluster/MockInternalClusterInfoService.java @@ -74,7 +74,8 @@ List adjustNodesStats(List nodesStats) { .map(fsInfoPath -> diskUsageFunction.apply(discoveryNode, fsInfoPath)) .toArray(FsInfo.Path[]::new)), nodeStats.getTransport(), nodeStats.getHttp(), nodeStats.getBreaker(), nodeStats.getScriptStats(), nodeStats.getDiscoveryStats(), - nodeStats.getIngestStats(), nodeStats.getAdaptiveSelectionStats(), nodeStats.getIndexingPressureStats()); + nodeStats.getIngestStats(), nodeStats.getAdaptiveSelectionStats(), nodeStats.getScriptCacheStats(), + nodeStats.getIndexingPressureStats()); }).collect(Collectors.toList()); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index 6cfebd3275812..4305f69924ce8 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ 
b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -93,7 +93,6 @@ import org.elasticsearch.node.NodeService; import org.elasticsearch.node.NodeValidationException; import org.elasticsearch.plugins.Plugin; -import org.elasticsearch.script.ScriptModule; import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.SearchService; import org.elasticsearch.tasks.TaskManager; @@ -521,16 +520,16 @@ private static Settings getRandomNodeSettings(long seed) { builder.put(TransportSettings.PING_SCHEDULE.getKey(), RandomNumbers.randomIntBetween(random, 100, 2000) + "ms"); } + if (random.nextBoolean()) { - String ctx = randomFrom(random, ScriptModule.CORE_CONTEXTS.keySet()); - builder.put(ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(ctx).getKey(), + builder.put(ScriptService.SCRIPT_GENERAL_CACHE_SIZE_SETTING.getKey(), RandomNumbers.randomIntBetween(random, 0, 2000)); } if (random.nextBoolean()) { - String ctx = randomFrom(random, ScriptModule.CORE_CONTEXTS.keySet()); - builder.put(ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace(ctx).getKey(), + builder.put(ScriptService.SCRIPT_GENERAL_CACHE_EXPIRE_SETTING.getKey(), timeValueMillis(RandomNumbers.randomIntBetween(random, 750, 10000000)).getStringRep()); } + if (random.nextBoolean()) { int initialMillisBound = RandomNumbers.randomIntBetween(random,10, 100); builder.put(TransportReplicationAction.REPLICATION_INITIAL_RETRY_BACKOFF_BOUND.getKey(), timeValueMillis(initialMillisBound)); diff --git a/x-pack/plugin/autoscaling/src/test/java/org/elasticsearch/xpack/autoscaling/capacity/memory/AutoscalingMemoryInfoServiceTests.java b/x-pack/plugin/autoscaling/src/test/java/org/elasticsearch/xpack/autoscaling/capacity/memory/AutoscalingMemoryInfoServiceTests.java index fb0a073c02e86..1c005095854bd 100644 --- a/x-pack/plugin/autoscaling/src/test/java/org/elasticsearch/xpack/autoscaling/capacity/memory/AutoscalingMemoryInfoServiceTests.java +++ b/x-pack/plugin/autoscaling/src/test/java/org/elasticsearch/xpack/autoscaling/capacity/memory/AutoscalingMemoryInfoServiceTests.java @@ -367,6 +367,7 @@ private static NodeStats statsForNode(DiscoveryNode node, long memory) { null, null, null, + null, null ); } diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java index cece62ed0b853..c0274c85d6354 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java @@ -79,7 +79,11 @@ private DeprecationChecks() { NodeDeprecationChecks::checkMonitoringSettingDecommissionAlerts, NodeDeprecationChecks::checkMonitoringSettingEsCollectionEnabled, NodeDeprecationChecks::checkMonitoringSettingCollectionEnabled, - NodeDeprecationChecks::checkMonitoringSettingCollectionInterval + NodeDeprecationChecks::checkMonitoringSettingCollectionInterval, + NodeDeprecationChecks::checkScriptContextCache, + NodeDeprecationChecks::checkScriptContextCompilationsRateLimitSetting, + NodeDeprecationChecks::checkScriptContextCacheSizeSetting, + NodeDeprecationChecks::checkScriptContextCacheExpirationSetting ); static List> INDEX_SETTINGS_CHECKS = List.of( diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/NodeDeprecationChecks.java 
b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/NodeDeprecationChecks.java index e3471d4df7a82..355fc21efd60d 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/NodeDeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/NodeDeprecationChecks.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; +import org.elasticsearch.script.ScriptService; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; import org.elasticsearch.xpack.core.monitoring.MonitoringDeprecatedSettings; import org.elasticsearch.xpack.core.security.authc.RealmConfig; @@ -364,4 +365,80 @@ static DeprecationIssue checkExporterCreateLegacyTemplateSetting(final Settings DeprecationIssue.Level.WARNING, settings); } + + static DeprecationIssue checkScriptContextCache(final Settings settings, + final PluginsAndModules pluginsAndModules) { + if (ScriptService.isUseContextCacheSet(settings)) { + return new DeprecationIssue(DeprecationIssue.Level.WARNING, + ScriptService.USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE, + "https://ela.st/es-deprecation-7-script-context-cache", + "found deprecated script context caches in use, change setting to compilation rate or remove " + + "setting to use the default", + false, null); + } + return null; + } + + static DeprecationIssue checkScriptContextCompilationsRateLimitSetting(final Settings settings, + final PluginsAndModules pluginsAndModules) { + Setting.AffixSetting maxSetting = ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING; + Set contextCompilationRates = maxSetting.getAsMap(settings).keySet(); + if (contextCompilationRates.isEmpty() == false) { + String maxSettings = contextCompilationRates.stream().sorted().map( + c -> maxSetting.getConcreteSettingForNamespace(c).getKey() + ).collect(Collectors.joining(",")); + return new DeprecationIssue(DeprecationIssue.Level.WARNING, + String.format(Locale.ROOT, + "Setting context-specific rate limits [%s] is deprecated." + + " Use [%s] to rate limit the compilation of user scripts." + + " Context-specific caches are no longer needed to prevent system scripts from triggering rate limits.", + maxSettings, ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey()), + "https://ela.st/es-deprecation-7-script-context-cache", + String.format(Locale.ROOT, "[%s] is deprecated and will be removed in a future release", maxSettings), + false, null); + } + return null; + } + + static DeprecationIssue checkScriptContextCacheSizeSetting(final Settings settings, + final PluginsAndModules pluginsAndModules) { + Setting.AffixSetting cacheSizeSetting = ScriptService.SCRIPT_CACHE_SIZE_SETTING; + Set contextCacheSizes = cacheSizeSetting.getAsMap(settings).keySet(); + if (contextCacheSizes.isEmpty() == false) { + String cacheSizeSettings = contextCacheSizes.stream().sorted().map( + c -> cacheSizeSetting.getConcreteSettingForNamespace(c).getKey() + ).collect(Collectors.joining(",")); + return new DeprecationIssue(DeprecationIssue.Level.WARNING, + String.format(Locale.ROOT, + "Setting a context-specific cache size [%s] is deprecated." + + " Use [%s] to configure the size of the general cache for scripts." 
+ + " Context-specific caches are no longer needed to prevent system scripts from triggering rate limits.", + cacheSizeSettings, ScriptService.SCRIPT_GENERAL_CACHE_SIZE_SETTING.getKey()), + "https://ela.st/es-deprecation-7-script-context-cache", + String.format(Locale.ROOT, "[%s] is deprecated and will be removed in a future release", cacheSizeSettings), + false, null); + } + return null; + } + + static DeprecationIssue checkScriptContextCacheExpirationSetting(final Settings settings, + final PluginsAndModules pluginsAndModules) { + Setting.AffixSetting cacheExpireSetting = ScriptService.SCRIPT_CACHE_EXPIRE_SETTING; + Set contextCacheExpires = cacheExpireSetting.getAsMap(settings).keySet(); + if (contextCacheExpires.isEmpty() == false) { + String cacheExpireSettings = contextCacheExpires.stream().sorted().map( + c -> cacheExpireSetting.getConcreteSettingForNamespace(c).getKey() + ).collect(Collectors.joining(",")); + return new DeprecationIssue(DeprecationIssue.Level.WARNING, + String.format(Locale.ROOT, + "Setting a context-specific cache expiration [%s] is deprecated." + + " Use [%s] to configure the expiration of the general cache." + + " Context-specific caches are no longer needed to prevent system scripts from triggering rate limits.", + cacheExpireSettings, ScriptService.SCRIPT_GENERAL_CACHE_EXPIRE_SETTING.getKey()), + "https://ela.st/es-deprecation-7-script-context-cache", + String.format(Locale.ROOT, "[%s] is deprecated and will be removed in a future release", cacheExpireSettings), + false, null); + } + return null; + } } diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/NodeDeprecationChecksTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/NodeDeprecationChecksTests.java index 7e490157966d7..310470e2ae639 100644 --- a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/NodeDeprecationChecksTests.java +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/NodeDeprecationChecksTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.env.Environment; +import org.elasticsearch.script.ScriptService; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.deprecation.DeprecationIssue; @@ -349,4 +350,113 @@ public void testExporterCreateLegacyTemplateSetting() { "[xpack.monitoring.exporters.test.index.template.create_legacy_templates]", false, null))); } + + + public void testScriptContextCacheSetting() { + Settings settings = Settings.builder() + .put(ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "use-context") + .build(); + + List issues = DeprecationChecks.filterChecks(NODE_SETTINGS_CHECKS, c -> c.apply(settings, null)); + + assertThat( + issues, + hasItem( + new DeprecationIssue(DeprecationIssue.Level.WARNING, + ScriptService.USE_CONTEXT_RATE_KEY_DEPRECATION_MESSAGE, + "https://ela.st/es-deprecation-7-script-context-cache", + "found deprecated script context caches in use, change setting to compilation rate or remove " + + "setting to use the default", + false, null)) + ); + } + + public void testScriptContextCompilationsRateLimitSetting() { + List contexts = List.of("field", "score"); + Settings settings = Settings.builder() + .put(ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "use-context") + 
.put(ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(contexts.get(0)).getKey(), "123/5m") + .put(ScriptService.SCRIPT_MAX_COMPILATIONS_RATE_SETTING.getConcreteSettingForNamespace(contexts.get(1)).getKey(), "456/7m") + .build(); + + List issues = DeprecationChecks.filterChecks(NODE_SETTINGS_CHECKS, c -> c.apply(settings, null)); + + assertThat( + issues, + hasItem( + new DeprecationIssue(DeprecationIssue.Level.WARNING, + "Setting context-specific rate limits" + + " [script.context.field.max_compilations_rate,script.context.score.max_compilations_rate] is deprecated." + + " Use [script.max_compilations_rate] to rate limit the compilation of user scripts." + + " Context-specific caches are no longer needed to prevent system scripts from triggering rate limits.", + "https://ela.st/es-deprecation-7-script-context-cache", + "[script.context.field.max_compilations_rate,script.context.score.max_compilations_rate] is deprecated and" + + " will be removed in a future release", + false, null))); + + assertWarnings( + "[script.context.field.max_compilations_rate] setting was deprecated in Elasticsearch and will be" + + " removed in a future release! See the breaking changes documentation for the next major version.", + "[script.context.score.max_compilations_rate] setting was deprecated in Elasticsearch and will be removed in a future" + + " release! See the breaking changes documentation for the next major version."); + } + + public void testScriptContextCacheSizeSetting() { + List contexts = List.of("filter", "update"); + Settings settings = Settings.builder() + .put(ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "use-context") + .put(ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(contexts.get(0)).getKey(), 80) + .put(ScriptService.SCRIPT_CACHE_SIZE_SETTING.getConcreteSettingForNamespace(contexts.get(1)).getKey(), 200) + .build(); + + List issues = DeprecationChecks.filterChecks(NODE_SETTINGS_CHECKS, c -> c.apply(settings, null)); + + assertThat( + issues, + hasItem( + new DeprecationIssue(DeprecationIssue.Level.WARNING, + "Setting a context-specific cache size" + + " [script.context.filter.cache_max_size,script.context.update.cache_max_size] is deprecated." + + " Use [script.cache.max_size] to configure the size of the general cache for scripts." + + " Context-specific caches are no longer needed to prevent system scripts from triggering rate limits.", + "https://ela.st/es-deprecation-7-script-context-cache", + "[script.context.filter.cache_max_size,script.context.update.cache_max_size] is deprecated and will be" + + " removed in a future release", + false, null))); + + assertWarnings("[script.context.update.cache_max_size] setting was deprecated in Elasticsearch and will be" + + " removed in a future release! See the breaking changes documentation for the next major version.", + "[script.context.filter.cache_max_size] setting was deprecated in Elasticsearch and will be removed in a future" + + " release! 
See the breaking changes documentation for the next major version."); + } + + public void testScriptContextCacheExpirationSetting() { + List contexts = List.of("interval", "moving-function"); + Settings settings = Settings.builder() + .put(ScriptService.SCRIPT_GENERAL_MAX_COMPILATIONS_RATE_SETTING.getKey(), "use-context") + .put(ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace(contexts.get(0)).getKey(), "100m") + .put(ScriptService.SCRIPT_CACHE_EXPIRE_SETTING.getConcreteSettingForNamespace(contexts.get(1)).getKey(), "2d") + .build(); + + List issues = DeprecationChecks.filterChecks(NODE_SETTINGS_CHECKS, c -> c.apply(settings, null)); + + assertThat( + issues, + hasItem( + new DeprecationIssue(DeprecationIssue.Level.WARNING, + "Setting a context-specific cache expiration" + + " [script.context.interval.cache_expire,script.context.moving-function.cache_expire] is deprecated." + + " Use [script.cache.expire] to configure the expiration of the general cache." + + " Context-specific caches are no longer needed to prevent system scripts from triggering rate limits.", + "https://ela.st/es-deprecation-7-script-context-cache", + "[script.context.interval.cache_expire,script.context.moving-function.cache_expire] is deprecated and will be" + + " removed in a future release", + false, null))); + + + assertWarnings("[script.context.interval.cache_expire] setting was deprecated in Elasticsearch and will be" + + " removed in a future release! See the breaking changes documentation for the next major version.", + "[script.context.moving-function.cache_expire] setting was deprecated in Elasticsearch and will be removed in a future" + + " release! See the breaking changes documentation for the next major version."); + } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java index 9a283f31abe6e..0402b32b5a8af 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java @@ -686,7 +686,7 @@ private static NodeStats buildNodeStats(List pipelineNames, List SCRIPT_TEMPLATE_CONTEXT = new ScriptContext<>("xpack_template", TemplateScript.Factory.class, - 200, TimeValue.timeValueMillis(0), ScriptCache.UNLIMITED_COMPILATION_RATE.asTuple(), true); + 200, TimeValue.timeValueMillis(0), false, true); private static final Logger logger = LogManager.getLogger(Watcher.class); private WatcherIndexingListener listener; diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/condition/WatcherConditionScript.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/condition/WatcherConditionScript.java index bc38efd0c6572..04d4b97a8508c 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/condition/WatcherConditionScript.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/condition/WatcherConditionScript.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.watcher.condition; import org.elasticsearch.core.TimeValue; -import org.elasticsearch.script.ScriptCache; import org.elasticsearch.script.ScriptContext; import org.elasticsearch.xpack.core.watcher.execution.WatchExecutionContext; import org.elasticsearch.xpack.watcher.support.Variables; @@ -45,5 +44,5 @@ public interface Factory { } 
public static ScriptContext CONTEXT = new ScriptContext<>("watcher_condition", Factory.class, - 200, TimeValue.timeValueMillis(0), ScriptCache.UNLIMITED_COMPILATION_RATE.asTuple(), true); + 200, TimeValue.timeValueMillis(0), false, true); } diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transform/script/WatcherTransformScript.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transform/script/WatcherTransformScript.java index 465f87cc26bd8..3a48e25635f1d 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transform/script/WatcherTransformScript.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/transform/script/WatcherTransformScript.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.watcher.transform.script; import org.elasticsearch.core.TimeValue; -import org.elasticsearch.script.ScriptCache; import org.elasticsearch.script.ScriptContext; import org.elasticsearch.xpack.core.watcher.execution.WatchExecutionContext; import org.elasticsearch.xpack.core.watcher.watch.Payload; @@ -46,5 +45,5 @@ public interface Factory { } public static ScriptContext CONTEXT = new ScriptContext<>("watcher_transform", Factory.class, - 200, TimeValue.timeValueMillis(0), ScriptCache.UNLIMITED_COMPILATION_RATE.asTuple(), true); + 200, TimeValue.timeValueMillis(0), false, true); } From 12474b1b360dc797bb43afc2da269ca36feec57c Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Thu, 21 Oct 2021 09:03:12 -0400 Subject: [PATCH 05/21] [DOCS] Fix create snapshot API parameters (#79209) Changes: - Notes snapshot names support date math - Sorts request body parameters alphabetically - Adds the `expand_wildcards` request body parameter - Reuses cluster state contents list from the restore snapshot API - Notes the `indices` and `feature_states` parameters support a special `none` value Relates to #79081 --- .../apis/create-snapshot-api.asciidoc | 76 ++++++++++++------- .../apis/restore-snapshot-api.asciidoc | 24 +++--- 2 files changed, 61 insertions(+), 39 deletions(-) diff --git a/docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc b/docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc index 3e5851aa04cf8..3049ac09469d6 100644 --- a/docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc @@ -73,11 +73,12 @@ Besides creating a copy of each data stream and index, the snapshot process can ``:: (Required, string) -Name of the repository to create a snapshot in. +Name of the snapshot repository. ``:: (Required, string) -Name of the snapshot to create. This name must be unique in the snapshot repository. +Name of the snapshot. Supports <>. Must be +unique within the snapshot repository. [[create-snapshot-api-query-params]] ==== {api-query-parms-title} @@ -96,50 +97,67 @@ initializes. Defaults to `false`. // Set an attribute so we can reuse these params with anchors :page-id: create-snapshot-api // tag::snapshot-config[] +`expand_wildcards`:: ++ +-- +(Optional, string) Determines how wildcard patterns in the `indices` parameter +match data streams and indices. Supports comma-separated values, such as +`open,hidden`. Defaults to `all`. Valid values are: + +`all`::: +Match any data stream or index, including <> ones. + +`open`::: +Match open indices and data streams. + +`closed`::: +Match closed indices and data streams. 
+ +`hidden`::: +Match hidden data streams and indices. Must be combined with `open`, `closed`, +or both. + +`none`::: +Don't expand wildcard patterns. +-- + `ignore_unavailable`:: (Optional, Boolean) If `false`, the snapshot fails if any data stream or index in `indices` is missing or closed. If `true`, the snapshot ignores missing or closed data streams and indices. Defaults to `false`. -`indices`:: -(Optional, string) -A comma-separated list of data streams and indices to include in the snapshot. -<> is supported. -+ -By default, a snapshot includes all data streams and indices in the cluster. If this -argument is provided, the snapshot only includes the specified data streams and clusters. - `include_global_state`:: + -- (Optional, Boolean) -If `true`, the current global state is included in the snapshot. -Defaults to `true`. - -The global state includes: +If `true`, include the cluster state in the snapshot. Defaults to `true`. +The cluster state includes: -* Persistent cluster settings -* Index templates -* Legacy index templates -* Ingest pipelines -* {ilm-init} lifecycle policies -* Data stored in system indices, such as Watches and task records (configurable via `feature_states`) +include::restore-snapshot-api.asciidoc[tag=cluster-state-contents] -- +`indices`:: +(Optional, string or array of strings) +Comma-separated list of data streams and indices to include in the snapshot. +Supports <>. Defaults to an empty array +(`[]`), which includes all data streams and indices, including system indices. ++ +To exclude all data streams and indices, use `-*` or `none`. + [id="{page-id}-feature-states"] `feature_states`:: (Optional, array of strings) -A list of feature states to be included in this snapshot. A list of features -available for inclusion in the snapshot and their descriptions be can be -retrieved using the <>. -Each feature state includes one or more system indices containing data necessary -for the function of that feature. Providing an empty array will include no feature -states in the snapshot, regardless of the value of `include_global_state`. +Feature states to include in the snapshot. To get a list of possible feature +state values and their descriptions, use the <>. Each feature state includes one or more system indices. ++ +If `include_global_state` is `true`, the snapshot includes all feature states by +default. If `include_global_state` is `false`, the snapshot includes no feature +states by default. + -By default, all available feature states will be included in the snapshot if -`include_global_state` is `true`, or no feature states if `include_global_state` -is `false`. +To exclude all feature states, regardless of the `include_global_state` value, +specify an empty array (`[]`) or `none`. `metadata`:: (Optional, object) diff --git a/docs/reference/snapshot-restore/apis/restore-snapshot-api.asciidoc b/docs/reference/snapshot-restore/apis/restore-snapshot-api.asciidoc index 377b4b2321b1a..d50dab75e3c6e 100644 --- a/docs/reference/snapshot-restore/apis/restore-snapshot-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/restore-snapshot-api.asciidoc @@ -135,18 +135,19 @@ indices. If `false`, the request doesn't restore aliases. Defaults to `true`. + -- (Optional, Boolean) -If `false`, the global state is not restored. Defaults to `false`. +If `true`, restore the cluster state. Defaults to `false`. -If `true`, the current global state is included in the restore operation. 
+The cluster state includes:
 
-The global state includes:
-
-* Persistent cluster settings
-* Index templates
-* Legacy index templates
-* Ingest pipelines
-* {ilm-init} lifecycle policies
-* For snapshots taken after 7.12.0, data stored in system indices, such as Watches and task records, replacing any existing configuration (configurable via `feature_states`)
+// tag::cluster-state-contents[]
+* <>
+* <>
+* <>
+* <>
+* <>
+* For snapshots taken after 7.12.0, data stored in system indices, such as
+  Watches and task records.
+// end::cluster-state-contents[]
 
 If `include_global_state` is `true` then the restore operation merges the
 legacy index templates in your cluster with the templates contained in the
@@ -154,6 +155,9 @@ snapshot, replacing any existing ones whose name matches one in the snapshot.
 It completely removes all persistent settings, non-legacy index templates,
 ingest pipelines and {ilm-init} lifecycle policies that exist in your cluster
 and replaces them with the corresponding items from the snapshot.
+
+You can use the `feature_states` parameter to configure how system indices
+are restored from the cluster state.
 --
 
 [[restore-snapshot-api-feature-states]]

From 879ae33b1771c60986258e5aaa6c195ce4e4f9e9 Mon Sep 17 00:00:00 2001
From: David Pilato
Date: Thu, 21 Oct 2021 15:06:37 +0200
Subject: [PATCH 06/21] Remove binary field after attachment processor execution (#79172)

Keeping the binary as a field within the document might consume a lot of
resources. It is highly recommended to remove that field from the document,
which can be done by adding a `remove` processor in the pipeline.

This commit allows an easier way of doing this by adding a new option,
`remove_binary`. When set to `true`, it automatically removes the field at
the end of the processor execution. It defaults to `false` to keep the
existing behavior and not introduce any breaking change.
---
 docs/plugins/ingest-attachment.asciidoc | 4 ++
 .../attachment/AttachmentProcessor.java | 17 +++++++-
 .../AttachmentProcessorFactoryTests.java | 15 +++++++
 .../attachment/AttachmentProcessorTests.java | 39 ++++++++++++++-----
 4 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/docs/plugins/ingest-attachment.asciidoc b/docs/plugins/ingest-attachment.asciidoc
index 4658237602d7a..50711023f93ba 100644
--- a/docs/plugins/ingest-attachment.asciidoc
+++ b/docs/plugins/ingest-attachment.asciidoc
@@ -28,6 +28,7 @@ include::install_remove.asciidoc[]
 | `indexed_chars_field` | no | `null` | Field name from which you can overwrite the number of chars being used for extraction. See `indexed_chars`.
 | `properties` | no | all properties | Array of properties to select to be stored. Can be `content`, `title`, `name`, `author`, `keywords`, `date`, `content_type`, `content_length`, `language`
 | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
+| `remove_binary` | no | `false` | If `true`, the binary `field` will be removed from the document
 | `resource_name` | no | | Field containing the name of the resource to decode. If specified, the processor passes this resource name to the underlying Tika library to enable https://tika.apache.org/1.24.1/detection.html#Resource_Name_Based_Detection[Resource Name Based Detection].
|====== @@ -94,6 +95,9 @@ The document's `attachment` object contains extracted properties for the file: ---- // TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] +NOTE: Keeping the binary as a field within the document might consume a lot of resources. It is highly recommended + to remove that field from the document. Set `remove_binary` to `true` to automatically remove the field. + To extract only certain `attachment` fields, specify the `properties` array: [source,console] diff --git a/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java b/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java index f071407005f98..22857331f896c 100644 --- a/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java +++ b/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java @@ -44,11 +44,13 @@ public final class AttachmentProcessor extends AbstractProcessor { private final Set properties; private final int indexedChars; private final boolean ignoreMissing; + private final boolean removeBinary; private final String indexedCharsField; private final String resourceName; AttachmentProcessor(String tag, String description, String field, String targetField, Set properties, - int indexedChars, boolean ignoreMissing, String indexedCharsField, String resourceName) { + int indexedChars, boolean ignoreMissing, String indexedCharsField, String resourceName, + boolean removeBinary) { super(tag, description); this.field = field; this.targetField = targetField; @@ -57,12 +59,18 @@ public final class AttachmentProcessor extends AbstractProcessor { this.ignoreMissing = ignoreMissing; this.indexedCharsField = indexedCharsField; this.resourceName = resourceName; + this.removeBinary = removeBinary; } boolean isIgnoreMissing() { return ignoreMissing; } + // For tests only + boolean isRemoveBinary() { + return removeBinary; + } + @Override public IngestDocument execute(IngestDocument ingestDocument) { Map additionalFields = new HashMap<>(); @@ -162,6 +170,10 @@ public IngestDocument execute(IngestDocument ingestDocument) { } ingestDocument.setFieldValue(targetField, additionalFields); + + if (removeBinary) { + ingestDocument.removeField(field); + } return ingestDocument; } @@ -200,6 +212,7 @@ public AttachmentProcessor create(Map registry, Strin int indexedChars = readIntProperty(TYPE, processorTag, config, "indexed_chars", NUMBER_OF_CHARS_INDEXED); boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false); String indexedCharsField = readOptionalStringProperty(TYPE, processorTag, config, "indexed_chars_field"); + boolean removeBinary = readBooleanProperty(TYPE, processorTag, config, "remove_binary", false); final Set properties; if (propertyNames != null) { @@ -217,7 +230,7 @@ public AttachmentProcessor create(Map registry, Strin } return new AttachmentProcessor(processorTag, description, field, targetField, properties, indexedChars, ignoreMissing, - indexedCharsField, resourceName); + indexedCharsField, resourceName, removeBinary); } } diff --git a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java index d0288fdc4d75f..45c3407020dcc 100644 --- 
a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java +++ b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorFactoryTests.java @@ -124,4 +124,19 @@ public void testIgnoreMissing() throws Exception { assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES)); assertTrue(processor.isIgnoreMissing()); } + + public void testRemoveBinary() throws Exception { + Map config = new HashMap<>(); + config.put("field", "_field"); + config.put("remove_binary", true); + + String processorTag = randomAlphaOfLength(10); + + AttachmentProcessor processor = factory.create(null, processorTag, null, config); + assertThat(processor.getTag(), equalTo(processorTag)); + assertThat(processor.getField(), equalTo("_field")); + assertThat(processor.getTargetField(), equalTo("attachment")); + assertThat(processor.getProperties(), sameInstance(AttachmentProcessor.Factory.DEFAULT_PROPERTIES)); + assertTrue(processor.isRemoveBinary()); + } } diff --git a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java index 4291a54a9149b..63197de7f7981 100644 --- a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java +++ b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java @@ -44,7 +44,7 @@ public class AttachmentProcessorTests extends ESTestCase { @Before public void createStandardProcessor() { processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", - "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000, false, null, null); + "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000, false, null, null, false); } public void testEnglishTextDocument() throws Exception { @@ -77,7 +77,7 @@ public void testHtmlDocumentWithRandomFields() throws Exception { selectedProperties.add(AttachmentProcessor.Property.DATE); } processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", - "target_field", selectedProperties, 10000, false, null, null); + "target_field", selectedProperties, 10000, false, null, null, false); Map attachmentData = parseDocument("htmlWithEmptyDateMeta.html", processor); assertThat(attachmentData.keySet(), hasSize(selectedFieldNames.length)); @@ -237,7 +237,7 @@ public void testNullValueWithIgnoreMissing() throws Exception { Collections.singletonMap("source_field", null)); IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); Processor processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", - "randomTarget", null, 10, true, null, null); + "randomTarget", null, 10, true, null, null, false); processor.execute(ingestDocument); assertIngestDocument(originalIngestDocument, ingestDocument); } @@ -246,7 +246,7 @@ public void testNonExistentWithIgnoreMissing() throws Exception { IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap()); IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); Processor processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", - "randomTarget", null, 10, true, null, null); + "randomTarget", null, 10, true, null, null, false); processor.execute(ingestDocument); 
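        // ignore_missing is true for this processor, so execute() is expected to
        // leave the document unchanged rather than throw.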
assertIngestDocument(originalIngestDocument, ingestDocument); } @@ -256,7 +256,7 @@ public void testNullWithoutIgnoreMissing() throws Exception { Collections.singletonMap("source_field", null)); IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); Processor processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", - "randomTarget", null, 10, false, null, null); + "randomTarget", null, 10, false, null, null, false); Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); assertThat(exception.getMessage(), equalTo("field [source_field] is null, cannot parse.")); } @@ -265,7 +265,7 @@ public void testNonExistentWithoutIgnoreMissing() throws Exception { IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap()); IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); Processor processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", - "randomTarget", null, 10, false, null, null); + "randomTarget", null, 10, false, null, null, false); Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); assertThat(exception.getMessage(), equalTo("field [source_field] not present as part of path [source_field]")); } @@ -299,7 +299,7 @@ private Map parseDocument(String file, AttachmentProcessor proce public void testIndexedChars() throws Exception { processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", - "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 19, false, null, null); + "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 19, false, null, null, false); Map attachmentData = parseDocument("text-in-english.txt", processor); @@ -310,7 +310,7 @@ public void testIndexedChars() throws Exception { assertThat(attachmentData.get("content_length"), is(19L)); processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", - "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 19, false, "max_length", null); + "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 19, false, "max_length", null, false); attachmentData = parseDocument("text-in-english.txt", processor); @@ -341,7 +341,7 @@ public void testIndexedChars() throws Exception { public void testIndexedCharsWithResourceName() throws Exception { processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 100, - false, null, "resource_name"); + false, null, "resource_name", false); Map attachmentData = parseDocument("text-cjk-big5.txt", processor, Collections.singletonMap("max_length", 100), true); @@ -369,6 +369,27 @@ public void testIndexedCharsWithResourceName() throws Exception { assertThat(attachmentData.get("content_length"), is(100L)); } + public void testRemoveBinary() throws Exception { + { + // Test the default behavior. + Map document = new HashMap<>(); + document.put("source_field", getAsBinaryOrBase64("text-in-english.txt")); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField("source_field"), is(true)); + } + { + // Remove the binary field. 
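+            // The trailing constructor argument is the new removeBinary flag; passing
+            // true here should make the processor drop source_field after extraction.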
+ processor = new AttachmentProcessor(randomAlphaOfLength(10), null, "source_field", + "target_field", EnumSet.allOf(AttachmentProcessor.Property.class), 10000, false, null, null, true); + Map document = new HashMap<>(); + document.put("source_field", getAsBinaryOrBase64("text-in-english.txt")); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), document); + processor.execute(ingestDocument); + assertThat(ingestDocument.hasField("source_field"), is(false)); + } + } + private Object getAsBinaryOrBase64(String filename) throws Exception { String path = "/org/elasticsearch/ingest/attachment/test/sample-files/" + filename; try (InputStream is = AttachmentProcessorTests.class.getResourceAsStream(path)) { From 3f4b6a74b86ce2efe75bdefc6c91e7cbca70430a Mon Sep 17 00:00:00 2001 From: William Brafford Date: Thu, 21 Oct 2021 09:29:59 -0400 Subject: [PATCH 07/21] Remove data telemetry access to restricted indices (#76994) * kibana_system role cannot access system indices for data telemetry --- .../security/authz/store/ReservedRolesStore.java | 5 ++--- .../authz/store/ReservedRolesStoreTests.java | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java index b45ae543a7dad..a2e5b0cc183d9 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java @@ -402,10 +402,9 @@ public static RoleDescriptor kibanaSystemRoleDescriptor(String name) { RoleDescriptor.IndicesPrivileges.builder() .indices("apm-*") .privileges("read", "read_cross_cluster").build(), - // Data telemetry reads mappings, metadata and stats of indices (excluding security and async search indices) + // Data telemetry reads mappings, metadata and stats of indices RoleDescriptor.IndicesPrivileges.builder() - .indices("/@&~(\\.security.*)&~(\\.async-search.*)/") - .allowRestrictedIndices(true) + .indices("*") .privileges("view_index_metadata", "monitor").build(), // Endpoint diagnostic information. 
Kibana reads from these indices to send telemetry RoleDescriptor.IndicesPrivileges.builder() diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java index 7c6cb6d1db757..c7ced20ee9dd0 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStoreTests.java @@ -538,10 +538,7 @@ public void testKibanaSystemRole() { // Data telemetry reads mappings, metadata and stats of indices - Arrays.asList(randomAlphaOfLengthBetween(8, 24), "packetbeat-*", - // check system indices other than .security* and .async-search* - ".watches", ".triggered-watches", ".tasks", ".enrich" - ).forEach((index) -> { + Arrays.asList(randomAlphaOfLengthBetween(8, 24), "packetbeat-*").forEach((index) -> { logger.info("index name [{}]", index); assertThat(kibanaRole.indices().allowedIndicesMatcher(IndicesStatsAction.NAME).test(mockIndexAbstraction(index)), is(true)); assertViewIndexMetadata(kibanaRole, index); @@ -559,6 +556,17 @@ public void testKibanaSystemRole() { assertThat(kibanaRole.indices().allowedIndicesMatcher(READ_CROSS_CLUSTER_NAME).test(mockIndexAbstraction(index)), is(false)); }); + // Data telemetry does not have access to system indices that aren't specified + List.of(".watches", ".geoip_databases", ".logstash", ".snapshot-blob-cache").forEach((index) -> { + logger.info("index name [{}]", index); + assertThat(kibanaRole.indices().allowedIndicesMatcher(GetIndexAction.NAME).test(mockIndexAbstraction(index)), is(false)); + assertThat(kibanaRole.indices().allowedIndicesMatcher(GetMappingsAction.NAME).test(mockIndexAbstraction(index)), is(false)); + assertThat(kibanaRole.indices().allowedIndicesMatcher(IndicesStatsAction.NAME).test(mockIndexAbstraction(index)), is(false)); + assertThat(kibanaRole.indices().allowedIndicesMatcher("indices:foo").test(mockIndexAbstraction(index)), is(false)); + assertThat(kibanaRole.indices().allowedIndicesMatcher("indices:bar").test(mockIndexAbstraction(index)), is(false)); + assertThat(kibanaRole.indices().allowedIndicesMatcher(READ_CROSS_CLUSTER_NAME).test(mockIndexAbstraction(index)), is(false)); + }); + // Data telemetry does not have access to security and async search RestrictedIndicesNames.RESTRICTED_NAMES.forEach((index) -> { logger.info("index name [{}]", index); From bc1c9b407c5961cfdb1255d3e97f9f96d230776b Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Thu, 21 Oct 2021 09:30:27 -0400 Subject: [PATCH 08/21] [DOCS] Remove logic for permanently unreleased branches (#79575) These tags are no longer needed. We previously used them for docs behind a feature flag. You can now achieve the same effect with `"{release-state}"!="released"`. No docs currently use this logic. 
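
As a sketch (not part of this commit), a docs snippet gated on an unreleased
state could use the attribute check directly; the included file name below is
hypothetical:

----
ifeval::["{release-state}"!="released"]
// Hypothetical include, only built while {release-state} is not "released".
include::unreleased-feature.asciidoc[]
endif::[]
----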
--- docs/Versions.asciidoc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docs/Versions.asciidoc b/docs/Versions.asciidoc index b95585ea13e43..2038cc5e2efd7 100644 --- a/docs/Versions.asciidoc +++ b/docs/Versions.asciidoc @@ -54,16 +54,6 @@ endif::[] :javadoc-license: {rest-high-level-client-javadoc}/org/elasticsearch/protocol/xpack/license :javadoc-watcher: {rest-high-level-client-javadoc}/org/elasticsearch/protocol/xpack/watcher -/////// -Permanently unreleased branches (master, n.X) -/////// -ifeval::["{source_branch}"=="master"] -:permanently-unreleased-branch: -endif::[] -ifeval::["{source_branch}"=="{major-version}"] -:permanently-unreleased-branch: -endif::[] - /////// Shared attribute values are pulled from elastic/docs /////// From c369288f572b05a096840f4d9c0df4c592974d37 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 21 Oct 2021 15:10:37 +0100 Subject: [PATCH 09/21] [ML] Remove redundant constructor argument from MachineLearning (#79627) The MachineLearning plugin still takes a configPath argument but doesn't use it. I think this used to be an argument used by all plugins, but isn't any longer, and it's been removed from most plugins. This change removes it from MachineLearning too. --- .../java/org/elasticsearch/xpack/ml/MachineLearning.java | 3 +-- .../elasticsearch/xpack/ml/LocalStateMachineLearning.java | 2 +- .../org/elasticsearch/xpack/ml/MachineLearningTests.java | 2 +- .../aggs/categorization/CategorizeTextAggregatorTests.java | 4 ++-- .../InternalCategorizationAggregationTests.java | 2 +- .../BucketCorrelationAggregationBuilderTests.java | 2 +- .../xpack/ml/aggs/heuristic/PValueScoreTests.java | 6 +++--- .../inference/InferencePipelineAggregationBuilderTests.java | 2 +- .../aggs/inference/InternalInferenceAggregationTests.java | 2 +- .../kstest/BucketCountKSTestAggregationBuilderTests.java | 2 +- .../ml/aggs/kstest/InternalKSTestAggregationTests.java | 2 +- .../ml/job/categorization/CategorizationAnalyzerTests.java | 2 +- 12 files changed, 15 insertions(+), 16 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 7cfda17f2bdc7..97e0a154243d4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -412,7 +412,6 @@ import org.elasticsearch.xpack.ml.utils.persistence.ResultsPersisterService; import java.io.IOException; -import java.nio.file.Path; import java.time.Clock; import java.util.ArrayList; import java.util.Arrays; @@ -611,7 +610,7 @@ public Map getProcessors(Processor.Parameters paramet private final SetOnce deploymentManager = new SetOnce<>(); private final SetOnce trainedModelAllocationClusterServiceSetOnce = new SetOnce<>(); - public MachineLearning(Settings settings, Path configPath) { + public MachineLearning(Settings settings) { this.settings = settings; this.enabled = XPackSettings.MACHINE_LEARNING_ENABLED.get(settings); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearning.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearning.java index aba1d90ca18ba..2cf44ef00f50f 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearning.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/LocalStateMachineLearning.java @@ -43,7 +43,7 @@ public class 
LocalStateMachineLearning extends LocalStateCompositeXPackPlugin { public LocalStateMachineLearning(final Settings settings, final Path configPath) { super(settings, configPath); LocalStateMachineLearning thisVar = this; - mlPlugin = new MachineLearning(settings, configPath){ + mlPlugin = new MachineLearning(settings){ @Override protected XPackLicenseState getLicenseState() { return thisVar.getLicenseState(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java index f33191afd6b5f..b1da34ecc9113 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningTests.java @@ -175,7 +175,7 @@ public void testNoAttributes_givenClash() { private MachineLearning createMachineLearning(Settings settings) { XPackLicenseState licenseState = mock(XPackLicenseState.class); - return new MachineLearning(settings, null){ + return new MachineLearning(settings) { @Override protected XPackLicenseState getLicenseState() { return licenseState; diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregatorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregatorTests.java index 95cfdcb0f8f8f..eb62b6cec225a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregatorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregatorTests.java @@ -45,13 +45,13 @@ protected AnalysisModule createAnalysisModule() throws Exception { TestEnvironment.newEnvironment( Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build() ), - List.of(new MachineLearning(Settings.EMPTY, null)) + List.of(new MachineLearning(Settings.EMPTY)) ); } @Override protected List getSearchPlugins() { - return List.of(new MachineLearning(Settings.EMPTY, null)); + return List.of(new MachineLearning(Settings.EMPTY)); } private static final String TEXT_FIELD_NAME = "text"; diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/InternalCategorizationAggregationTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/InternalCategorizationAggregationTests.java index 50e74155fe04c..8441f4a0142dc 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/InternalCategorizationAggregationTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/InternalCategorizationAggregationTests.java @@ -33,7 +33,7 @@ public class InternalCategorizationAggregationTests extends InternalMultiBucketA @Override protected SearchPlugin registerPlugin() { - return new MachineLearning(Settings.EMPTY, null); + return new MachineLearning(Settings.EMPTY); } @Override diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/correlation/BucketCorrelationAggregationBuilderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/correlation/BucketCorrelationAggregationBuilderTests.java index 72f20667ef099..0428b4dc697d8 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/correlation/BucketCorrelationAggregationBuilderTests.java +++ 
b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/correlation/BucketCorrelationAggregationBuilderTests.java @@ -31,7 +31,7 @@ public class BucketCorrelationAggregationBuilderTests extends BasePipelineAggreg @Override protected List plugins() { - return Collections.singletonList(new MachineLearning(Settings.EMPTY, null)); + return Collections.singletonList(new MachineLearning(Settings.EMPTY)); } @Override diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/heuristic/PValueScoreTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/heuristic/PValueScoreTests.java index 08b6743e9d10c..3a22584c8cb48 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/heuristic/PValueScoreTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/heuristic/PValueScoreTests.java @@ -17,7 +17,7 @@ import org.elasticsearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.elasticsearch.xpack.ml.MachineLearning; -import java.util.Arrays; +import java.util.List; import java.util.function.Function; import static org.hamcrest.Matchers.allOf; @@ -56,14 +56,14 @@ public void testAssertions() { @Override protected NamedXContentRegistry xContentRegistry() { return new NamedXContentRegistry( - new SearchModule(Settings.EMPTY, Arrays.asList(new MachineLearning(Settings.EMPTY, null))).getNamedXContents() + new SearchModule(Settings.EMPTY, List.of(new MachineLearning(Settings.EMPTY))).getNamedXContents() ); } @Override protected NamedWriteableRegistry writableRegistry() { return new NamedWriteableRegistry( - new SearchModule(Settings.EMPTY, Arrays.asList(new MachineLearning(Settings.EMPTY, null))).getNamedWriteables() + new SearchModule(Settings.EMPTY, List.of(new MachineLearning(Settings.EMPTY))).getNamedWriteables() ); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/inference/InferencePipelineAggregationBuilderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/inference/InferencePipelineAggregationBuilderTests.java index 0d7f2d996bb0c..58068a177d063 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/inference/InferencePipelineAggregationBuilderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/inference/InferencePipelineAggregationBuilderTests.java @@ -43,7 +43,7 @@ public class InferencePipelineAggregationBuilderTests extends BasePipelineAggreg @Override protected List plugins() { - return Collections.singletonList(new MachineLearning(Settings.EMPTY, null)); + return Collections.singletonList(new MachineLearning(Settings.EMPTY)); } @Override diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/inference/InternalInferenceAggregationTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/inference/InternalInferenceAggregationTests.java index 7244464ea5211..ad5a63845d3f2 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/inference/InternalInferenceAggregationTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/inference/InternalInferenceAggregationTests.java @@ -40,7 +40,7 @@ public class InternalInferenceAggregationTests extends InternalAggregationTestCa @Override protected SearchPlugin registerPlugin() { - return new MachineLearning(Settings.EMPTY, null); + return new MachineLearning(Settings.EMPTY); } @Override diff --git 
a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/kstest/BucketCountKSTestAggregationBuilderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/kstest/BucketCountKSTestAggregationBuilderTests.java index ba4c88508501c..cd7ee7eb9a7f1 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/kstest/BucketCountKSTestAggregationBuilderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/kstest/BucketCountKSTestAggregationBuilderTests.java @@ -32,7 +32,7 @@ public class BucketCountKSTestAggregationBuilderTests extends BasePipelineAggreg @Override protected List plugins() { - return Collections.singletonList(new MachineLearning(Settings.EMPTY, null)); + return Collections.singletonList(new MachineLearning(Settings.EMPTY)); } @Override diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/kstest/InternalKSTestAggregationTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/kstest/InternalKSTestAggregationTests.java index b27e181e37465..8f3718b56dc19 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/kstest/InternalKSTestAggregationTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/kstest/InternalKSTestAggregationTests.java @@ -28,7 +28,7 @@ public class InternalKSTestAggregationTests extends InternalAggregationTestCase< @Override protected SearchPlugin registerPlugin() { - return new MachineLearning(Settings.EMPTY, null); + return new MachineLearning(Settings.EMPTY); } @Override diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzerTests.java index eaac564723cc3..a9d1b82eededc 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/categorization/CategorizationAnalyzerTests.java @@ -52,7 +52,7 @@ public class CategorizationAnalyzerTests extends ESTestCase { public static AnalysisRegistry buildTestAnalysisRegistry(Environment environment) throws Exception { CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin(); - MachineLearning ml = new MachineLearning(environment.settings(), environment.configFile()); + MachineLearning ml = new MachineLearning(environment.settings()); return new AnalysisModule(environment, Arrays.asList(commonAnalysisPlugin, ml)).getAnalysisRegistry(); } From 24c659e9cec4e9c45f9c6615d29040d4bbf98b80 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Thu, 21 Oct 2021 10:43:06 -0400 Subject: [PATCH 10/21] [ML] add new truncate parameter tokenization (#79515) This commit adds a new `truncate` parameter to tokenization. 
Valid values are: first : truncate only the first sequence (if two are provided) second: truncate only the second sequence (if two are provided) none: do no truncation, which means we throw an error when sequences are too long --- .../trainedmodel/BertTokenization.java | 16 +- .../inference/trainedmodel/Tokenization.java | 42 ++++- .../trainedmodel/BertTokenizationTests.java | 3 +- .../xpack/ml/integration/AutoscalingIT.java | 5 +- .../ml/integration/TestFeatureResetIT.java | 6 +- .../ml/integration/TrainedModelCRUDIT.java | 3 +- .../nlp/tokenizers/BertTokenizer.java | 73 +++++++- .../nlp/BertRequestBuilderTests.java | 7 +- .../ml/inference/nlp/NerProcessorTests.java | 3 +- .../nlp/TextClassificationProcessorTests.java | 3 +- .../ZeroShotClassificationProcessorTests.java | 3 +- .../nlp/tokenizers/BertTokenizerTests.java | 167 +++++++++++++----- 12 files changed, 263 insertions(+), 68 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/BertTokenization.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/BertTokenization.java index a5ea7176db547..68f33a68c4fb7 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/BertTokenization.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/BertTokenization.java @@ -25,7 +25,12 @@ public static ConstructingObjectParser createParser(bool ConstructingObjectParser parser = new ConstructingObjectParser<>( "bert_tokenization", ignoreUnknownFields, - a -> new BertTokenization((Boolean) a[0], (Boolean) a[1], (Integer) a[2]) + a -> new BertTokenization( + (Boolean) a[0], + (Boolean) a[1], + (Integer) a[2], + a[3] == null ? null : Truncate.fromString((String)a[3]) + ) ); Tokenization.declareCommonFields(parser); return parser; @@ -38,8 +43,13 @@ public static BertTokenization fromXContent(XContentParser parser, boolean lenie return lenient ? 
LENIENT_PARSER.apply(parser, null) : STRICT_PARSER.apply(parser, null); } - public BertTokenization(@Nullable Boolean doLowerCase, @Nullable Boolean withSpecialTokens, @Nullable Integer maxSequenceLength) { - super(doLowerCase, withSpecialTokens, maxSequenceLength); + public BertTokenization( + @Nullable Boolean doLowerCase, + @Nullable Boolean withSpecialTokens, + @Nullable Integer maxSequenceLength, + @Nullable Truncate truncate + ) { + super(doLowerCase, withSpecialTokens, maxSequenceLength, truncate); } public BertTokenization(StreamInput in) throws IOException { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/Tokenization.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/Tokenization.java index dc34ed7c3ec3c..d0ee7d7a7e539 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/Tokenization.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/Tokenization.java @@ -17,47 +17,74 @@ import org.elasticsearch.xpack.core.ml.utils.NamedXContentObject; import java.io.IOException; +import java.util.Locale; import java.util.Objects; import java.util.Optional; public abstract class Tokenization implements NamedXContentObject, NamedWriteable { + public enum Truncate { + FIRST, + SECOND, + NONE; + + public static Truncate fromString(String value) { + return valueOf(value.toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + //TODO add global params like never_split, bos_token, eos_token, mask_token, tokenize_chinese_chars, strip_accents, etc. public static final ParseField DO_LOWER_CASE = new ParseField("do_lower_case"); public static final ParseField WITH_SPECIAL_TOKENS = new ParseField("with_special_tokens"); public static final ParseField MAX_SEQUENCE_LENGTH = new ParseField("max_sequence_length"); + public static final ParseField TRUNCATE = new ParseField("truncate"); private static final int DEFAULT_MAX_SEQUENCE_LENGTH = 512; private static final boolean DEFAULT_DO_LOWER_CASE = false; private static final boolean DEFAULT_WITH_SPECIAL_TOKENS = true; + private static final Truncate DEFAULT_TRUNCATION = Truncate.FIRST; static void declareCommonFields(ConstructingObjectParser parser) { parser.declareBoolean(ConstructingObjectParser.optionalConstructorArg(), DO_LOWER_CASE); parser.declareBoolean(ConstructingObjectParser.optionalConstructorArg(), WITH_SPECIAL_TOKENS); parser.declareInt(ConstructingObjectParser.optionalConstructorArg(), MAX_SEQUENCE_LENGTH); + parser.declareString(ConstructingObjectParser.optionalConstructorArg(), TRUNCATE); } public static BertTokenization createDefault() { - return new BertTokenization(null, null, null); + return new BertTokenization(null, null, null, Truncate.FIRST); } protected final boolean doLowerCase; protected final boolean withSpecialTokens; protected final int maxSequenceLength; - - Tokenization(@Nullable Boolean doLowerCase, @Nullable Boolean withSpecialTokens, @Nullable Integer maxSequenceLength) { + protected final Truncate truncate; + + Tokenization( + @Nullable Boolean doLowerCase, + @Nullable Boolean withSpecialTokens, + @Nullable Integer maxSequenceLength, + @Nullable Truncate truncate + ) { if (maxSequenceLength != null && maxSequenceLength <= 0) { throw new IllegalArgumentException("[" + MAX_SEQUENCE_LENGTH.getPreferredName() + "] must be positive"); } this.doLowerCase = 
Optional.ofNullable(doLowerCase).orElse(DEFAULT_DO_LOWER_CASE); this.withSpecialTokens = Optional.ofNullable(withSpecialTokens).orElse(DEFAULT_WITH_SPECIAL_TOKENS); this.maxSequenceLength = Optional.ofNullable(maxSequenceLength).orElse(DEFAULT_MAX_SEQUENCE_LENGTH); + this.truncate = Optional.ofNullable(truncate).orElse(DEFAULT_TRUNCATION); } public Tokenization(StreamInput in) throws IOException { this.doLowerCase = in.readBoolean(); this.withSpecialTokens = in.readBoolean(); this.maxSequenceLength = in.readVInt(); + this.truncate = in.readEnum(Truncate.class); } @Override @@ -65,6 +92,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(doLowerCase); out.writeBoolean(withSpecialTokens); out.writeVInt(maxSequenceLength); + out.writeEnum(truncate); } abstract XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException; @@ -75,6 +103,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(DO_LOWER_CASE.getPreferredName(), doLowerCase); builder.field(WITH_SPECIAL_TOKENS.getPreferredName(), withSpecialTokens); builder.field(MAX_SEQUENCE_LENGTH.getPreferredName(), maxSequenceLength); + builder.field(TRUNCATE.getPreferredName(), truncate.toString()); builder = doXContentBody(builder, params); builder.endObject(); return builder; @@ -87,12 +116,13 @@ public boolean equals(Object o) { Tokenization that = (Tokenization) o; return doLowerCase == that.doLowerCase && withSpecialTokens == that.withSpecialTokens + && truncate == that.truncate && maxSequenceLength == that.maxSequenceLength; } @Override public int hashCode() { - return Objects.hash(doLowerCase, withSpecialTokens, maxSequenceLength); + return Objects.hash(doLowerCase, truncate, withSpecialTokens, maxSequenceLength); } public boolean doLowerCase() { @@ -106,4 +136,8 @@ public boolean withSpecialTokens() { public int maxSequenceLength() { return maxSequenceLength; } + + public Truncate getTruncate() { + return truncate; + } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/BertTokenizationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/BertTokenizationTests.java index 9000a6b61e921..7091dadf6637d 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/BertTokenizationTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/BertTokenizationTests.java @@ -48,7 +48,8 @@ public static BertTokenization createRandom() { return new BertTokenization( randomBoolean() ? null : randomBoolean(), randomBoolean() ? null : randomBoolean(), - randomBoolean() ? null : randomIntBetween(1, 1024) + randomBoolean() ? null : randomIntBetween(1, 1024), + randomBoolean() ? 
null : randomFrom(Tokenization.Truncate.values()) ); } } diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java index a91dd0a339ad3..5618f94cdee15 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/AutoscalingIT.java @@ -26,6 +26,7 @@ import org.elasticsearch.xpack.core.ml.inference.allocation.AllocationStatus; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.PassThroughConfig; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig; import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits; import org.elasticsearch.xpack.core.ml.job.config.DataDescription; @@ -276,7 +277,9 @@ private void putAndStartModelDeployment(String modelId, long memoryUse, Allocati new PutTrainedModelAction.Request( TrainedModelConfig.builder() .setModelType(TrainedModelType.PYTORCH) - .setInferenceConfig(new PassThroughConfig(null, new BertTokenization(null, false, null), null)) + .setInferenceConfig( + new PassThroughConfig(null, new BertTokenization(null, false, null, Tokenization.Truncate.NONE), null) + ) .setModelId(modelId) .build(), false diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TestFeatureResetIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TestFeatureResetIT.java index 10dd627c974f4..1c616da018902 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TestFeatureResetIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/TestFeatureResetIT.java @@ -13,7 +13,6 @@ import org.elasticsearch.action.ingest.DeletePipelineRequest; import org.elasticsearch.action.ingest.PutPipelineAction; import org.elasticsearch.action.ingest.PutPipelineRequest; -import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.xcontent.XContentType; @@ -31,10 +30,9 @@ import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification; import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; import org.elasticsearch.xpack.core.ml.inference.TrainedModelType; -import org.elasticsearch.xpack.core.ml.inference.persistence.InferenceIndexConstants; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.PassThroughConfig; -import org.elasticsearch.xpack.core.ml.inference.trainedmodel.VocabularyConfig; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; import org.elasticsearch.xpack.core.ml.job.config.Job; import org.elasticsearch.xpack.core.ml.job.config.JobState; import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts; @@ -200,7 +198,7 @@ void createModelDeployment() { .setInferenceConfig( new PassThroughConfig( null, - new 
BertTokenization(null, false, null), + new BertTokenization(null, false, null, Tokenization.Truncate.NONE), null ) ) diff --git a/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/TrainedModelCRUDIT.java b/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/TrainedModelCRUDIT.java index 329f3619d8588..a28870fc4307a 100644 --- a/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/TrainedModelCRUDIT.java +++ b/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/TrainedModelCRUDIT.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.PassThroughConfig; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.IndexLocation; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.VocabularyConfig; import org.elasticsearch.xpack.ml.MlSingleNodeTestCase; import org.junit.Before; @@ -72,7 +73,7 @@ public void testPutTrainedModelAndDefinition() { new VocabularyConfig( InferenceIndexConstants.nativeDefinitionStore() ), - new BertTokenization(null, false, null), + new BertTokenization(null, false, null, Tokenization.Truncate.NONE), null ) ) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizer.java index 52c7c758887a4..56158feb6dd38 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizer.java @@ -52,6 +52,7 @@ public class BertTokenizer implements NlpTokenizer { private final boolean doTokenizeCjKChars; private final boolean doStripAccents; private final boolean withSpecialTokens; + private final Tokenization.Truncate truncate; private final Set neverSplit; private final int maxSequenceLength; private final NlpTask.RequestBuilder requestBuilder; @@ -62,6 +63,7 @@ protected BertTokenizer(List originalVocab, boolean doTokenizeCjKChars, boolean doStripAccents, boolean withSpecialTokens, + Tokenization.Truncate truncate, int maxSequenceLength, Function requestBuilderFactory, Set neverSplit) { @@ -72,6 +74,7 @@ protected BertTokenizer(List originalVocab, this.doTokenizeCjKChars = doTokenizeCjKChars; this.doStripAccents = doStripAccents; this.withSpecialTokens = withSpecialTokens; + this.truncate = truncate; this.neverSplit = Sets.union(neverSplit, NEVER_SPLIT); this.maxSequenceLength = maxSequenceLength; this.requestBuilder = requestBuilderFactory.apply(this); @@ -113,6 +116,21 @@ public TokenizationResult.Tokenization tokenize(String seq) { List wordPieceTokens = innerResult.v1(); List tokenPositionMap = innerResult.v2(); int numTokens = withSpecialTokens ? wordPieceTokens.size() + 2 : wordPieceTokens.size(); + if (numTokens > maxSequenceLength) { + switch (truncate) { + case FIRST: + case SECOND: + wordPieceTokens = wordPieceTokens.subList(0, withSpecialTokens ? maxSequenceLength - 2 : maxSequenceLength); + break; + case NONE: + throw ExceptionsHelper.badRequestException( + "Input too large. 
The tokenized input length [{}] exceeds the maximum sequence length [{}]", + numTokens, + maxSequenceLength + ); + } + numTokens = maxSequenceLength; + } String[] tokens = new String[numTokens]; int[] tokenIds = new int[numTokens]; int[] tokenMap = new int[numTokens]; @@ -128,7 +146,7 @@ public TokenizationResult.Tokenization tokenize(String seq) { for (WordPieceTokenizer.TokenAndId tokenAndId : wordPieceTokens) { tokens[i] = tokenAndId.getToken(); tokenIds[i] = tokenAndId.getId(); - tokenMap[i] = tokenPositionMap.get(i-decrementHandler); + tokenMap[i] = tokenPositionMap.get(i - decrementHandler); i++; } @@ -138,13 +156,6 @@ public TokenizationResult.Tokenization tokenize(String seq) { tokenMap[i] = SPECIAL_TOKEN_POSITION; } - if (tokenIds.length > maxSequenceLength) { - throw ExceptionsHelper.badRequestException( - "Input too large. The tokenized input length [{}] exceeds the maximum sequence length [{}]", - tokenIds.length, - maxSequenceLength - ); - } return new TokenizationResult.Tokenization(seq, tokens, tokenIds, tokenMap); } @@ -161,6 +172,44 @@ public TokenizationResult.Tokenization tokenize(String seq1, String seq2) { } // [CLS] seq1 [SEP] seq2 [SEP] int numTokens = wordPieceTokenSeq1s.size() + wordPieceTokenSeq2s.size() + 3; + + if (numTokens > maxSequenceLength) { + switch (truncate) { + case FIRST: + if (wordPieceTokenSeq2s.size() > maxSequenceLength - 3) { + throw ExceptionsHelper.badRequestException( + "Attempting truncation [{}] but input is too large for the second sequence. " + + "The tokenized input length [{}] exceeds the maximum sequence length [{}], " + + "when taking special tokens into account", + truncate.toString(), + wordPieceTokenSeq2s.size(), + maxSequenceLength - 3 + ); + } + wordPieceTokenSeq1s = wordPieceTokenSeq1s.subList(0, maxSequenceLength - 3 - wordPieceTokenSeq2s.size()); + break; + case SECOND: + if (wordPieceTokenSeq1s.size() > maxSequenceLength - 3) { + throw ExceptionsHelper.badRequestException( + "Attempting truncation [{}] but input is too large for the first sequence. " + + "The tokenized input length [{}] exceeds the maximum sequence length [{}], " + + "when taking special tokens into account", + truncate.toString(), + wordPieceTokenSeq2s.size(), + maxSequenceLength - 3 + ); + } + wordPieceTokenSeq2s = wordPieceTokenSeq2s.subList(0, maxSequenceLength - 3 - wordPieceTokenSeq1s.size()); + break; + case NONE: + throw ExceptionsHelper.badRequestException( + "Input too large. 
The tokenized input length [{}] exceeds the maximum sequence length [{}]", + numTokens, + maxSequenceLength + ); + } + numTokens = maxSequenceLength; + } String[] tokens = new String[numTokens]; int[] tokenIds = new int[numTokens]; int[] tokenMap = new int[numTokens]; @@ -247,6 +296,7 @@ public static class Builder { protected boolean doLowerCase = false; protected boolean doTokenizeCjKChars = true; protected boolean withSpecialTokens = true; + protected Tokenization.Truncate truncate = Tokenization.Truncate.FIRST; protected int maxSequenceLength; protected Boolean doStripAccents = null; protected Set neverSplit; @@ -258,6 +308,7 @@ protected Builder(List vocab, Tokenization tokenization) { this.doLowerCase = tokenization.doLowerCase(); this.withSpecialTokens = tokenization.withSpecialTokens(); this.maxSequenceLength = tokenization.maxSequenceLength(); + this.truncate = tokenization.getTruncate(); } private static SortedMap buildSortedVocab(List vocab) { @@ -308,6 +359,11 @@ public Builder setRequestBuilderFactory(Function nthListItemFromMap(String name, int n, Map vocab, String input) { - BertTokenizer tokenizer = BertTokenizer.builder(vocab, new BertTokenization(true, false, null)) + BertTokenizer tokenizer = BertTokenizer.builder(vocab, new BertTokenization(true, false, null, Tokenization.Truncate.NONE)) .setDoLowerCase(true) .setWithSpecialTokens(false) .build(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextClassificationProcessorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextClassificationProcessorTests.java index 3338f149092c4..cb5ec0681c198 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextClassificationProcessorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextClassificationProcessorTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextClassificationConfig; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.VocabularyConfig; import org.elasticsearch.xpack.ml.inference.deployment.PyTorchResult; import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.BertTokenizer; @@ -67,7 +68,7 @@ public void testBuildRequest() throws IOException { BertTokenizer.CLASS_TOKEN, BertTokenizer.SEPARATOR_TOKEN, BertTokenizer.PAD_TOKEN), randomAlphaOfLength(10) ), - new BertTokenization(null, null, 512)); + new BertTokenization(null, null, 512, Tokenization.Truncate.NONE)); TextClassificationConfig config = new TextClassificationConfig( new VocabularyConfig("test-index"), null, List.of("a", "b"), null, null); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/ZeroShotClassificationProcessorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/ZeroShotClassificationProcessorTests.java index 82c41561a92a6..f523535da5797 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/ZeroShotClassificationProcessorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/ZeroShotClassificationProcessorTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.test.ESTestCase; import 
org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.VocabularyConfig; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ZeroShotClassificationConfig; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ZeroShotClassificationConfigUpdate; @@ -35,7 +36,7 @@ public void testBuildRequest() throws IOException { BertTokenizer.CLASS_TOKEN, BertTokenizer.SEPARATOR_TOKEN, BertTokenizer.PAD_TOKEN), randomAlphaOfLength(10) ), - new BertTokenization(null, true, 512)); + new BertTokenization(null, true, 512, Tokenization.Truncate.NONE)); ZeroShotClassificationConfig config = new ZeroShotClassificationConfig( List.of("entailment", "neutral", "contradiction"), diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizerTests.java index 53b31540be509..ab8c576a919ca 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizerTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.ml.inference.nlp.tokenizers; +import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; @@ -16,47 +17,102 @@ import java.util.List; import static org.hamcrest.Matchers.arrayContaining; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; public class BertTokenizerTests extends ESTestCase { + private static final List TEST_CASED_VOCAB = List.of( + "Elastic", + "##search", + "is", + "fun", + "my", + "little", + "red", + "car", + "God", + "##zilla", + ".", + ",", + BertTokenizer.CLASS_TOKEN, + BertTokenizer.SEPARATOR_TOKEN, + BertTokenizer.MASK_TOKEN, + BertTokenizer.UNKNOWN_TOKEN, + "day", + "Pancake", + "with" + ); + public void testTokenize() { BertTokenizer tokenizer = BertTokenizer.builder( - Arrays.asList("Elastic", "##search", "fun"), - new BertTokenization(null, false, null) + TEST_CASED_VOCAB, + new BertTokenization(null, false, null, Tokenization.Truncate.NONE) ).build(); TokenizationResult.Tokenization tokenization = tokenizer.tokenize("Elasticsearch fun"); assertThat(tokenization.getTokens(), arrayContaining("Elastic", "##search", "fun")); - assertArrayEquals(new int[] {0, 1, 2}, tokenization.getTokenIds()); + assertArrayEquals(new int[] { 0, 1, 3 }, tokenization.getTokenIds()); assertArrayEquals(new int[] {0, 0, 1}, tokenization.getTokenMap()); } + public void testTokenizeLargeInputNoTruncation() { + BertTokenizer tokenizer = BertTokenizer.builder(TEST_CASED_VOCAB, new BertTokenization(null, false, 5, Tokenization.Truncate.NONE)) + .build(); + + ElasticsearchStatusException ex = expectThrows( + ElasticsearchStatusException.class, + () -> tokenizer.tokenize("Elasticsearch fun with Pancake and Godzilla") + ); + assertThat(ex.getMessage(), equalTo("Input too large. 
The tokenized input length [8] exceeds the maximum sequence length [5]")); + + BertTokenizer specialCharTokenizer = BertTokenizer.builder( + TEST_CASED_VOCAB, + new BertTokenization(null, true, 5, Tokenization.Truncate.NONE) + ).build(); + + // Shouldn't throw + tokenizer.tokenize("Elasticsearch fun with Pancake"); + + // Should throw as special chars add two tokens + expectThrows(ElasticsearchStatusException.class, () -> specialCharTokenizer.tokenize("Elasticsearch fun with Pancake")); + } + + public void testTokenizeLargeInputTruncation() { + BertTokenizer tokenizer = BertTokenizer.builder(TEST_CASED_VOCAB, new BertTokenization(null, false, 5, Tokenization.Truncate.FIRST)) + .build(); + + TokenizationResult.Tokenization tokenization = tokenizer.tokenize("Elasticsearch fun with Pancake and Godzilla"); + assertThat(tokenization.getTokens(), arrayContaining("Elastic", "##search", "fun", "with", "Pancake")); + + tokenizer = BertTokenizer.builder(TEST_CASED_VOCAB, new BertTokenization(null, true, 5, Tokenization.Truncate.FIRST)).build(); + tokenization = tokenizer.tokenize("Elasticsearch fun with Pancake and Godzilla"); + assertThat(tokenization.getTokens(), arrayContaining("[CLS]", "Elastic", "##search", "fun", "[SEP]")); + } + public void testTokenizeAppendSpecialTokens() { BertTokenizer tokenizer = BertTokenizer.builder( - Arrays.asList( "elastic", "##search", "fun", BertTokenizer.CLASS_TOKEN, BertTokenizer.SEPARATOR_TOKEN), + TEST_CASED_VOCAB, Tokenization.createDefault() ).build(); - TokenizationResult.Tokenization tokenization = tokenizer.tokenize("elasticsearch fun"); - assertThat(tokenization.getTokens(), arrayContaining("[CLS]", "elastic", "##search", "fun", "[SEP]")); - assertArrayEquals(new int[] {3, 0, 1, 2, 4}, tokenization.getTokenIds()); + TokenizationResult.Tokenization tokenization = tokenizer.tokenize("Elasticsearch fun"); + assertThat(tokenization.getTokens(), arrayContaining("[CLS]", "Elastic", "##search", "fun", "[SEP]")); + assertArrayEquals(new int[] { 12, 0, 1, 3, 13 }, tokenization.getTokenIds()); assertArrayEquals(new int[] {-1, 0, 0, 1, -1}, tokenization.getTokenMap()); } public void testNeverSplitTokens() { final String specialToken = "SP001"; - BertTokenizer tokenizer = BertTokenizer.builder( - Arrays.asList("Elastic", "##search", "fun", specialToken, BertTokenizer.UNKNOWN_TOKEN), - Tokenization.createDefault() - ).setNeverSplit(Collections.singleton(specialToken)) + BertTokenizer tokenizer = BertTokenizer.builder(TEST_CASED_VOCAB, Tokenization.createDefault()) + .setNeverSplit(Collections.singleton(specialToken)) .setWithSpecialTokens(false) .build(); TokenizationResult.Tokenization tokenization = tokenizer.tokenize("Elasticsearch " + specialToken + " fun"); assertThat(tokenization.getTokens(), arrayContaining("Elastic", "##search", specialToken, "fun")); - assertArrayEquals(new int[] {0, 1, 3, 2}, tokenization.getTokenIds()); + assertArrayEquals(new int[] { 0, 1, 15, 3 }, tokenization.getTokenIds()); assertArrayEquals(new int[] {0, 0, 1, 2}, tokenization.getTokenMap()); } @@ -91,29 +147,25 @@ public void testDoLowerCase() { public void testPunctuation() { BertTokenizer tokenizer = BertTokenizer.builder( - Arrays.asList("Elastic", "##search", "fun", ".", ",", BertTokenizer.MASK_TOKEN, BertTokenizer.UNKNOWN_TOKEN), + TEST_CASED_VOCAB, Tokenization.createDefault() ).setWithSpecialTokens(false).build(); TokenizationResult.Tokenization tokenization = tokenizer.tokenize("Elasticsearch, fun."); assertThat(tokenization.getTokens(), arrayContaining("Elastic", "##search", 
",", "fun", ".")); - assertArrayEquals(new int[] {0, 1, 4, 2, 3}, tokenization.getTokenIds()); + assertArrayEquals(new int[] { 0, 1, 11, 3, 10 }, tokenization.getTokenIds()); assertArrayEquals(new int[] {0, 0, 1, 2, 3}, tokenization.getTokenMap()); tokenization = tokenizer.tokenize("Elasticsearch, fun [MASK]."); assertThat(tokenization.getTokens(), arrayContaining("Elastic", "##search", ",", "fun", "[MASK]", ".")); - assertArrayEquals(new int[] {0, 1, 4, 2, 5, 3}, tokenization.getTokenIds()); + assertArrayEquals(new int[] { 0, 1, 11, 3, 14, 10 }, tokenization.getTokenIds()); assertArrayEquals(new int[] {0, 0, 1, 2, 3, 4}, tokenization.getTokenMap()); } public void testBatchInput() { BertTokenizer tokenizer = BertTokenizer.builder( - Arrays.asList("Elastic", "##search", "fun", - "Pancake", "day", - "my", "little", "red", "car", - "God", "##zilla" - ), - new BertTokenization(null, false, null) + TEST_CASED_VOCAB, + new BertTokenization(null, false, null, Tokenization.Truncate.NONE) ).build(); TokenizationResult tr = tokenizer.buildTokenizationResult( @@ -133,36 +185,22 @@ public void testBatchInput() { tokenization = tr.getTokenizations().get(1); assertThat(tokenization.getTokens(), arrayContaining("my", "little", "red", "car")); - assertArrayEquals(new int[] {5, 6, 7, 8}, tokenization.getTokenIds()); + assertArrayEquals(new int[] { 4, 5, 6, 7 }, tokenization.getTokenIds()); assertArrayEquals(new int[] {0, 1, 2, 3}, tokenization.getTokenMap()); tokenization = tr.getTokenizations().get(2); assertThat(tokenization.getTokens(), arrayContaining("God", "##zilla", "day")); - assertArrayEquals(new int[] {9, 10, 4}, tokenization.getTokenIds()); + assertArrayEquals(new int[] { 8, 9, 16 }, tokenization.getTokenIds()); assertArrayEquals(new int[] {0, 0, 1}, tokenization.getTokenMap()); tokenization = tr.getTokenizations().get(3); assertThat(tokenization.getTokens(), arrayContaining("God", "##zilla", "Pancake", "red", "car", "day")); - assertArrayEquals(new int[] {9, 10, 3, 7, 8, 4}, tokenization.getTokenIds()); + assertArrayEquals(new int[] { 8, 9, 17, 6, 7, 16 }, tokenization.getTokenIds()); assertArrayEquals(new int[] {0, 0, 1, 2, 3, 4}, tokenization.getTokenMap()); } public void testMultiSeqTokenization() { - List vocab = List.of( - "Elastic", - "##search", - "is", - "fun", - "my", - "little", - "red", - "car", - "God", - "##zilla", - BertTokenizer.CLASS_TOKEN, - BertTokenizer.SEPARATOR_TOKEN - ); - BertTokenizer tokenizer = BertTokenizer.builder(vocab, Tokenization.createDefault()) + BertTokenizer tokenizer = BertTokenizer.builder(TEST_CASED_VOCAB, Tokenization.createDefault()) .setDoLowerCase(false) .setWithSpecialTokens(true) .build(); @@ -185,7 +223,56 @@ public void testMultiSeqTokenization() { BertTokenizer.SEPARATOR_TOKEN ) ); - assertArrayEquals(new int[] { 10, 0, 1, 2, 3, 11, 8, 9, 4, 5, 6, 7, 11 }, tokenization.getTokenIds()); + assertArrayEquals(new int[] { 12, 0, 1, 2, 3, 13, 8, 9, 4, 5, 6, 7, 13 }, tokenization.getTokenIds()); + } + + public void testTokenizeLargeInputMultiSequenceTruncation() { + BertTokenizer tokenizer = BertTokenizer.builder(TEST_CASED_VOCAB, new BertTokenization(null, true, 10, Tokenization.Truncate.FIRST)) + .build(); + + TokenizationResult.Tokenization tokenization = tokenizer.tokenize("Elasticsearch is fun", "Godzilla my little red car"); + assertThat( + tokenization.getTokens(), + arrayContaining( + BertTokenizer.CLASS_TOKEN, + "Elastic", + BertTokenizer.SEPARATOR_TOKEN, + "God", + "##zilla", + "my", + "little", + "red", + "car", + BertTokenizer.SEPARATOR_TOKEN 
+ ) + ); + + expectThrows( + ElasticsearchStatusException.class, + () -> BertTokenizer.builder(TEST_CASED_VOCAB, new BertTokenization(null, true, 8, Tokenization.Truncate.NONE)) + .build() + .tokenize("Elasticsearch is fun", "Godzilla my little red car") + ); + + tokenizer = BertTokenizer.builder(TEST_CASED_VOCAB, new BertTokenization(null, true, 10, Tokenization.Truncate.SECOND)).build(); + + tokenization = tokenizer.tokenize("Elasticsearch is fun", "Godzilla my little red car"); + assertThat( + tokenization.getTokens(), + arrayContaining( + BertTokenizer.CLASS_TOKEN, + "Elastic", + "##search", + "is", + "fun", + BertTokenizer.SEPARATOR_TOKEN, + "God", + "##zilla", + "my", + BertTokenizer.SEPARATOR_TOKEN + ) + ); + } public void testMultiSeqRequiresSpecialTokens() { From 9b92926ed961f73ce95a185629c577dbca43fc14 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 21 Oct 2021 15:47:49 +0100 Subject: [PATCH 11/21] [ML] Modernize ML node detection (#79623) Where possible we should now detect ML nodes by looking for the ML node role rather than an attribute. Sadly this is not possible in the YAML tests, as the syntax doesn't support asserting that a list contains a particular item without asserting on the entire list contents. So in the YAML case we need to look for the ml.machine_memory node attribute, as this is reliably set on ML nodes but not other types of nodes. After #79622 is merged this closes #79518 --- x-pack/plugin/build.gradle | 1 - .../xpack/ml/MachineLearning.java | 8 +-- ...chineLearningInfoTransportActionTests.java | 6 +- ...ortStartDataFrameAnalyticsActionTests.java | 6 +- .../MlAutoscalingDeciderServiceTests.java | 3 +- .../xpack/ml/job/JobNodeSelectorTests.java | 68 ++++++++++--------- .../rest-api-spec/test/ml/jobs_get_stats.yml | 1 + 7 files changed, 47 insertions(+), 46 deletions(-) diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index b97206ef18b80..0ec41b33e6330 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -98,7 +98,6 @@ tasks.named("yamlRestTestV7CompatTransform").configure{ task -> task.skipTest("vectors/50_vector_stats/Usage stats on vector fields", "not supported for compatibility") task.skipTest("roles/30_prohibited_role_query/Test use prohibited query inside role query", "put role request with a term lookup (deprecated) and type. 
Requires validation in REST layer") task.skipTest("ml/jobs_crud/Test create job with delimited format", "removing undocumented functionality") - task.skipTest("ml/jobs_get_stats/Test get job stats after uploading data prompting the creation of some stats", "https://github.com/elastic/elasticsearch/issues/79518") task.skipTest("ml/datafeeds_crud/Test update datafeed to point to missing job", "behaviour change #44752 - not allowing to update datafeed job_id") task.skipTest("ml/datafeeds_crud/Test update datafeed to point to different job", "behaviour change #44752 - not allowing to update datafeed job_id") task.skipTest("ml/datafeeds_crud/Test update datafeed to point to job already attached to another datafeed", "behaviour change #44752 - not allowing to update datafeed job_id") diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 97e0a154243d4..236aba1cd6966 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -618,12 +618,7 @@ public MachineLearning(Settings settings) { protected XPackLicenseState getLicenseState() { return XPackPlugin.getSharedLicenseState(); } public static boolean isMlNode(DiscoveryNode node) { - Map<String, String> nodeAttributes = node.getAttributes(); - try { - return Long.parseLong(nodeAttributes.get(MACHINE_MEMORY_NODE_ATTR)) > 0; - } catch (NumberFormatException e) { - return false; - } + return node.getRoles().contains(DiscoveryNodeRole.ML_ROLE); } public List<Setting<?>> getSettings() { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java index 0402b32b5a8af..f9e84f962653c 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MachineLearningInfoTransportActionTests.java @@ -543,10 +543,8 @@ private ClusterState givenNodeCount(int nodeCount) { DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); for (int i = 0; i < nodeCount; i++) { Map<String, String> attrs = Map.of(MachineLearning.MACHINE_MEMORY_NODE_ATTR, "1000000000"); - Set<DiscoveryNodeRole> roles = new HashSet<>(); - roles.add(DiscoveryNodeRole.DATA_ROLE); - roles.add(DiscoveryNodeRole.MASTER_ROLE); - roles.add(DiscoveryNodeRole.INGEST_ROLE); + Set<DiscoveryNodeRole> roles = Set.of(DiscoveryNodeRole.DATA_ROLE, DiscoveryNodeRole.MASTER_ROLE, + DiscoveryNodeRole.INGEST_ROLE, DiscoveryNodeRole.ML_ROLE); nodesBuilder.add(new DiscoveryNode("ml-feature-set-given-ml-node-" + i, new TransportAddress(TransportAddress.META_ADDRESS, 9100 + i), attrs, diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsActionTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsActionTests.java index 5a4f3c3d0d365..a1ce3bd76abb0 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsActionTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsActionTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.Metadata; import 
org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.ClusterSettings; @@ -32,8 +33,8 @@ import org.elasticsearch.xpack.ml.process.MlMemoryTracker; import java.net.InetAddress; -import java.util.Collections; import java.util.Map; +import java.util.Set; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; @@ -177,7 +178,7 @@ private static DiscoveryNode createNode(int i, boolean isMlNode, Version nodeVer "_node_id" + i, new TransportAddress(InetAddress.getLoopbackAddress(), 9300 + i), isMlNode ? Map.of("ml.machine_memory", String.valueOf(ByteSizeValue.ofGb(1).getBytes())) : Map.of(), - Collections.emptySet(), + isMlNode ? Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE, DiscoveryNodeRole.ML_ROLE) + : Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE), nodeVersion); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java index 6a7c7eb892a95..c3b1a3e64f558 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java @@ -46,7 +46,6 @@ import java.util.Arrays; import java.util.Collections; import java.util.Date; -import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; @@ -705,7 +704,7 @@ private static List<DiscoveryNode> withMlNodes(String... nodeName) { .put(MachineLearning.MACHINE_MEMORY_NODE_ATTR, String.valueOf(DEFAULT_NODE_SIZE)) .put(MachineLearning.MAX_JVM_SIZE_NODE_ATTR, String.valueOf(DEFAULT_JVM_SIZE)) .map(), - new HashSet<>(List.of(DiscoveryNodeRole.MASTER_ROLE)), + Set.of(DiscoveryNodeRole.ML_ROLE), Version.CURRENT)) .collect(Collectors.toList()); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/JobNodeSelectorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/JobNodeSelectorTests.java index 8662a0f42b8e0..cbccca9619e44 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/JobNodeSelectorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/JobNodeSelectorTests.java @@ -11,6 +11,7 @@ import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.Randomness; import org.elasticsearch.common.collect.MapBuilder; @@ -42,6 +43,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -59,6 +61,10 @@ public class JobNodeSelectorTests extends ESTestCase { // To simplify the logic in this class all jobs have the same memory requirement private static final long MAX_JOB_BYTES = ByteSizeValue.ofGb(1).getBytes(); private static final ByteSizeValue JOB_MEMORY_REQUIREMENT = ByteSizeValue.ofMb(10); + private static final Set<DiscoveryNodeRole> ROLES_WITH_ML = + Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.ML_ROLE, DiscoveryNodeRole.DATA_ROLE); + private static final Set<DiscoveryNodeRole> ROLES_WITHOUT_ML = + Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE); private MlMemoryTracker memoryTracker; private boolean isMemoryTrackerRecentlyRefreshed; @@ -77,7 +83,7 @@ public void testNodeNameAndVersion() { TransportAddress ta = new TransportAddress(InetAddress.getLoopbackAddress(), 9300); Map<String, String> attributes = new HashMap<>(); attributes.put("unrelated", "attribute"); - DiscoveryNode node = new DiscoveryNode("_node_name1", "_node_id1", ta, attributes, Collections.emptySet(), Version.CURRENT); + DiscoveryNode node = new DiscoveryNode("_node_name1", "_node_id1", ta, attributes, ROLES_WITHOUT_ML, Version.CURRENT); assertEquals("{_node_name1}{version=" + node.getVersion() + "}", JobNodeSelector.nodeNameAndVersion(node)); } @@ -85,14 +91,14 @@ public void testNodeNameAndMlAttributes() { TransportAddress ta = new TransportAddress(InetAddress.getLoopbackAddress(), 9300); SortedMap<String, String> attributes = new TreeMap<>(); attributes.put("unrelated", "attribute"); - DiscoveryNode node = new DiscoveryNode("_node_name1", "_node_id1", ta, attributes, Collections.emptySet(), Version.CURRENT); + DiscoveryNode node = new DiscoveryNode("_node_name1", "_node_id1", ta, attributes, ROLES_WITHOUT_ML, Version.CURRENT); assertEquals("{_node_name1}", JobNodeSelector.nodeNameAndMlAttributes(node)); attributes.put("ml.machine_memory", "5"); - node = new DiscoveryNode("_node_name1", "_node_id1", ta, attributes, Collections.emptySet(), Version.CURRENT); + node = new DiscoveryNode("_node_name1", "_node_id1", ta, attributes, ROLES_WITH_ML, Version.CURRENT); assertEquals("{_node_name1}{ml.machine_memory=5}", JobNodeSelector.nodeNameAndMlAttributes(node)); - node = new DiscoveryNode(null, "_node_id1", ta, attributes, Collections.emptySet(), Version.CURRENT); + node = 
new DiscoveryNode(null, "_node_id1", ta, attributes, ROLES_WITH_ML, Version.CURRENT); assertEquals("{_node_id1}{ml.machine_memory=5}", JobNodeSelector.nodeNameAndMlAttributes(node)); } @@ -321,9 +327,9 @@ public void testSelectLeastLoadedMlNodeForDataFrameAnalyticsJob_firstJobTooBigMe public void testSelectLeastLoadedMlNode_noMlNodes() { DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - Collections.emptyMap(), Collections.emptySet(), Version.CURRENT)) + Collections.emptyMap(), ROLES_WITHOUT_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - Collections.emptyMap(), Collections.emptySet(), Version.CURRENT)) + Collections.emptyMap(), ROLES_WITHOUT_ML, Version.CURRENT)) .build(); PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); @@ -358,11 +364,11 @@ public void testSelectLeastLoadedMlNode_maxConcurrentOpeningJobs() { Map nodeAttr = Map.of(MachineLearning.MACHINE_MEMORY_NODE_ATTR, "1000000000"); DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name3", "_node_id3", new TransportAddress(InetAddress.getLoopbackAddress(), 9302), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .build(); PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); @@ -465,11 +471,11 @@ public void testSelectLeastLoadedMlNode_concurrentOpeningJobsAndStaleFailedJob() Map nodeAttr = Map.of(MachineLearning.MACHINE_MEMORY_NODE_ATTR, "1000000000"); DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name3", "_node_id3", new TransportAddress(InetAddress.getLoopbackAddress(), 9302), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .build(); PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); @@ -532,9 +538,9 @@ public void testSelectLeastLoadedMlNode_noCompatibleJobTypeNodes() { Map nodeAttr = Map.of(MachineLearning.MACHINE_MEMORY_NODE_ATTR, "1000000000"); DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) 
.build(); PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); @@ -577,7 +583,7 @@ public void testSelectLeastLoadedMlNode_reasonsAreInDeterministicOrder() { "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), nodeAttr, - Collections.emptySet(), + ROLES_WITH_ML, Version.CURRENT ) ) @@ -587,7 +593,7 @@ public void testSelectLeastLoadedMlNode_reasonsAreInDeterministicOrder() { "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), nodeAttr, - Collections.emptySet(), + ROLES_WITH_ML, Version.CURRENT ) ) @@ -639,9 +645,9 @@ public void testSelectLeastLoadedMlNode_noNodesMatchingModelSnapshotMinVersion() Map nodeAttr = Map.of(MachineLearning.MACHINE_MEMORY_NODE_ATTR, "1000000000"); DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - nodeAttr, Collections.emptySet(), Version.fromString("6.2.0"))) + nodeAttr, ROLES_WITH_ML, Version.fromString("6.2.0"))) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - nodeAttr, Collections.emptySet(), Version.fromString("6.1.0"))) + nodeAttr, ROLES_WITH_ML, Version.fromString("6.1.0"))) .build(); PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); @@ -677,9 +683,9 @@ public void testSelectLeastLoadedMlNode_jobWithRules() { Map nodeAttr = Map.of(MachineLearning.MACHINE_MEMORY_NODE_ATTR, "1000000000"); DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - nodeAttr, Collections.emptySet(), Version.fromString("6.2.0"))) + nodeAttr, ROLES_WITH_ML, Version.fromString("6.2.0"))) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - nodeAttr, Collections.emptySet(), Version.fromString("6.4.0"))) + nodeAttr, ROLES_WITH_ML, Version.fromString("6.4.0"))) .build(); PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); @@ -711,9 +717,9 @@ public void testSelectMlNodeOnlyOutOfCandidates() { Map nodeAttr = Map.of(MachineLearning.MACHINE_MEMORY_NODE_ATTR, "1000000000"); DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - nodeAttr, Collections.emptySet(), Version.CURRENT)) + nodeAttr, ROLES_WITH_ML, Version.CURRENT)) .build(); PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); @@ -745,9 +751,9 @@ public void testSelectMlNodeOnlyOutOfCandidates() { public void testConsiderLazyAssignmentWithNoLazyNodes() { DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - Collections.emptyMap(), Collections.emptySet(), Version.CURRENT)) + Collections.emptyMap(), ROLES_WITHOUT_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - Collections.emptyMap(), Collections.emptySet(), Version.CURRENT)) + Collections.emptyMap(), ROLES_WITHOUT_ML, 
Version.CURRENT)) .build(); ClusterState.Builder cs = ClusterState.builder(new ClusterName("_name")); @@ -769,9 +775,9 @@ public void testConsiderLazyAssignmentWithNoLazyNodes() { public void testConsiderLazyAssignmentWithLazyNodes() { DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("_node_name1", "_node_id1", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - Collections.emptyMap(), Collections.emptySet(), Version.CURRENT)) + Collections.emptyMap(), ROLES_WITHOUT_ML, Version.CURRENT)) .add(new DiscoveryNode("_node_name2", "_node_id2", new TransportAddress(InetAddress.getLoopbackAddress(), 9301), - Collections.emptyMap(), Collections.emptySet(), Version.CURRENT)) + Collections.emptyMap(), ROLES_WITHOUT_ML, Version.CURRENT)) .build(); ClusterState.Builder cs = ClusterState.builder(new ClusterName("_name")); @@ -823,7 +829,7 @@ public void testMaximumPossibleNodeMemoryTooSmall() { public void testPerceivedCapacityAndMaxFreeMemory() { DiscoveryNodes nodes = DiscoveryNodes.builder() .add(new DiscoveryNode("not_ml_node_name", "_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), - Collections.emptyMap(), Collections.emptySet(), Version.CURRENT)) + Collections.emptyMap(), ROLES_WITHOUT_ML, Version.CURRENT)) .add(new DiscoveryNode( "filled_ml_node_name", "filled_ml_node_id", @@ -832,7 +838,7 @@ public void testPerceivedCapacityAndMaxFreeMemory() { .put(MachineLearning.MAX_JVM_SIZE_NODE_ATTR, "10") .put(MachineLearning.MACHINE_MEMORY_NODE_ATTR, Long.toString(ByteSizeValue.ofGb(30).getBytes())) .map(), - Collections.emptySet(), + ROLES_WITH_ML, Version.CURRENT)) .add(new DiscoveryNode("not_filled_ml_node", "not_filled_ml_node_id", @@ -841,7 +847,7 @@ public void testPerceivedCapacityAndMaxFreeMemory() { .put(MachineLearning.MAX_JVM_SIZE_NODE_ATTR, "10") .put(MachineLearning.MACHINE_MEMORY_NODE_ATTR, Long.toString(ByteSizeValue.ofGb(30).getBytes())) .map(), - Collections.emptySet(), + ROLES_WITH_ML, Version.CURRENT)) .add(new DiscoveryNode("not_filled_smaller_ml_node", "not_filled_smaller_ml_node_id", @@ -850,7 +856,7 @@ public void testPerceivedCapacityAndMaxFreeMemory() { .put(MachineLearning.MAX_JVM_SIZE_NODE_ATTR, "10") .put(MachineLearning.MACHINE_MEMORY_NODE_ATTR, Long.toString(ByteSizeValue.ofGb(10).getBytes())) .map(), - Collections.emptySet(), + ROLES_WITH_ML, Version.CURRENT)) .build(); @@ -894,7 +900,7 @@ private ClusterState.Builder fillNodesWithRunningJobs(Map nodeAt for (int i = 0; i < numNodes; i++) { String nodeId = "_node_id" + i; TransportAddress address = new TransportAddress(InetAddress.getLoopbackAddress(), 9300 + i); - nodes.add(new DiscoveryNode("_node_name" + i, nodeId, address, nodeAttr, Collections.emptySet(), Version.CURRENT)); + nodes.add(new DiscoveryNode("_node_name" + i, nodeId, address, nodeAttr, ROLES_WITH_ML, Version.CURRENT)); for (int j = 0; j < numRunningJobsPerNode; j++) { int id = j + (numRunningJobsPerNode * i); // Both anomaly detector jobs and data frame analytics jobs should count towards the limit diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/jobs_get_stats.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/jobs_get_stats.yml index 8f4414396445f..a5d258378c1df 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/jobs_get_stats.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/jobs_get_stats.yml @@ -103,6 +103,7 @@ setup: - match: { jobs.0.state: opened } - is_true: jobs.0.node.name - is_true: 
jobs.0.node.transport_address + - is_true: jobs.0.node.attributes.ml\.machine_memory + - is_true: jobs.0.open_time - match: { jobs.0.timing_stats.job_id: job-stats-test } - match: { jobs.0.timing_stats.bucket_count: 1 } # Records are 1h apart and bucket span is 1h so 1 bucket is produced From 947e3656cccf34a0ee892fdce05b840c63c06ae6 Mon Sep 17 00:00:00 2001 From: David Roberts Date: Thu, 21 Oct 2021 16:43:32 +0100 Subject: [PATCH 12/21] [ML] Tone down ML unassigned job notifications (#79578) When an ML node is restarted, the ML jobs that were running on it often spend a short period unassigned while the work to assign them to a new node is performed. We used to generate warning notifications for such jobs while they were unassigned, which caused unnecessary worry. The warning notifications cause a yellow warning triangle in the UI jobs list. This PR changes the ML job assignment notifications so that, instead of a warning notification for every single unassigned reason that a job cycles through before it is assigned, there will just be info messages for assignment and unassignment. Once per day we will still audit unassigned jobs and generate the same warning messages as before. These messages should be sufficient to report jobs that cannot be assigned for long periods due to lack of capacity or some other non-transient problem. Fixes #79270 --- .../integration/RunDataFrameAnalyticsIT.java | 2 +- .../xpack/ml/MachineLearning.java | 2 +- .../xpack/ml/MlAssignmentNotifier.java | 89 +++++++++------- .../xpack/ml/MlAssignmentNotifierTests.java | 100 ++++++++++-------- 4 files changed, 111 insertions(+), 82 deletions(-) diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java index df3d297bfc7cd..89865e7deb0b6 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/RunDataFrameAnalyticsIT.java @@ -593,7 +593,7 @@ public void testLazyAssignmentWithModelMemoryLimitTooHighForAssignment() throws assertThatAuditMessagesMatch(id, "Created analytics with type [outlier_detection]", "Estimated memory usage [", - "No node found to start analytics. 
Reasons [persistent task is awaiting node assignment.]", + "Job requires at least [1tb] free memory on a machine learning capable node to run", "Started analytics", "Stopped analytics"); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 236aba1cd6966..a26f65bfeac94 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -887,7 +887,7 @@ public Collection createComponents(Client client, ClusterService cluster clusterService, datafeedRunner, mlController, autodetectProcessManager, dataFrameAnalyticsManager, memoryTracker); this.mlLifeCycleService.set(mlLifeCycleService); MlAssignmentNotifier mlAssignmentNotifier = new MlAssignmentNotifier(anomalyDetectionAuditor, dataFrameAnalyticsAuditor, threadPool, - new MlConfigMigrator(settings, client, clusterService, indexNameExpressionResolver), clusterService); + clusterService); MlAutoUpdateService mlAutoUpdateService = new MlAutoUpdateService(threadPool, List.of(new DatafeedConfigAutoUpdater(datafeedConfigProvider, indexNameExpressionResolver))); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java index c37379fcd6b5b..28e1aa7794aeb 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java @@ -8,12 +8,12 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterStateListener; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.Strings; import org.elasticsearch.persistent.PersistentTasksCustomMetadata; import org.elasticsearch.persistent.PersistentTasksCustomMetadata.Assignment; import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask; @@ -33,14 +33,12 @@ public class MlAssignmentNotifier implements ClusterStateListener { private final AnomalyDetectionAuditor anomalyDetectionAuditor; private final DataFrameAnalyticsAuditor dataFrameAnalyticsAuditor; - private final MlConfigMigrator mlConfigMigrator; private final ThreadPool threadPool; MlAssignmentNotifier(AnomalyDetectionAuditor anomalyDetectionAuditor, DataFrameAnalyticsAuditor dataFrameAnalyticsAuditor, - ThreadPool threadPool, MlConfigMigrator mlConfigMigrator, ClusterService clusterService) { + ThreadPool threadPool, ClusterService clusterService) { this.anomalyDetectionAuditor = anomalyDetectionAuditor; this.dataFrameAnalyticsAuditor = dataFrameAnalyticsAuditor; - this.mlConfigMigrator = mlConfigMigrator; this.threadPool = threadPool; clusterService.addListener(this); } @@ -56,21 +54,15 @@ public void clusterChanged(ClusterChangedEvent event) { return; } - mlConfigMigrator.migrateConfigs(event.state(), ActionListener.wrap( - response -> threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(event)), - e -> { - logger.error("error migrating ml configurations", e); - threadPool.executor(executorName()).execute(() -> 
auditChangesToMlTasks(event)); - } - )); - } - - private void auditChangesToMlTasks(ClusterChangedEvent event) { - if (event.metadataChanged() == false) { return; } + threadPool.executor(executorName()).execute(() -> auditChangesToMlTasks(event)); + } + + private void auditChangesToMlTasks(ClusterChangedEvent event) { + PersistentTasksCustomMetadata previousTasks = event.previousState().getMetadata().custom(PersistentTasksCustomMetadata.TYPE); PersistentTasksCustomMetadata currentTasks = event.state().getMetadata().custom(PersistentTasksCustomMetadata.TYPE); @@ -78,7 +70,7 @@ private void auditChangesToMlTasks(ClusterChangedEvent event) { return; } - auditMlTasks(event.state().nodes(), previousTasks, currentTasks, false); + auditMlTasks(event.previousState().nodes(), event.state().nodes(), previousTasks, currentTasks, false); } /** @@ -87,10 +79,11 @@ private void auditChangesToMlTasks(ClusterChangedEvent event) { * Care must be taken not to call this method frequently. */ public void auditUnassignedMlTasks(DiscoveryNodes nodes, PersistentTasksCustomMetadata tasks) { - auditMlTasks(nodes, tasks, tasks, true); + auditMlTasks(nodes, nodes, tasks, tasks, true); } - private void auditMlTasks(DiscoveryNodes nodes, PersistentTasksCustomMetadata previousTasks, PersistentTasksCustomMetadata currentTasks, + private void auditMlTasks(DiscoveryNodes previousNodes, DiscoveryNodes currentNodes, + PersistentTasksCustomMetadata previousTasks, PersistentTasksCustomMetadata currentTasks, boolean alwaysAuditUnassigned) { for (PersistentTask currentTask : currentTasks.tasks()) { @@ -103,45 +96,67 @@ private void auditMlTasks(DiscoveryNodes nodes, PersistentTasksCustomMetadata pr (isTaskAssigned || alwaysAuditUnassigned == false)) { continue; } + boolean wasTaskAssigned = (previousAssignment != null) && (previousAssignment.getExecutorNode() != null); if (MlTasks.JOB_TASK_NAME.equals(currentTask.getTaskName())) { String jobId = ((OpenJobAction.JobParams) currentTask.getParams()).getJobId(); if (isTaskAssigned) { - DiscoveryNode node = nodes.get(currentAssignment.getExecutorNode()); - anomalyDetectionAuditor.info(jobId, "Opening job on node [" + node.toString() + "]"); - } else { + String nodeName = nodeName(currentNodes, currentAssignment.getExecutorNode()); + anomalyDetectionAuditor.info(jobId, "Opening job on node [" + nodeName + "]"); + } else if (alwaysAuditUnassigned) { anomalyDetectionAuditor.warning(jobId, "No node found to open job. Reasons [" + currentAssignment.getExplanation() + "]"); + } else if (wasTaskAssigned) { + String nodeName = nodeName(previousNodes, previousAssignment.getExecutorNode()); + anomalyDetectionAuditor.info(jobId, "Job unassigned from node [" + nodeName + "]"); } } else if (MlTasks.DATAFEED_TASK_NAME.equals(currentTask.getTaskName())) { StartDatafeedAction.DatafeedParams datafeedParams = (StartDatafeedAction.DatafeedParams) currentTask.getParams(); String jobId = datafeedParams.getJobId(); - if (isTaskAssigned) { - DiscoveryNode node = nodes.get(currentAssignment.getExecutorNode()); - if (jobId != null) { + if (jobId != null) { + if (isTaskAssigned) { + String nodeName = nodeName(currentNodes, currentAssignment.getExecutorNode()); anomalyDetectionAuditor.info(jobId, - "Starting datafeed [" + datafeedParams.getDatafeedId() + "] on node [" + node + "]"); - } - } else { - String msg = "No node found to start datafeed [" + datafeedParams.getDatafeedId() +"]. 
Reasons [" + - currentAssignment.getExplanation() + "]"; - if (alwaysAuditUnassigned == false) { - logger.warn("[{}] {}", jobId, msg); - } - if (jobId != null) { - anomalyDetectionAuditor.warning(jobId, msg); + "Starting datafeed [" + datafeedParams.getDatafeedId() + "] on node [" + nodeName + "]"); + } else if (alwaysAuditUnassigned) { + anomalyDetectionAuditor.warning(jobId, + "No node found to start datafeed [" + datafeedParams.getDatafeedId() + "]. Reasons [" + + currentAssignment.getExplanation() + "]"); + } else if (wasTaskAssigned) { + String nodeName = nodeName(previousNodes, previousAssignment.getExecutorNode()); + anomalyDetectionAuditor.info(jobId, + "Datafeed [" + datafeedParams.getDatafeedId() + "] unassigned from node [" + nodeName + "]"); + } else { + logger.warn("[{}] No node found to start datafeed [{}]. Reasons [{}]", + jobId, datafeedParams.getDatafeedId(), currentAssignment.getExplanation()); } } } else if (MlTasks.DATA_FRAME_ANALYTICS_TASK_NAME.equals(currentTask.getTaskName())) { String id = ((StartDataFrameAnalyticsAction.TaskParams) currentTask.getParams()).getId(); if (isTaskAssigned) { - DiscoveryNode node = nodes.get(currentAssignment.getExecutorNode()); - dataFrameAnalyticsAuditor.info(id, "Starting analytics on node [" + node.toString() + "]"); + String nodeName = nodeName(currentNodes, currentAssignment.getExecutorNode()); + dataFrameAnalyticsAuditor.info(id, "Starting analytics on node [" + nodeName + "]"); } else if (alwaysAuditUnassigned) { dataFrameAnalyticsAuditor.warning(id, "No node found to start analytics. Reasons [" + currentAssignment.getExplanation() + "]"); + } else if (wasTaskAssigned) { + String nodeName = nodeName(previousNodes, previousAssignment.getExecutorNode()); + dataFrameAnalyticsAuditor.info(id, "Analytics unassigned from node [" + nodeName + "]"); } } } } + + static String nodeName(DiscoveryNodes nodes, String nodeId) { + // It's possible that we're reporting on a node that left the + // cluster in an earlier cluster state update, in which case + // the cluster state we've got doesn't record its friendly + // name. In this case we have no choice but to use the ID. (We + // also use the ID in tests that don't bother to name nodes.) 
+ DiscoveryNode node = nodes.get(nodeId); + if (node != null && Strings.hasLength(node.getName())) { + return node.getName(); + } + return nodeId; + } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java index faabd0b67a94b..5214113cddce8 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.ml; import org.elasticsearch.Version; -import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; @@ -30,8 +29,6 @@ import static org.elasticsearch.xpack.ml.job.task.OpenJobPersistentTasksExecutorTests.addJobTask; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyString; -import static org.mockito.Matchers.eq; -import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -44,16 +41,13 @@ public class MlAssignmentNotifierTests extends ESTestCase { private DataFrameAnalyticsAuditor dataFrameAnalyticsAuditor; private ClusterService clusterService; private ThreadPool threadPool; - private MlConfigMigrator configMigrator; @Before - @SuppressWarnings("unchecked") - private void setupMocks() { + public void setupMocks() { anomalyDetectionAuditor = mock(AnomalyDetectionAuditor.class); dataFrameAnalyticsAuditor = mock(DataFrameAnalyticsAuditor.class); clusterService = mock(ClusterService.class); threadPool = mock(ThreadPool.class); - configMigrator = mock(MlConfigMigrator.class); threadPool = mock(ThreadPool.class); ExecutorService executorService = mock(ExecutorService.class); @@ -62,17 +56,11 @@ private void setupMocks() { return null; }).when(executorService).execute(any(Runnable.class)); when(threadPool.executor(anyString())).thenReturn(executorService); - - doAnswer(invocation -> { - ActionListener listener = (ActionListener) invocation.getArguments()[1]; - listener.onResponse(Boolean.TRUE); - return null; - }).when(configMigrator).migrateConfigs(any(ClusterState.class), any(ActionListener.class)); } - public void testClusterChanged_info() { + public void testClusterChanged_assign() { MlAssignmentNotifier notifier = new MlAssignmentNotifier(anomalyDetectionAuditor, dataFrameAnalyticsAuditor, threadPool, - configMigrator, clusterService); + clusterService); ClusterState previous = ClusterState.builder(new ClusterName("_name")) .metadata(Metadata.builder().putCustom(PersistentTasksCustomMetadata.TYPE, @@ -91,49 +79,56 @@ public void testClusterChanged_info() { .masterNodeId("_node_id")) .build(); notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous)); - verify(anomalyDetectionAuditor, times(1)).info(eq("job_id"), any()); - verify(configMigrator, times(1)).migrateConfigs(eq(newState), any()); + verify(anomalyDetectionAuditor, times(1)).info("job_id", "Opening job on node [_node_id]"); // no longer master newState = ClusterState.builder(new ClusterName("_name")) .metadata(metadata) .nodes(DiscoveryNodes.builder() - .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9300), Version.CURRENT))) + .add(new DiscoveryNode("_node_id", new 
TransportAddress(InetAddress.getLoopbackAddress(), 9300), Version.CURRENT)) + .localNodeId("_node_id")) .build(); notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous)); verifyNoMoreInteractions(anomalyDetectionAuditor); } - public void testClusterChanged_warning() { + public void testClusterChanged_unassign() { MlAssignmentNotifier notifier = new MlAssignmentNotifier(anomalyDetectionAuditor, dataFrameAnalyticsAuditor, threadPool, - configMigrator, clusterService); + clusterService); + PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); + addJobTask("job_id", "_node_id", null, tasksBuilder); + Metadata metadata = Metadata.builder().putCustom(PersistentTasksCustomMetadata.TYPE, tasksBuilder.build()).build(); ClusterState previous = ClusterState.builder(new ClusterName("_name")) - .metadata(Metadata.builder().putCustom(PersistentTasksCustomMetadata.TYPE, - new PersistentTasksCustomMetadata(0L, Collections.emptyMap()))) - .build(); - - PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); + .metadata(metadata) + // set local node master + .nodes(DiscoveryNodes.builder() + .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT)) + .localNodeId("_node_id") + .masterNodeId("_node_id")) + .build(); + + tasksBuilder = PersistentTasksCustomMetadata.builder(); addJobTask("job_id", null, null, tasksBuilder); - Metadata metadata = Metadata.builder().putCustom(PersistentTasksCustomMetadata.TYPE, tasksBuilder.build()).build(); + metadata = Metadata.builder().putCustom(PersistentTasksCustomMetadata.TYPE, tasksBuilder.build()).build(); ClusterState newState = ClusterState.builder(new ClusterName("_name")) - .metadata(metadata) - // set local node master - .nodes(DiscoveryNodes.builder() - .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT)) - .localNodeId("_node_id") - .masterNodeId("_node_id")) - .build(); + .metadata(metadata) + // set local node master + .nodes(DiscoveryNodes.builder() + .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT)) + .localNodeId("_node_id") + .masterNodeId("_node_id")) + .build(); notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous)); - verify(anomalyDetectionAuditor, times(1)).warning(eq("job_id"), any()); - verify(configMigrator, times(1)).migrateConfigs(eq(newState), any()); + verify(anomalyDetectionAuditor, times(1)).info("job_id", "Job unassigned from node [_node_id]"); // no longer master newState = ClusterState.builder(new ClusterName("_name")) - .metadata(metadata) - .nodes(DiscoveryNodes.builder() - .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT))) - .build(); + .metadata(metadata) + .nodes(DiscoveryNodes.builder() + .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT)) + .localNodeId("_node_id")) + .build(); notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous)); verifyNoMoreInteractions(anomalyDetectionAuditor); @@ -141,7 +136,7 @@ public void testClusterChanged_warning() { public void testClusterChanged_noPersistentTaskChanges() { MlAssignmentNotifier notifier = new MlAssignmentNotifier(anomalyDetectionAuditor, dataFrameAnalyticsAuditor, threadPool, - configMigrator, clusterService); + clusterService); 
PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); addJobTask("job_id", null, null, tasksBuilder); @@ -160,16 +155,35 @@ public void testClusterChanged_noPersistentTaskChanges() { .build(); notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous)); - verify(configMigrator, times(1)).migrateConfigs(any(), any()); verifyNoMoreInteractions(anomalyDetectionAuditor); // no longer master newState = ClusterState.builder(new ClusterName("_name")) .metadata(metadata) .nodes(DiscoveryNodes.builder() - .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT))) + .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT)) + .localNodeId("_node_id")) .build(); notifier.clusterChanged(new ClusterChangedEvent("_test", newState, previous)); - verifyNoMoreInteractions(configMigrator); + } + + public void testAuditUnassignedMlTasks() { + MlAssignmentNotifier notifier = new MlAssignmentNotifier(anomalyDetectionAuditor, dataFrameAnalyticsAuditor, threadPool, + clusterService); + + PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); + addJobTask("job_id", null, null, tasksBuilder); + Metadata metadata = Metadata.builder().putCustom(PersistentTasksCustomMetadata.TYPE, tasksBuilder.build()).build(); + ClusterState newState = ClusterState.builder(new ClusterName("_name")) + .metadata(metadata) + // set local node master + .nodes(DiscoveryNodes.builder() + .add(new DiscoveryNode("_node_id", new TransportAddress(InetAddress.getLoopbackAddress(), 9200), Version.CURRENT)) + .localNodeId("_node_id") + .masterNodeId("_node_id")) + .build(); + notifier.auditUnassignedMlTasks(newState.nodes(), newState.metadata().custom(PersistentTasksCustomMetadata.TYPE)); + verify(anomalyDetectionAuditor, times(1)) + .warning("job_id", "No node found to open job. Reasons [test assignment]"); } } From f245c477d18cb54c80fe86edb82459b61487baa1 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Thu, 21 Oct 2021 12:14:27 -0400 Subject: [PATCH 13/21] [ML] fail on poor configuration for categorize_text (#79586) This commit fixes a handful of bugs with the categorize_text agg: - The agg now fails on fields that are not text fields - Limits the number of tokens categorized - Validates the configuration inputs to disallow settings above static maximums --- .../categorize-text-aggregation.asciidoc | 6 +- x-pack/plugin/build.gradle | 1 + .../ml/qa/ml-with-security/build.gradle | 1 + .../CategorizeTextAggregationBuilder.java | 8 +-- .../CategorizeTextAggregator.java | 11 ++- .../CategorizeTextAggregatorFactory.java | 14 ++++ ...CategorizeTextAggregationBuilderTests.java | 7 +- .../test/ml/categorization_agg.yml | 71 ++++++++++++++++--- 8 files changed, 100 insertions(+), 19 deletions(-) diff --git a/docs/reference/aggregations/bucket/categorize-text-aggregation.asciidoc b/docs/reference/aggregations/bucket/categorize-text-aggregation.asciidoc index cc0a0e787f844..c6e191fb23539 100644 --- a/docs/reference/aggregations/bucket/categorize-text-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/categorize-text-aggregation.asciidoc @@ -8,7 +8,8 @@ experimental::[] A multi-bucket aggregation that groups semi-structured text into buckets. Each `text` field is re-analyzed using a custom analyzer. The resulting tokens are then categorized creating buckets of similarly formatted -text values. 
This aggregation works best with machine generated text like system logs. +text values. This aggregation works best with machine generated text like system logs. Only the first 100 analyzed +tokens are used to categorize the text. NOTE: If you have considerable memory allocated to your JVM but are receiving circuit breaker exceptions from this aggregation, you may be attempting to categorize text that is poorly formatted for categorization. Consider @@ -27,11 +28,13 @@ The semi-structured text field to categorize. The maximum number of unique tokens at any position up to `max_matched_tokens`. Must be larger than 1. Smaller values use less memory and create fewer categories. Larger values will use more memory and create narrower categories. +Max allowed value is `100`. `max_matched_tokens`:: (Optional, integer, default: `5`) The maximum number of token positions to match on before attempting to merge categories. Larger values will use more memory and create narrower categories. +Max allowed value is `100`. Example: `max_matched_tokens` of 2 would disallow merging of the categories @@ -90,7 +93,6 @@ include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=tokenizer] (array of strings or objects) include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=filter] ===== -end::categorization-analyzer[] `shard_size`:: (Optional, integer) diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index 0ec41b33e6330..9045f490eaf70 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -108,6 +108,7 @@ tasks.named("yamlRestTestV7CompatTransform").configure{ task -> task.skipTest("rollup/put_job/Test basic put_job", "rollup was an experimental feature, also see #41227") task.skipTest("rollup/start_job/Test start job twice", "rollup was an experimental feature, also see #41227") task.skipTest("ml/trained_model_cat_apis/Test cat trained models", "A type field was added to cat.ml_trained_models #73660, this is a backwards compatible change. Still this is a cat api, and we don't support them with rest api compatibility. 
(the test would be very hard to transform too)") + task.skipTest("ml/categorization_agg/Test categorization aggregation with poor settings", "https://github.com/elastic/elasticsearch/pull/79586") task.replaceKeyInDo("license.delete", "xpack-license.delete") task.replaceKeyInDo("license.get", "xpack-license.get") diff --git a/x-pack/plugin/ml/qa/ml-with-security/build.gradle b/x-pack/plugin/ml/qa/ml-with-security/build.gradle index 448fc8b1fc39b..2a2b34b804b6e 100644 --- a/x-pack/plugin/ml/qa/ml-with-security/build.gradle +++ b/x-pack/plugin/ml/qa/ml-with-security/build.gradle @@ -37,6 +37,7 @@ tasks.named("yamlRestTest").configure { 'ml/calendar_crud/Test delete job from non existing calendar', // These are searching tests with aggregations, and do not call any ML endpoints 'ml/categorization_agg/Test categorization agg simple', + 'ml/categorization_agg/Test categorization aggregation against unsupported field', 'ml/categorization_agg/Test categorization aggregation with poor settings', 'ml/custom_all_field/Test querying custom all field', 'ml/datafeeds_crud/Test delete datafeed with missing id', diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregationBuilder.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregationBuilder.java index d2987ffd33356..66938aad1a819 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregationBuilder.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregationBuilder.java @@ -123,9 +123,9 @@ public int getMaxUniqueTokens() { public CategorizeTextAggregationBuilder setMaxUniqueTokens(int maxUniqueTokens) { this.maxUniqueTokens = maxUniqueTokens; - if (maxUniqueTokens <= 0) { + if (maxUniqueTokens <= 0 || maxUniqueTokens > MAX_MAX_UNIQUE_TOKENS) { throw ExceptionsHelper.badRequestException( - "[{}] must be greater than 0 and less than [{}]. Found [{}] in [{}]", + "[{}] must be greater than 0 and less than or equal [{}]. Found [{}] in [{}]", MAX_UNIQUE_TOKENS.getPreferredName(), MAX_MAX_UNIQUE_TOKENS, maxUniqueTokens, @@ -191,9 +191,9 @@ public int getMaxMatchedTokens() { public CategorizeTextAggregationBuilder setMaxMatchedTokens(int maxMatchedTokens) { this.maxMatchedTokens = maxMatchedTokens; - if (maxMatchedTokens <= 0) { + if (maxMatchedTokens <= 0 || maxMatchedTokens > MAX_MAX_MATCHED_TOKENS) { throw ExceptionsHelper.badRequestException( - "[{}] must be greater than 0 and less than [{}]. Found [{}] in [{}]", + "[{}] must be greater than 0 and less than or equal [{}]. 
Found [{}] in [{}]", MAX_MATCHED_TOKENS.getPreferredName(), MAX_MAX_MATCHED_TOKENS, maxMatchedTokens, diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java index d413fb055dbbf..f491be16c71fb 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java @@ -40,6 +40,8 @@ import java.util.Map; import java.util.Optional; +import static org.elasticsearch.xpack.ml.aggs.categorization.CategorizeTextAggregationBuilder.MAX_MAX_MATCHED_TOKENS; + public class CategorizeTextAggregator extends DeferableBucketAggregator { private final TermsAggregator.BucketCountThresholds bucketCountThresholds; @@ -206,8 +208,13 @@ private void processTokenStream( try { CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); - while (ts.incrementToken()) { - tokens.add(bytesRefHash.put(new BytesRef(termAtt))); + int numTokens = 0; + // Only categorize the first MAX_MAX_MATCHED_TOKENS tokens + while (ts.incrementToken() && numTokens < MAX_MAX_MATCHED_TOKENS) { + if (termAtt.length() > 0) { + tokens.add(bytesRefHash.put(new BytesRef(termAtt))); + numTokens++; + } } if (tokens.isEmpty()) { return; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregatorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregatorFactory.java index f63b4ba1f802b..24d22d22fd78b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregatorFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregatorFactory.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.ml.aggs.categorization; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.AggregatorFactory; @@ -83,6 +84,19 @@ protected Aggregator createInternal(Aggregator parent, CardinalityUpperBound car if (fieldType == null) { return createUnmapped(parent, metadata); } + // TODO add support for Keyword && KeywordScriptFieldType + if (fieldType.getTextSearchInfo() == TextSearchInfo.NONE + || fieldType.getTextSearchInfo() == TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS) { + throw new IllegalArgumentException( + "categorize_text agg [" + + name + + "] only works on analyzable text fields. Cannot aggregate field type [" + + fieldType.name() + + "] via [" + + fieldType.getClass().getSimpleName() + + "]" + ); + } TermsAggregator.BucketCountThresholds bucketCountThresholds = new TermsAggregator.BucketCountThresholds(this.bucketCountThresholds); if (bucketCountThresholds.getShardSize() == CategorizeTextAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS.getShardSize()) { // The user has not made a shardSize selection. 
Use default diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregationBuilderTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregationBuilderTests.java index 7b907ea3ecd29..2bcc010b26694 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregationBuilderTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregationBuilderTests.java @@ -17,6 +17,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.elasticsearch.xpack.ml.aggs.categorization.CategorizeTextAggregationBuilder.MAX_MAX_MATCHED_TOKENS; +import static org.elasticsearch.xpack.ml.aggs.categorization.CategorizeTextAggregationBuilder.MAX_MAX_UNIQUE_TOKENS; + public class CategorizeTextAggregationBuilderTests extends BaseAggregationTestCase { @Override @@ -35,10 +38,10 @@ protected CategorizeTextAggregationBuilder createTestAggregatorBuilder() { builder.setCategorizationAnalyzerConfig(CategorizationAnalyzerConfigTests.createRandomized().build()); } if (randomBoolean()) { - builder.setMaxUniqueTokens(randomIntBetween(1, 500)); + builder.setMaxUniqueTokens(randomIntBetween(1, MAX_MAX_UNIQUE_TOKENS)); } if (randomBoolean()) { - builder.setMaxMatchedTokens(randomIntBetween(1, 10)); + builder.setMaxMatchedTokens(randomIntBetween(1, MAX_MAX_MATCHED_TOKENS)); } if (randomBoolean()) { builder.setSimilarityThreshold(randomIntBetween(1, 100)); diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/categorization_agg.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/categorization_agg.yml index c2d5e0dbf09f1..f3e6dc14867dc 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/categorization_agg.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/categorization_agg.yml @@ -13,6 +13,8 @@ setup: type: keyword text: type: text + value: + type: long - do: headers: @@ -23,19 +25,19 @@ setup: refresh: true body: | {"index": {}} - {"product": "server","text": "Node 2 stopping"} + {"product": "server","text": "Node 2 stopping", "value": 1} {"index": {}} - {"product": "server", "text": "Node 2 starting"} + {"product": "server", "text": "Node 2 starting", "value": 1} {"index": {}} - {"product": "server", "text": "Node 4 stopping"} + {"product": "server", "text": "Node 4 stopping", "value": 1} {"index": {}} - {"product": "server", "text": "Node 5 stopping"} + {"product": "server", "text": "Node 5 stopping", "value": 1} {"index": {}} - {"product": "user", "text": "User Foo logging on"} + {"product": "user", "text": "User Foo logging on", "value": 1} {"index": {}} - {"product": "user", "text": "User Foo logging on"} + {"product": "user", "text": "User Foo logging on", "value": 1} {"index": {}} - {"product": "user", "text": "User Foo logging off"} + {"product": "user", "text": "User Foo logging off", "value": 1} --- "Test categorization agg simple": @@ -83,10 +85,28 @@ setup: - match: { aggregations.categories.buckets.1.doc_count: 3 } - match: { aggregations.categories.buckets.1.key: "User Foo logging *" } --- +"Test categorization aggregation against unsupported field": + - do: + catch: /categorize_text agg \[categories\] only works on analyzable text fields/ + search: + index: to_categorize + body: > + { + "size": 0, + "aggs": { + "categories": { + "categorize_text": { + "field": "value" + } + } + } + } + +--- 
"Test categorization aggregation with poor settings": - do: - catch: /\[max_unique_tokens\] must be greater than 0 and less than \[100\]/ + catch: /\[max_unique_tokens\] must be greater than 0 and less than or equal \[100\]/ search: index: to_categorize body: > @@ -102,7 +122,23 @@ setup: } } - do: - catch: /\[max_matched_tokens\] must be greater than 0 and less than \[100\]/ + catch: /\[max_unique_tokens\] must be greater than 0 and less than or equal \[100\]/ + search: + index: to_categorize + body: > + { + "size": 0, + "aggs": { + "categories": { + "categorize_text": { + "field": "text", + "max_unique_tokens": 101 + } + } + } + } + - do: + catch: /\[max_matched_tokens\] must be greater than 0 and less than or equal \[100\]/ search: index: to_categorize body: > @@ -117,6 +153,23 @@ setup: } } } + - do: + catch: /\[max_matched_tokens\] must be greater than 0 and less than or equal \[100\]/ + search: + index: to_categorize + body: > + { + "size": 0, + "aggs": { + "categories": { + "categorize_text": { + "field": "text", + "max_matched_tokens": 101 + } + } + } + } + - do: catch: /\[similarity_threshold\] must be in the range \[1, 100\]/ search: From 05cc6c16bd0fcc620cdeabbbe437156bcd3ceacf Mon Sep 17 00:00:00 2001 From: Rene Groeschke Date: Thu, 21 Oct 2021 18:56:30 +0200 Subject: [PATCH 14/21] Do not build bwc artifacts when running build task (#79639) The spotless plugin applies the gradle base plugin which results in building all artifacts in the bwc projects when just running build. This is not intended and happened as part of work on #78910 The correct fix is to not apply the base plugin in the spotless plugin IMO. We will work on getting that fix upstream to the third party gradle plugin Meanwhile we just ignore bwc projects for our formatting as they also have no source available anyhow. 
This fixes #79606 when backported to 7.x --- .../conventions/precommit/FormattingPrecommitPlugin.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java b/build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java index 7ea9b41ca32f9..f60cafc190f42 100644 --- a/build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java +++ b/build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java @@ -118,6 +118,10 @@ private Object[] getTargets(String projectPath) { ":client:sniffer", ":client:test", ":distribution:archives:integ-test-zip", + ":distribution:bwc:bugfix", + ":distribution:bwc:maintenance", + ":distribution:bwc:minor", + ":distribution:bwc:staged", ":distribution:docker", ":docs", ":example-plugins:custom-settings", From 8c262f0edf1cc9a44245d37021a140b0e6e7292a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Thu, 21 Oct 2021 19:03:01 +0200 Subject: [PATCH 15/21] [DOCS] Updates transforms upgrade API docs (#79608) --- .../transform/apis/transform-apis.asciidoc | 1 + .../apis/upgrade-transforms.asciidoc | 24 +++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/docs/reference/transform/apis/transform-apis.asciidoc b/docs/reference/transform/apis/transform-apis.asciidoc index b44c5f4970b74..a6bd6fc261770 100644 --- a/docs/reference/transform/apis/transform-apis.asciidoc +++ b/docs/reference/transform/apis/transform-apis.asciidoc @@ -11,3 +11,4 @@ * <> * <> * <> +* <> diff --git a/docs/reference/transform/apis/upgrade-transforms.asciidoc b/docs/reference/transform/apis/upgrade-transforms.asciidoc index 505f070a344b5..b813c47ca2c36 100644 --- a/docs/reference/transform/apis/upgrade-transforms.asciidoc +++ b/docs/reference/transform/apis/upgrade-transforms.asciidoc @@ -38,11 +38,10 @@ If a {transform} upgrade step fails, the upgrade stops, and an error is returned about the underlying issue. Resolve the issue then re-run the process. A summary is returned when the upgrade is finished. -For a major version update – for example, from 7.16 to 8.0 –, it is recommended -to have a recent cluster backup prior to performing a {transform} upgrade which -can be run either before or after an {es} upgrade. However, it is recommended to -perform it before upgrading {es} to the next major version to ensure -{ctransforms} remain running. +To ensure {ctransforms} remain running during a major version upgrade of the +cluster – for example, from 7.16 to 8.0 – it is recommended to upgrade +{transforms} before upgrading the cluster. You may want to perform a recent +cluster backup prior to the upgrade. [IMPORTANT] @@ -65,6 +64,20 @@ destination index remains unchanged. them. Defaults to `false`. +[[upgrade-transforms-response-body]] +== {api-response-body-title} + +`needs_update`:: + (integer) The number of {transforms} that need to be upgraded. + +`no_action`:: + (integer) The number of {transforms} that don't require upgrading. + +`updated`:: + (integer) The number of {transforms} that have been upgraded.
+ + + [[upgrade-transforms-example]] == {api-examples-title} When all {transforms} are upgraded, you receive a summary: [source,console-result] ---- { + "needs_update": 0, "updated": 2, "no_action": 1 } From 6fb40a899a793e4187eaf872f02f718c38ffc88f Mon Sep 17 00:00:00 2001 From: Mark Vieira Date: Thu, 21 Oct 2021 10:27:12 -0700 Subject: [PATCH 16/21] Add optional pull request release tests CI job --- ...asticsearch+pull-request+release-tests.yml | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 .ci/jobs.t/elastic+elasticsearch+pull-request+release-tests.yml diff --git a/.ci/jobs.t/elastic+elasticsearch+pull-request+release-tests.yml b/.ci/jobs.t/elastic+elasticsearch+pull-request+release-tests.yml new file mode 100644 index 0000000000000..91eba35d76e14 --- /dev/null +++ b/.ci/jobs.t/elastic+elasticsearch+pull-request+release-tests.yml @@ -0,0 +1,49 @@ +--- +- job: + name: "elastic+elasticsearch+pull-request+release-tests" + display-name: "elastic / elasticsearch - pull request release-tests" + description: "Testing of Elasticsearch pull requests - release-tests" + workspace: "/dev/shm/elastic+elasticsearch+pull-request+release-tests" + scm: + - git: + refspec: "+refs/pull/${ghprbPullId}/*:refs/remotes/origin/pr/${ghprbPullId}/*" + branches: + - "${ghprbActualCommit}" + triggers: + - github-pull-request: + org-list: + - elastic + allow-whitelist-orgs-as-admins: true + trigger-phrase: '.*run\W+elasticsearch-ci/release-tests.*' + github-hooks: true + status-context: elasticsearch-ci/release-tests + cancel-builds-on-update: true + excluded-regions: + - ^docs/.* + white-list-labels: + - 'test-release' + black-list-target-branches: + - 7.15 + - 6.8 + builders: + - inject: + properties-file: '.ci/java-versions.properties' + properties-content: | + JAVA_HOME=$HOME/.java/$ES_BUILD_JAVA + RUNTIME_JAVA_HOME=$HOME/.java/$ES_RUNTIME_JAVA + JAVA8_HOME=$HOME/.java/java8 + JAVA11_HOME=$HOME/.java/java11 + JAVA15_HOME=$HOME/.java/openjdk15 + - shell: | + #!/usr/local/bin/runbld --redirect-stderr + + # Fetch beats artifacts + export ES_VERSION=$(grep 'elasticsearch' build-tools-internal/version.properties | awk '{print $3}') + export BEATS_DIR=$(pwd)/distribution/docker/build/artifacts/beats + + mkdir -p ${BEATS_DIR} + curl -o "${BEATS_DIR}/metricbeat-${ES_VERSION}-linux-x86_64.tar.gz" https://snapshots-no-kpi.elastic.co/downloads/beats/metricbeat/metricbeat-${ES_VERSION}-SNAPSHOT-linux-x86_64.tar.gz + curl -o "${BEATS_DIR}/filebeat-${ES_VERSION}-linux-x86_64.tar.gz" https://snapshots-no-kpi.elastic.co/downloads/beats/filebeat/filebeat-${ES_VERSION}-SNAPSHOT-linux-x86_64.tar.gz + + $WORKSPACE/.ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dbuild.snapshot=false \ + -Dtests.jvm.argline=-Dbuild.snapshot=false -Dlicense.key=${WORKSPACE}/x-pack/license-tools/src/test/resources/public.key -Dbuild.id=deadbeef build From e288a1a4af60fcea72943de3a2498953e619785f Mon Sep 17 00:00:00 2001 From: Ioannis Kakavas Date: Thu, 21 Oct 2021 20:36:05 +0300 Subject: [PATCH 17/21] Deprecate setup-passwords tool (#76902) With the Security ON by default project, where the `elastic` user password is auto-generated, we have decided to deprecate the setup-passwords tool and consider removing it in a future version. Users will get a password for the `elastic` built-in user when the node starts for the first time, and they can also use the newly introduced elasticsearch-reset-elastic-password tool to set or reset that password.
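For the remaining built-in users, a minimal sketch of driving the change-password endpoint from the low-level Java REST client follows (the user name, password value, and connection details are made up, and authentication as the elastic user is omitted for brevity):

    import org.apache.http.HttpHost;
    import org.elasticsearch.client.Request;
    import org.elasticsearch.client.RestClient;

    // Hypothetical example of changing a built-in user's password once the
    // auto-generated elastic credentials are known.
    public class ChangeBuiltinPasswordExample {
        public static void main(String[] args) throws Exception {
            try (RestClient client = RestClient.builder(new HttpHost("localhost", 9200, "https")).build()) {
                Request request = new Request("POST", "/_security/user/kibana_system/_password");
                request.setJsonEntity("{\"password\": \"a-new-strong-password\"}"); // illustrative value
                client.performRequest(request);
            }
        }
    }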
With credentials for the elastic user available, the password for the rest of the built-in users can be set using the Change Password API, or via Kibana. --- .../commands/setup-passwords.asciidoc | 2 ++ .../esnative/tool/SetupPasswordTool.java | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/docs/reference/commands/setup-passwords.asciidoc b/docs/reference/commands/setup-passwords.asciidoc index 7a443b492d470..45f79eebee884 100644 --- a/docs/reference/commands/setup-passwords.asciidoc +++ b/docs/reference/commands/setup-passwords.asciidoc @@ -3,6 +3,8 @@ [[setup-passwords]] == elasticsearch-setup-passwords +deprecated[8.0, "The `elasticsearch-setup-passwords` tool is deprecated and will be removed in a future release. To manually reset the password for the `elastic` user, use the <>. To change passwords for other users, use either {kib} or the {es} change passwords API."] + The `elasticsearch-setup-passwords` command sets the passwords for the <>. diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/tool/SetupPasswordTool.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/tool/SetupPasswordTool.java index c657140e42154..be417f544e939 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/tool/SetupPasswordTool.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/esnative/tool/SetupPasswordTool.java @@ -65,7 +65,11 @@ * mode prompts for each individual user's password. This tool only runs once, * if successful. After the elastic user password is set you have to use the * `security` API to manipulate passwords. + * + * @deprecated Use {@link ResetBuiltinPasswordTool} for setting the password of the + * elastic user and the ChangePassword API for setting the password of the rest of the built-in users when needed. */ +@Deprecated public class SetupPasswordTool extends LoggingAwareMultiCommand { private static final char[] CHARS = ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789").toCharArray(); @@ -134,6 +138,11 @@ protected void execute(Terminal terminal, OptionSet options, Environment env) th checkClusterHealth(terminal); if (shouldPrompt) { + terminal.println("******************************************************************************"); + terminal.println("Note: The 'elasticsearch-setup-passwords' tool has been deprecated. This " + + "command will be removed in a future release."); + terminal.println("******************************************************************************"); + terminal.println(""); terminal.println("Initiating the setup of passwords for reserved users " + String.join(",", USERS) + "."); terminal.println("The passwords will be randomly generated and printed to the console."); boolean shouldContinue = terminal.promptYesNo("Please confirm that you would like to continue", false); @@ -180,6 +189,11 @@ protected void execute(Terminal terminal, OptionSet options, Environment env) th checkClusterHealth(terminal); if (shouldPrompt) { + terminal.println("******************************************************************************"); + terminal.println("Note: The 'elasticsearch-setup-passwords' tool has been deprecated. 
This " + + " command will be removed in a future release."); + terminal.println("******************************************************************************"); + terminal.println(""); terminal.println("Initiating the setup of passwords for reserved users " + String.join(",", USERS) + "."); terminal.println("You will be prompted to enter passwords as the process progresses."); boolean shouldContinue = terminal.promptYesNo("Please confirm that you would like to continue", false); @@ -314,6 +328,11 @@ void checkElasticKeystorePasswordValid(Terminal terminal, Environment env) throw terminal.errorPrintln(" * Your elasticsearch node is running against a different keystore"); terminal.errorPrintln(" This tool used the keystore at " + KeyStoreWrapper.keystorePath(env.configFile())); terminal.errorPrintln(""); + terminal.errorPrintln( + "You can use the `elasticsearch-reset-elastic-password` CLI tool to reset the password of the '" + elasticUser + + "' user" + ); + terminal.errorPrintln(""); throw new UserException(ExitCodes.CONFIG, "Failed to verify bootstrap password"); } else if (httpCode != HttpURLConnection.HTTP_OK) { terminal.errorPrintln(""); From 4e8ed09f473bd7aea19761be7cb026b9e25d5fa3 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Thu, 21 Oct 2021 14:51:03 -0400 Subject: [PATCH 18/21] [ML] optimize source extraction for categorize_text aggregation (#79099) This optimizes the text value extraction from source in categorize_text aggregation. Early measurements indicate that the bulk of the time spent in this aggregation is inflating and deserializing the source. We can optimize this a bit (for larger sources) by only extracting the text field we care about. The main downside here is if there is a sub-agg that requires the source, the that agg will need to extract the entire source again. This should be a rare case. NOTE: opening as draft as measurements need to be done on some realistic data to see if this actually saves us time. This takes advantage of the work done here: https://github.com/elastic/elasticsearch/pull/77154 --- .../org/elasticsearch/xcontent/XContent.java | 4 + .../xcontent/cbor/CborXContent.java | 20 +++++ .../xcontent/json/JsonXContent.java | 20 +++++ .../xcontent/smile/SmileXContent.java | 20 +++++ .../xcontent/yaml/YamlXContent.java | 20 +++++ .../common/xcontent/XContentHelper.java | 86 ++++++++++++++++--- .../search/lookup/SourceLookup.java | 39 +++++++++ .../CategorizeTextAggregator.java | 2 +- 8 files changed, 199 insertions(+), 12 deletions(-) diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java index d40bedf38b39f..227518b44c209 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContent.java @@ -82,6 +82,10 @@ XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationH XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, byte[] data, int offset, int length) throws IOException; + XContentParser createParser(NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, byte[] data, int offset, int length, FilterPath[] includes, + FilterPath[] excludes) throws IOException; + /** * Creates a parser over the provided reader. 
*/ diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java index 9dfb6f47f7e86..d43e3b10b225c 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/cbor/CborXContent.java @@ -112,6 +112,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current()); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + byte[] data, + int offset, + int length, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new CborXContentParser( + xContentRegistry, + deprecationHandler, + cborFactory.createParser(new ByteArrayInputStream(data, offset, length)), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, Reader reader) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java index cf551f5761315..10df2c1c10d8d 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/json/JsonXContent.java @@ -113,6 +113,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current()); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + byte[] data, + int offset, + int length, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new JsonXContentParser( + xContentRegistry, + deprecationHandler, + jsonFactory.createParser(new ByteArrayInputStream(data, offset, length)), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, Reader reader) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java index e02f8ec307af8..696865a242830 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/smile/SmileXContent.java @@ -114,6 +114,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current()); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + byte[] data, + int offset, + int length, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new SmileXContentParser( + xContentRegistry, + deprecationHandler, + smileFactory.createParser(new ByteArrayInputStream(data, offset, length)), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override 
public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, Reader reader) throws IOException { diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java index b3a684d20583d..68f1ac2bbf27b 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/yaml/YamlXContent.java @@ -106,6 +106,26 @@ public XContentParser createParser(NamedXContentRegistry xContentRegistry, return createParserForCompatibility(xContentRegistry, deprecationHandler, data, offset, length, RestApiVersion.current()); } + @Override + public XContentParser createParser( + NamedXContentRegistry xContentRegistry, + DeprecationHandler deprecationHandler, + byte[] data, + int offset, + int length, + FilterPath[] includes, + FilterPath[] excludes + ) throws IOException { + return new YamlXContentParser( + xContentRegistry, + deprecationHandler, + yamlFactory.createParser(new ByteArrayInputStream(data, offset, length)), + RestApiVersion.current(), + includes, + excludes + ); + } + @Override public XContentParser createParser(NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, Reader reader) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java index 5c891843c0438..6a5e253ffffe3 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentHelper.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Tuple; import org.elasticsearch.common.compress.Compressor; import org.elasticsearch.common.compress.CompressorFactory; @@ -27,6 +28,7 @@ import org.elasticsearch.xcontent.XContentParseException; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.support.filtering.FilterPath; import java.io.BufferedInputStream; import java.io.IOException; @@ -101,6 +103,14 @@ public static Tuple> convertToMap(BytesReferen return convertToMap(bytes, ordered, null); } + /** + * Exactly the same as {@link XContentHelper#convertToMap(BytesReference, boolean, XContentType, FilterPath[], FilterPath[])} but + * none of the fields are filtered + */ + public static Tuple> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType) { + return convertToMap(bytes, ordered, xContentType, null, null); + } + /** * Converts the given bytes into a map that is optionally ordered. The provided {@link XContentType} must be non-null. *

@@ -110,8 +120,13 @@ public static Tuple> convertToMap(BytesReferen * frequently when folks write nanosecond precision dates as a decimal * number. */ - public static Tuple> convertToMap(BytesReference bytes, boolean ordered, XContentType xContentType) - throws ElasticsearchParseException { + public static Tuple> convertToMap( + BytesReference bytes, + boolean ordered, + XContentType xContentType, + @Nullable FilterPath[] include, + @Nullable FilterPath[] exclude + ) throws ElasticsearchParseException { try { final XContentType contentType; InputStream input; @@ -129,14 +144,16 @@ public static Tuple> convertToMap(BytesReferen final int length = bytes.length(); contentType = xContentType != null ? xContentType : XContentFactory.xContentType(raw, offset, length); return new Tuple<>(Objects.requireNonNull(contentType), - convertToMap(XContentFactory.xContent(contentType), raw, offset, length, ordered)); + convertToMap(XContentFactory.xContent(contentType), raw, offset, length, ordered, include, exclude)); } else { input = bytes.streamInput(); contentType = xContentType != null ? xContentType : XContentFactory.xContentType(input); } try (InputStream stream = input) { - return new Tuple<>(Objects.requireNonNull(contentType), - convertToMap(XContentFactory.xContent(contentType), stream, ordered)); + return new Tuple<>( + Objects.requireNonNull(contentType), + convertToMap(XContentFactory.xContent(contentType), stream, ordered, include, exclude) + ); } } catch (IOException e) { throw new ElasticsearchParseException("Failed to parse content to map", e); @@ -158,14 +175,35 @@ public static Map convertToMap(XContent xContent, String string, } /** - * Convert a string in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any - * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input. + * The same as {@link XContentHelper#convertToMap(XContent, byte[], int, int, boolean, FilterPath[], FilterPath[])} but none of the + * fields are filtered. */ public static Map convertToMap(XContent xContent, InputStream input, boolean ordered) throws ElasticsearchParseException { + return convertToMap(xContent, input, ordered, null, null); + } + + /** + * Convert a string in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any + * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input. + * + * Additionally, fields may be included or excluded from the parsing. + */ + public static Map convertToMap( + XContent xContent, + InputStream input, + boolean ordered, + @Nullable FilterPath[] include, + @Nullable FilterPath[] exclude + ) throws ElasticsearchParseException { // It is safe to use EMPTY here because this never uses namedObject - try (XContentParser parser = xContent.createParser(NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, input)) { + try (XContentParser parser = xContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + input, + include, + exclude + )) { return ordered ? 
parser.mapOrdered() : parser.map(); } catch (IOException e) { throw new ElasticsearchParseException("Failed to parse content to map", e); } } @@ -178,9 +216,35 @@ public static Map convertToMap(XContent xContent, InputStream in */ public static Map convertToMap(XContent xContent, byte[] bytes, int offset, int length, boolean ordered) throws ElasticsearchParseException { + return convertToMap(xContent, bytes, offset, length, ordered, null, null); + } + + /** + * Convert a byte array in some {@link XContent} format to a {@link Map}. Throws an {@link ElasticsearchParseException} if there is any + * error. Note that unlike {@link #convertToMap(BytesReference, boolean)}, this doesn't automatically uncompress the input. + * + * Unlike {@link XContentHelper#convertToMap(XContent, byte[], int, int, boolean)} this optionally accepts fields to include or exclude + * during XContent parsing. + */ + public static Map convertToMap( + XContent xContent, + byte[] bytes, + int offset, + int length, + boolean ordered, + @Nullable FilterPath[] include, + @Nullable FilterPath[] exclude + ) throws ElasticsearchParseException { // It is safe to use EMPTY here because this never uses namedObject - try (XContentParser parser = xContent.createParser(NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, bytes, offset, length)) { + try (XContentParser parser = xContent.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + bytes, + offset, + length, + include, + exclude) + ) { return ordered ? parser.mapOrdered() : parser.map(); } catch (IOException e) { throw new ElasticsearchParseException("Failed to parse content to map", e); diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index 505ea16927e97..d498c9cffe468 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.fieldvisitor.FieldsVisitor; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; +import org.elasticsearch.xcontent.support.filtering.FilterPath; import java.io.IOException; import java.util.Collection; @@ -157,6 +158,44 @@ public List extractRawValues(String path) { return XContentMapValues.extractRawValues(path, source()); } + /** + * Returns the values associated with the path. Those are "low" level values, and it can + * handle path expression where an array/list is navigated within. + * + * The major difference with {@link SourceLookup#extractRawValues(String)} is that this version will: + * + * - not cache source if it's not already parsed + * - only extract the desired values from the compressed source instead of deserializing the whole object + * + * This is useful when the caller only wants a single value from source and does not care whether source is fully parsed and cached + * for later use. 
+ * @param path The path from which to extract the values from source + * @return The list of found values or an empty list if none are found + */ + public List extractRawValuesWithoutCaching(String path) { + if (source != null) { + return XContentMapValues.extractRawValues(path, source); + } + FilterPath[] filterPaths = FilterPath.compile(Set.of(path)); + if (sourceAsBytes != null) { + return XContentMapValues.extractRawValues( + path, + XContentHelper.convertToMap(sourceAsBytes, false, null, filterPaths, null).v2() + ); + } + try { + FieldsVisitor sourceFieldVisitor = new FieldsVisitor(true); + fieldReader.accept(docId, sourceFieldVisitor); + BytesReference source = sourceFieldVisitor.source(); + return XContentMapValues.extractRawValues( + path, + XContentHelper.convertToMap(source, false, null, filterPaths, null).v2() + ); + } catch (Exception e) { + throw new ElasticsearchParseException("failed to parse / load source", e); + } + } + /** * For the provided path, return its value in the source. * diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java index f491be16c71fb..386c41255747d 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java @@ -183,7 +183,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { private void collectFromSource(int doc, long owningBucketOrd, CategorizationTokenTree categorizer) throws IOException { sourceLookup.setSegmentAndDocument(ctx, doc); - Iterator itr = sourceLookup.extractRawValues(sourceFieldName).stream().map(obj -> { + Iterator itr = sourceLookup.extractRawValuesWithoutCaching(sourceFieldName).stream().map(obj -> { if (obj == null) { return null; } From 8c0883f5294ee664df96266e880404ad5e2444b0 Mon Sep 17 00:00:00 2001 From: Aleksandr Maus Date: Thu, 21 Oct 2021 15:41:49 -0400 Subject: [PATCH 19/21] Add action_response into .fleet-actions-results mapping (#79584) Co-authored-by: Elastic Machine --- .../plugin/core/src/main/resources/fleet-actions-results.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/x-pack/plugin/core/src/main/resources/fleet-actions-results.json b/x-pack/plugin/core/src/main/resources/fleet-actions-results.json index e4a4acce782b3..fb37e6ef72936 100644 --- a/x-pack/plugin/core/src/main/resources/fleet-actions-results.json +++ b/x-pack/plugin/core/src/main/resources/fleet-actions-results.json @@ -23,6 +23,10 @@ "enabled": false, "type": "object" }, + "action_response": { + "dynamic": true, + "type": "object" + }, "data": { "enabled": false, "type": "object" From 3724feac3300f5227154a235537e5e2a7351872b Mon Sep 17 00:00:00 2001 From: Justin Cranford <89857999+justincr-elastic@users.noreply.github.com> Date: Thu, 21 Oct 2021 16:13:10 -0400 Subject: [PATCH 20/21] Put License API can return HTTP 500 (#79093) * Put License API can return HTTP 500 Put License now returns HTTP 400 if parsing user input fails, such as Base64 encoding or invalid signature. 
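The status change falls out of the exception type that is now thrown: ElasticsearchParseException reports 400 where the generic ElasticsearchException defaults to 500. A simplified, hypothetical sketch of that mapping (these are not the real class bodies):

    import org.elasticsearch.rest.RestStatus;

    // Simplified stand-ins showing why swapping the exception changes the HTTP status.
    class SketchServerException extends RuntimeException {
        RestStatus status() {
            return RestStatus.INTERNAL_SERVER_ERROR; // 500: unclassified server-side failure
        }
    }

    class SketchParseException extends SketchServerException {
        @Override
        RestStatus status() {
            return RestStatus.BAD_REQUEST; // 400: the request payload itself is malformed
        }
    }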
Closes #74058 --- .../org/elasticsearch/client/LicenseIT.java | 37 +++++++++++++++++++ .../org/elasticsearch/license/License.java | 6 +-- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/LicenseIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/LicenseIT.java index d0423f893e8aa..a950abcb99bf4 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/LicenseIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/LicenseIT.java @@ -9,6 +9,7 @@ package org.elasticsearch.client; import org.elasticsearch.Build; +import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.license.DeleteLicenseRequest; import org.elasticsearch.client.license.GetBasicStatusResponse; @@ -23,6 +24,7 @@ import org.elasticsearch.client.license.StartTrialRequest; import org.elasticsearch.client.license.StartTrialResponse; import org.elasticsearch.common.Strings; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.json.JsonXContent; import org.junit.After; @@ -40,6 +42,7 @@ import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.emptyOrNullString; import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.stringContainsInOrder; public class LicenseIT extends ESRestHighLevelClientTestCase { @@ -91,6 +94,40 @@ public void testStartTrial() throws Exception { } } + public void testPutInvalidTrialLicense() throws Exception { + assumeTrue("Trial license is only valid when tested against snapshot/test builds", + Build.CURRENT.isSnapshot()); + + // use a hard-coded trial license for 20 yrs to be able to roll back from other licenses + final String signature = "xx"; // Truncated, so it is expected to fail validation + final String licenseDefinition = Strings.toString(jsonBuilder() + .startObject() + .field("licenses", List.of( + Map.of( + "uid", "96fc37c6-6fc9-43e2-a40d-73143850cd72", + "type", "trial", + // 2018-10-16 07:02:48 UTC + "issue_date_in_millis", "1539673368158", + // 2038-10-11 07:02:48 UTC, 20 yrs later + "expiry_date_in_millis", "2170393368158", + "max_nodes", "5", + "issued_to", "client_rest-high-level_integTestCluster", + "issuer", "elasticsearch", + "start_date_in_millis", "-1", + "signature", signature))) + .endObject()); + + final PutLicenseRequest request = new PutLicenseRequest(); + request.setAcknowledge(true); + request.setLicenseDefinition(licenseDefinition); + ElasticsearchStatusException e = expectThrows( + ElasticsearchStatusException.class, + () -> highLevelClient().license().putLicense(request, RequestOptions.DEFAULT) + ); + assertThat(e.status(), equalTo(RestStatus.BAD_REQUEST)); + assertThat(e.getMessage(), stringContainsInOrder("malformed signature for license")); + } + public static void putTrialLicense() throws IOException { assumeTrue("Trial license is only valid when tested against snapshot/test builds", Build.CURRENT.isSnapshot()); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java index fc9a01b6a9d9e..f08c147236991 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java @@ -642,7 +642,7 @@ public static License 
fromXContent(XContentParser parser) throws IOException { ByteBuffer byteBuffer = ByteBuffer.wrap(signatureBytes); version = byteBuffer.getInt(); } catch (Exception e) { - throw new ElasticsearchException("malformed signature for license [" + builder.uid + "]", e); + throw new ElasticsearchParseException("malformed signature for license [" + builder.uid + "]", e); } // we take the absolute version, because negative versions // mean that the license was generated by the cluster (see TrialLicense) @@ -651,9 +651,9 @@ public static License fromXContent(XContentParser parser) throws IOException { version *= -1; } if (version == 0) { - throw new ElasticsearchException("malformed signature for license [" + builder.uid + "]"); + throw new ElasticsearchParseException("malformed signature for license [" + builder.uid + "]"); } else if (version > VERSION_CURRENT) { - throw new ElasticsearchException("Unknown license version found, please upgrade all nodes to the latest " + + throw new ElasticsearchParseException("Unknown license version found, please upgrade all nodes to the latest " + "elasticsearch-license plugin"); } // signature version is the source of truth From 6cc0a41af0b8e14317ab817938d4144ec31fd005 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 21 Oct 2021 21:28:43 +0100 Subject: [PATCH 21/21] Expand warning about modifying data path contents (#79649) Today we have a short note in one place in the docs saying not to touch the contents of the data path. This commit expands the warning to describe more precisely what is forbidden, and to give some more detail of the consequences, and also duplicates the warning to the other location that documents the `path.data` setting. --- docs/reference/modules/node.asciidoc | 13 +++++++++++++ .../setup/important-settings/path-settings.asciidoc | 6 ++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/reference/modules/node.asciidoc b/docs/reference/modules/node.asciidoc index 50923d3103f6e..2626c255baf05 100644 --- a/docs/reference/modules/node.asciidoc +++ b/docs/reference/modules/node.asciidoc @@ -459,6 +459,19 @@ should be configured to locate the data directory outside the {es} home directory, so that the home directory can be deleted without deleting your data! The RPM and Debian distributions do this for you already. +// tag::modules-node-data-path-warning-tag[] +WARNING: Don't modify anything within the data directory or run processes that +might interfere with its contents. If something other than {es} modifies the +contents of the data directory, then {es} may fail, reporting corruption or +other data inconsistencies, or may appear to work correctly having silently +lost some of your data. Don't attempt to take filesystem backups of the data +directory; there is no supported way to restore such a backup. Instead, use +<> to take backups safely. Don't run virus scanners on the +data directory. A virus scanner can prevent {es} from working correctly and may +modify the contents of the data directory. The data directory contains no +executables so a virus scan will only find false positives. 
+// end::modules-node-data-path-warning-tag[] + [discrete] [[other-node-settings]] === Other node settings diff --git a/docs/reference/setup/important-settings/path-settings.asciidoc b/docs/reference/setup/important-settings/path-settings.asciidoc index f66477c25146b..5767ba7dcd52c 100644 --- a/docs/reference/setup/important-settings/path-settings.asciidoc +++ b/docs/reference/setup/important-settings/path-settings.asciidoc @@ -17,16 +17,14 @@ In production, we strongly recommend you set the `path.data` and `path.logs` in `.msi`>> installations write data and log to locations outside of `$ES_HOME` by default. -IMPORTANT: To avoid errors, only {es} should open files in the `path.data` -directory. Exclude the `path.data` directory from other services that may open -and lock its files, such as antivirus or backup programs. - Supported `path.data` and `path.logs` values vary by platform: include::{es-repo-dir}/tab-widgets/code.asciidoc[] include::{es-repo-dir}/tab-widgets/customize-data-log-path-widget.asciidoc[] +include::{es-repo-dir}/modules/node.asciidoc[tag=modules-node-data-path-warning-tag] + [discrete] ==== Multiple data paths deprecated::[7.13.0]