From a6351d63ad148e9b165be61a275163137bc86967 Mon Sep 17 00:00:00 2001 From: Tim Vernum Date: Tue, 10 Dec 2019 16:46:07 +1100 Subject: [PATCH 01/22] Add setting to restrict license types (#49418) This adds a new "xpack.license.upload.types" setting that restricts which license types may be uploaded to a cluster. By default all types are allowed (excluding basic, which can only be generated and never uploaded). This setting does not restrict APIs that generate licenses such as the start trial API. This setting is not documented as it is intended to be set by orchestrators and not end users. --- .../org/elasticsearch/license/License.java | 49 ++++-- .../elasticsearch/license/LicenseService.java | 49 +++++- .../license/OperationModeFileWatcher.java | 2 +- .../license/RemoteClusterLicenseChecker.java | 6 +- .../xpack/core/XPackClientPlugin.java | 1 + .../core/ml/inference/TrainedModelConfig.java | 4 +- .../license/LicenseFIPSTests.java | 10 ++ .../license/LicenseOperationModeTests.java | 6 +- .../LicenseOperationModeUpdateTests.java | 2 +- .../license/LicenseServiceTests.java | 164 ++++++++++++++++++ 10 files changed, 273 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java index 6731518f5b53..004c9ff98776 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/License.java @@ -63,7 +63,23 @@ public static LicenseType parse(String type) throws IllegalArgumentException { /** * Backward compatible license type parsing for older license models */ - public static LicenseType resolve(String name) { + public static LicenseType resolve(License license) { + if (license.version == VERSION_START) { + // in 1.x: the acceptable values for 'subscription_type': none | dev | silver | gold | platinum + return resolve(license.subscriptionType); + } else { + // in 2.x: the acceptable values for 'type': trial | basic | silver | dev | gold | platinum + // in 5.x: the acceptable values for 'type': trial | basic | standard | dev | gold | platinum + // in 6.x: the acceptable values for 'type': trial | basic | standard | dev | gold | platinum + // in 7.x: the acceptable values for 'type': trial | basic | standard | dev | gold | platinum | enterprise + return resolve(license.type); + } + } + + /** + * Backward compatible license type parsing for older license models + */ + static LicenseType resolve(String name) { switch (name.toLowerCase(Locale.ROOT)) { case "missing": return null; @@ -165,8 +181,12 @@ public static int compare(OperationMode opMode1, OperationMode opMode2) { return Integer.compare(opMode1.id, opMode2.id); } - public static OperationMode resolve(String typeName) { - LicenseType type = LicenseType.resolve(typeName); + /** + * Determine the operating mode for a license type + * @see LicenseType#resolve(License) + * @see #parse(String) + */ + public static OperationMode resolve(LicenseType type) { if (type == null) { return MISSING; } @@ -187,6 +207,21 @@ public static OperationMode resolve(String typeName) { } } + /** + * Parses an {@code OperatingMode} from a String. + * The string must name an operating mode, and not a licensing level (that is, it cannot parse old style license levels + * such as "dev" or "silver"). 
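+ * For example, {@code parse("gold")} returns {@code GOLD}, while a legacy level name such as {@code "silver"} is rejected with an {@link IllegalArgumentException}.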
+ * @see #description() + */ + public static OperationMode parse(String mode) { + try { + return OperationMode.valueOf(mode.toUpperCase(Locale.ROOT)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("unrecognised license operating mode [" + mode + "], supported modes are [" + + Stream.of(values()).map(OperationMode::description).collect(Collectors.joining(",")) + "]"); + } + } + public String description() { return name().toLowerCase(Locale.ROOT); } @@ -212,13 +247,7 @@ private License(int version, String uid, String issuer, String issuedTo, long is } this.maxNodes = maxNodes; this.startDate = startDate; - if (version == VERSION_START) { - // in 1.x: the acceptable values for 'subscription_type': none | dev | silver | gold | platinum - this.operationMode = OperationMode.resolve(subscriptionType); - } else { - // in 2.x: the acceptable values for 'type': trial | basic | silver | dev | gold | platinum - this.operationMode = OperationMode.resolve(type); - } + this.operationMode = OperationMode.resolve(LicenseType.resolve(this)); validate(); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/LicenseService.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/LicenseService.java index f16cb2fbe393..af34d31c1442 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/LicenseService.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/LicenseService.java @@ -47,6 +47,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Service responsible for managing {@link LicensesMetaData}. @@ -64,6 +65,12 @@ public class LicenseService extends AbstractLifecycleComponent implements Cluste return SelfGeneratedLicense.validateSelfGeneratedType(type); }, Setting.Property.NodeScope); + static final List<License.LicenseType> ALLOWABLE_UPLOAD_TYPES = getAllowableUploadTypes(); + + public static final Setting<List<License.LicenseType>> ALLOWED_LICENSE_TYPES_SETTING = Setting.listSetting("xpack.license.upload.types", + ALLOWABLE_UPLOAD_TYPES.stream().map(License.LicenseType::getTypeName).collect(Collectors.toUnmodifiableList()), + License.LicenseType::parse, LicenseService::validateUploadTypesSetting, Setting.Property.NodeScope); + // pkg private for tests static final TimeValue NON_BASIC_SELF_GENERATED_LICENSE_DURATION = TimeValue.timeValueHours(30 * 24); @@ -104,6 +111,12 @@ public class LicenseService extends AbstractLifecycleComponent implements Cluste */ private List<ExpirationCallback> expirationCallbacks = new ArrayList<>(); + /** + * Which license types are permitted to be uploaded to the cluster + * @see #ALLOWED_LICENSE_TYPES_SETTING + */ + private final List<License.LicenseType> allowedLicenseTypes; + /** * Max number of nodes licensed by generated trial license */ @@ -123,6 +136,7 @@ public LicenseService(Settings settings, ClusterService clusterService, Clock cl this.clock = clock; this.scheduler = new SchedulerEngine(settings, clock); this.licenseState = licenseState; + this.allowedLicenseTypes = ALLOWED_LICENSE_TYPES_SETTING.get(settings); this.operationModeFileWatcher = new OperationModeFileWatcher(resourceWatcherService, XPackPlugin.resolveConfigFile(env, "license_mode"), logger, () -> updateLicenseState(getLicensesMetaData())); @@ -196,8 +210,20 @@ public void registerLicense(final PutLicenseRequest request, final ActionListene final long now = clock.millis(); if (!LicenseVerifier.verifyLicense(newLicense) || newLicense.issueDate() > now || newLicense.startDate() > now) {
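// Signature verification failed, or the license claims an issue/start date in the future: surface LicensesStatus.INVALID to the caller.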
listener.onResponse(new PutLicenseResponse(true, LicensesStatus.INVALID)); - } else if (newLicense.type().equals(License.LicenseType.BASIC.getTypeName())) { + return; + } + final License.LicenseType licenseType; + try { + licenseType = License.LicenseType.resolve(newLicense); + } catch (Exception e) { + listener.onFailure(e); + return; + } + if (licenseType == License.LicenseType.BASIC) { listener.onFailure(new IllegalArgumentException("Registering basic licenses is not allowed.")); + } else if (isAllowedLicenseType(licenseType) == false) { + listener.onFailure(new IllegalArgumentException( + "Registering [" + licenseType.getTypeName() + "] licenses is not allowed on this cluster")); } else if (newLicense.expiryDate() < now) { listener.onResponse(new PutLicenseResponse(true, LicensesStatus.EXPIRED)); } else { @@ -272,6 +298,11 @@ private static boolean licenseIsCompatible(License license, Version version) { } } + private boolean isAllowedLicenseType(License.LicenseType type) { + logger.debug("Checking license [{}] against allowed license types: {}", type, allowedLicenseTypes); + return allowedLicenseTypes.contains(type); + } + public static Map getAckMessages(License newLicense, License currentLicense) { Map acknowledgeMessages = new HashMap<>(); if (!License.isAutoGeneratedLicense(currentLicense.signature()) // current license is not auto-generated @@ -574,4 +605,20 @@ private static boolean isProductionMode(Settings settings, DiscoveryNode localNo private static boolean isBoundToLoopback(DiscoveryNode localNode) { return localNode.getAddress().address().getAddress().isLoopbackAddress(); } + + private static List getAllowableUploadTypes() { + return Stream.of(License.LicenseType.values()) + .filter(t -> t != License.LicenseType.BASIC) + .collect(Collectors.toUnmodifiableList()); + } + + private static void validateUploadTypesSetting(List value) { + if (ALLOWABLE_UPLOAD_TYPES.containsAll(value) == false) { + throw new IllegalArgumentException("Invalid value [" + + value.stream().map(License.LicenseType::getTypeName).collect(Collectors.joining(",")) + + "] for " + ALLOWED_LICENSE_TYPES_SETTING.getKey() + ", allowed values are [" + + ALLOWABLE_UPLOAD_TYPES.stream().map(License.LicenseType::getTypeName).collect(Collectors.joining(",")) + + "]"); + } + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/OperationModeFileWatcher.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/OperationModeFileWatcher.java index b8e6446b9f49..ee08b9f7330c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/OperationModeFileWatcher.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/OperationModeFileWatcher.java @@ -106,7 +106,7 @@ private synchronized void onChange(Path file) { // this UTF-8 conversion is much pickier than java String final String operationMode = new BytesRef(content).utf8ToString(); try { - newOperationMode = OperationMode.resolve(operationMode); + newOperationMode = OperationMode.parse(operationMode); } catch (IllegalArgumentException e) { logger.error( (Supplier) () -> new ParameterizedMessage( diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java index 7d5a3b5e9a53..5de1186767f4 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java +++ 
b/x-pack/plugin/core/src/main/java/org/elasticsearch/license/RemoteClusterLicenseChecker.java @@ -138,7 +138,7 @@ public RemoteClusterLicenseChecker(final Client client, final Predicate> getSettings() { settings.addAll(XPackSettings.getAllSettings()); settings.add(LicenseService.SELF_GENERATED_LICENSE_TYPE); + settings.add(LicenseService.ALLOWED_LICENSE_TYPES_SETTING); // we add the `xpack.version` setting to all internal indices settings.add(Setting.simpleString("index.xpack.version", Setting.Property.IndexScope)); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/TrainedModelConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/TrainedModelConfig.java index 21e145546f8b..343a520d9b5d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/TrainedModelConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/TrainedModelConfig.java @@ -138,7 +138,7 @@ public static TrainedModelConfig.Builder fromXContent(XContentParser parser, boo throw new IllegalArgumentException("[" + ESTIMATED_OPERATIONS.getPreferredName() + "] must be greater than or equal to 0"); } this.estimatedOperations = estimatedOperations; - this.licenseLevel = License.OperationMode.resolve(ExceptionsHelper.requireNonNull(licenseLevel, LICENSE_LEVEL)); + this.licenseLevel = License.OperationMode.parse(ExceptionsHelper.requireNonNull(licenseLevel, LICENSE_LEVEL)); } public TrainedModelConfig(StreamInput in) throws IOException { @@ -153,7 +153,7 @@ public TrainedModelConfig(StreamInput in) throws IOException { input = new TrainedModelInput(in); estimatedHeapMemory = in.readVLong(); estimatedOperations = in.readVLong(); - licenseLevel = License.OperationMode.resolve(in.readString()); + licenseLevel = License.OperationMode.parse(in.readString()); } public String getModelId() { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseFIPSTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseFIPSTests.java index c432a207fcb7..eb357661d50c 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseFIPSTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseFIPSTests.java @@ -34,6 +34,11 @@ public void testFIPSCheckWithAllowedLicense() throws Exception { licenseService.start(); PlainActionFuture responseFuture = new PlainActionFuture<>(); licenseService.registerLicense(request, responseFuture); + if (responseFuture.isDone()) { + // If the future is done, it means request/license validation failed. + // In which case, this `actionGet` should throw a more useful exception than the verify below. + responseFuture.actionGet(); + } verify(clusterService).submitStateUpdateTask(any(String.class), any(ClusterStateUpdateTask.class)); } @@ -67,6 +72,11 @@ public void testFIPSCheckWithoutAllowedLicense() throws Exception { setInitialState(null, licenseState, settings); licenseService.start(); licenseService.registerLicense(request, responseFuture); + if (responseFuture.isDone()) { + // If the future is done, it means request/license validation failed. + // In which case, this `actionGet` should throw a more useful exception than the verify below. 
+ responseFuture.actionGet(); + } verify(clusterService).submitStateUpdateTask(any(String.class), any(ClusterStateUpdateTask.class)); } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeTests.java index 648f48ff2ea1..a1fbfbe6c6a4 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeTests.java @@ -57,7 +57,8 @@ public void testResolveUnknown() { for (String type : types) { try { - OperationMode.resolve(type); + final License.LicenseType licenseType = License.LicenseType.resolve(type); + OperationMode.resolve(licenseType); fail(String.format(Locale.ROOT, "[%s] should not be recognized as an operation mode", type)); } @@ -69,7 +70,8 @@ public void testResolveUnknown() { private static void assertResolve(OperationMode expected, String... types) { for (String type : types) { - assertThat(OperationMode.resolve(type), equalTo(expected)); + License.LicenseType licenseType = License.LicenseType.resolve(type); + assertThat(OperationMode.resolve(licenseType), equalTo(expected)); } } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeUpdateTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeUpdateTests.java index a69331287918..20df885261fe 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeUpdateTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseOperationModeUpdateTests.java @@ -34,7 +34,7 @@ public void init() throws Exception { } public void testLicenseOperationModeUpdate() throws Exception { - String type = randomFrom("trial", "basic", "standard", "gold", "platinum"); + License.LicenseType type = randomFrom(License.LicenseType.values()); License license = License.builder() .uid("id") .expiryDate(0) diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseServiceTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseServiceTests.java index 750b3d67c5f6..b1b22f15c259 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseServiceTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicenseServiceTests.java @@ -6,12 +6,47 @@ package org.elasticsearch.license; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.cluster.AckedClusterStateUpdateTask; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateUpdateTask; +import org.elasticsearch.cluster.metadata.MetaData; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.license.licensor.LicenseSigner; +import org.elasticsearch.protocol.xpack.license.LicensesStatus; +import org.elasticsearch.protocol.xpack.license.PutLicenseResponse; import org.elasticsearch.test.ESTestCase; 
+import org.elasticsearch.test.TestMatchers; +import org.elasticsearch.watcher.ResourceWatcherService; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Clock; import java.time.LocalDate; import java.time.ZoneOffset; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.startsWith; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; /** * Due to changes in JDK9 where locale data is used from CLDR, the licence message will differ in jdk 8 and jdk9+ @@ -30,4 +65,133 @@ public void testLogExpirationWarning() { assertThat(message, startsWith("License [will expire] on [Thursday, November 15, 2018].\n")); } } + + /** + * Tests loading a license when {@link LicenseService#ALLOWED_LICENSE_TYPES_SETTING} is on its default value (all license types) + */ + public void testRegisterLicenseWithoutTypeRestrictions() throws Exception { + assertRegisterValidLicense(Settings.EMPTY, + randomValueOtherThan(License.LicenseType.BASIC, () -> randomFrom(License.LicenseType.values()))); + } + + /** + * Tests loading a license when {@link LicenseService#ALLOWED_LICENSE_TYPES_SETTING} is set, + * and the uploaded license type matches + */ + public void testSuccessfullyRegisterLicenseMatchingTypeRestrictions() throws Exception { + final List allowed = randomSubsetOf( + randomIntBetween(1, LicenseService.ALLOWABLE_UPLOAD_TYPES.size() - 1), LicenseService.ALLOWABLE_UPLOAD_TYPES); + final List allowedNames = allowed.stream().map(License.LicenseType::getTypeName).collect(Collectors.toUnmodifiableList()); + final Settings settings = Settings.builder() + .putList("xpack.license.upload.types", allowedNames) + .build(); + assertRegisterValidLicense(settings, randomFrom(allowed)); + } + + /** + * Tests loading a license when {@link LicenseService#ALLOWED_LICENSE_TYPES_SETTING} is set, + * and the uploaded license type does not match + */ + public void testFailToRegisterLicenseNotMatchingTypeRestrictions() throws Exception { + final List allowed = randomSubsetOf( + randomIntBetween(1, LicenseService.ALLOWABLE_UPLOAD_TYPES.size() - 2), LicenseService.ALLOWABLE_UPLOAD_TYPES); + final List allowedNames = allowed.stream().map(License.LicenseType::getTypeName).collect(Collectors.toUnmodifiableList()); + final Settings settings = Settings.builder() + .putList("xpack.license.upload.types", allowedNames) + .build(); + final License.LicenseType notAllowed = randomValueOtherThanMany( + test -> allowed.contains(test), + () -> randomFrom(LicenseService.ALLOWABLE_UPLOAD_TYPES)); + assertRegisterDisallowedLicenseType(settings, notAllowed); + } + + private void assertRegisterValidLicense(Settings baseSettings, License.LicenseType licenseType) throws IOException { + tryRegisterLicense(baseSettings, licenseType, + future -> assertThat(future.actionGet().status(), equalTo(LicensesStatus.VALID))); + } + + private void assertRegisterDisallowedLicenseType(Settings baseSettings, License.LicenseType licenseType) throws IOException { + tryRegisterLicense(baseSettings, licenseType, future -> { + final IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, future::actionGet); + assertThat(exception, 
TestMatchers.throwableWithMessage( + "Registering [" + licenseType.getTypeName() + "] licenses is not allowed on " + "this cluster")); + }); + } + + private void tryRegisterLicense(Settings baseSettings, License.LicenseType licenseType, + Consumer> assertion) throws IOException { + final Settings settings = Settings.builder() + .put(baseSettings) + .put("path.home", createTempDir()) + .put("discovery.type", "single-node") // So we skip TLS checks + .build(); + + final ClusterState clusterState = Mockito.mock(ClusterState.class); + Mockito.when(clusterState.metaData()).thenReturn(MetaData.EMPTY_META_DATA); + + final ClusterService clusterService = Mockito.mock(ClusterService.class); + Mockito.when(clusterService.state()).thenReturn(clusterState); + + final Clock clock = randomBoolean() ? Clock.systemUTC() : Clock.systemDefaultZone(); + final Environment env = TestEnvironment.newEnvironment(settings); + final ResourceWatcherService resourceWatcherService = Mockito.mock(ResourceWatcherService.class); + final XPackLicenseState licenseState = Mockito.mock(XPackLicenseState.class); + final LicenseService service = new LicenseService(settings, clusterService, clock, env, resourceWatcherService, licenseState); + + final PutLicenseRequest request = new PutLicenseRequest(); + request.license(spec(licenseType, TimeValue.timeValueDays(randomLongBetween(1, 1000))), XContentType.JSON); + final PlainActionFuture future = new PlainActionFuture<>(); + service.registerLicense(request, future); + + if (future.isDone()) { + // If validation failed, the future might be done without calling the updater task. + assertion.accept(future); + } else { + ArgumentCaptor taskCaptor = ArgumentCaptor.forClass(ClusterStateUpdateTask.class); + verify(clusterService, times(1)).submitStateUpdateTask(any(), taskCaptor.capture()); + + final ClusterStateUpdateTask task = taskCaptor.getValue(); + assertThat(task, instanceOf(AckedClusterStateUpdateTask.class)); + ((AckedClusterStateUpdateTask) task).onAllNodesAcked(null); + + assertion.accept(future); + } + } + + private BytesReference spec(License.LicenseType type, TimeValue expires) throws IOException { + final License signed = sign(buildLicense(type, expires)); + return toSpec(signed); + } + + private BytesReference toSpec(License license) throws IOException { + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + builder.startObject(); + builder.startObject("license"); + license.toInnerXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + builder.endObject(); + builder.flush(); + return BytesReference.bytes(builder); + } + + private License sign(License license) throws IOException { + final Path publicKey = getDataPath("/public.key"); + final Path privateKey = getDataPath("/private.key"); + final LicenseSigner signer = new LicenseSigner(privateKey, publicKey); + + return signer.sign(license); + } + + private License buildLicense(License.LicenseType type, TimeValue expires) { + return License.builder() + .uid(new UUID(randomLong(), randomLong()).toString()) + .type(type) + .expiryDate(System.currentTimeMillis() + expires.millis()) + .issuer(randomAlphaOfLengthBetween(5, 60)) + .issuedTo(randomAlphaOfLengthBetween(5, 60)) + .issueDate(System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(randomLongBetween(1, 5000))) + .maxNodes(randomIntBetween(1, 500)) + .signature(null) + .build(); + } } From 269425b54d573322624dd2a274ef07a3efec79e2 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Tue, 10 Dec 2019 10:22:53 +0200 Subject: 
[PATCH 02/22] [ML] Introduce randomize_seed setting for regression and classification (#49990) This adds a new `randomize_seed` for regression and classification. When not explicitly set, the seed is randomly generated. One can reuse the seed in a similar job in order to ensure the same docs are picked for training. --- .../client/ml/dataframe/Classification.java | 27 +++++- .../client/ml/dataframe/Regression.java | 29 +++++-- .../client/MachineLearningIT.java | 2 + .../MlClientDocumentationIT.java | 4 +- .../ml/dataframe/ClassificationTests.java | 1 + .../ml/put-data-frame-analytics.asciidoc | 4 +- .../apis/dfanalyticsresources.asciidoc | 4 + .../apis/put-dfanalytics.asciidoc | 4 +- docs/reference/ml/ml-shared.asciidoc | 9 ++ .../dataframe/DataFrameAnalyticsConfig.java | 3 +- .../dataframe/analyses/BoostedTreeParams.java | 4 +- .../ml/dataframe/analyses/Classification.java | 41 +++++++-- .../ml/dataframe/analyses/Regression.java | 41 +++++++-- .../DataFrameAnalyticsConfigTests.java | 47 ++++++++++- .../analyses/ClassificationTests.java | 84 +++++++++++++++---- .../dataframe/analyses/RegressionTests.java | 71 ++++++++++++++-- .../ml/integration/ClassificationIT.java | 50 +++++++++-- ...NativeDataFrameAnalyticsIntegTestCase.java | 22 +++++ .../xpack/ml/integration/RegressionIT.java | 41 ++++++++- .../TransportPutDataFrameAnalyticsAction.java | 12 +-- .../CustomProcessorFactory.java | 4 +- .../DatasetSplittingCustomProcessor.java | 6 +- .../DatasetSplittingCustomProcessorTests.java | 10 ++- .../test/ml/data_frame_analytics_crud.yml | 16 ++-- 24 files changed, 460 insertions(+), 76 deletions(-) diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java index d4e7bce5ec44..9d384e6d8678 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Classification.java @@ -49,6 +49,7 @@ public static Builder builder(String dependentVariable) { static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name"); static final ParseField TRAINING_PERCENT = new ParseField("training_percent"); static final ParseField NUM_TOP_CLASSES = new ParseField("num_top_classes"); + static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed"); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -63,7 +64,8 @@ public static Builder builder(String dependentVariable) { (Double) a[5], (String) a[6], (Double) a[7], - (Integer) a[8])); + (Integer) a[8], + (Long) a[9])); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), DEPENDENT_VARIABLE); @@ -75,6 +77,7 @@ public static Builder builder(String dependentVariable) { PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), PREDICTION_FIELD_NAME); PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), TRAINING_PERCENT); PARSER.declareInt(ConstructingObjectParser.optionalConstructorArg(), NUM_TOP_CLASSES); + PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), RANDOMIZE_SEED); } private final String dependentVariable; @@ -86,10 +89,11 @@ public static Builder builder(String dependentVariable) { private final String predictionFieldName; private final Double trainingPercent; private final Integer numTopClasses; + private final Long randomizeSeed; private 
Classification(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta, @Nullable Integer maximumNumberTrees, @Nullable Double featureBagFraction, @Nullable String predictionFieldName, - @Nullable Double trainingPercent, @Nullable Integer numTopClasses) { + @Nullable Double trainingPercent, @Nullable Integer numTopClasses, @Nullable Long randomizeSeed) { this.dependentVariable = Objects.requireNonNull(dependentVariable); this.lambda = lambda; this.gamma = gamma; @@ -99,6 +103,7 @@ private Classification(String dependentVariable, @Nullable Double lambda, @Nulla this.predictionFieldName = predictionFieldName; this.trainingPercent = trainingPercent; this.numTopClasses = numTopClasses; + this.randomizeSeed = randomizeSeed; } @Override @@ -138,6 +143,10 @@ public Double getTrainingPercent() { return trainingPercent; } + public Long getRandomizeSeed() { + return randomizeSeed; + } + public Integer getNumTopClasses() { return numTopClasses; } @@ -167,6 +176,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (trainingPercent != null) { builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); } + if (randomizeSeed != null) { + builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); + } if (numTopClasses != null) { builder.field(NUM_TOP_CLASSES.getPreferredName(), numTopClasses); } @@ -177,7 +189,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public int hashCode() { return Objects.hash(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName, - trainingPercent, numTopClasses); + trainingPercent, randomizeSeed, numTopClasses); } @Override @@ -193,6 +205,7 @@ public boolean equals(Object o) { && Objects.equals(featureBagFraction, that.featureBagFraction) && Objects.equals(predictionFieldName, that.predictionFieldName) && Objects.equals(trainingPercent, that.trainingPercent) + && Objects.equals(randomizeSeed, that.randomizeSeed) && Objects.equals(numTopClasses, that.numTopClasses); } @@ -211,6 +224,7 @@ public static class Builder { private String predictionFieldName; private Double trainingPercent; private Integer numTopClasses; + private Long randomizeSeed; private Builder(String dependentVariable) { this.dependentVariable = Objects.requireNonNull(dependentVariable); @@ -251,6 +265,11 @@ public Builder setTrainingPercent(Double trainingPercent) { return this; } + public Builder setRandomizeSeed(Long randomizeSeed) { + this.randomizeSeed = randomizeSeed; + return this; + } + public Builder setNumTopClasses(Integer numTopClasses) { this.numTopClasses = numTopClasses; return this; @@ -258,7 +277,7 @@ public Builder setNumTopClasses(Integer numTopClasses) { public Classification build() { return new Classification(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName, - trainingPercent, numTopClasses); + trainingPercent, numTopClasses, randomizeSeed); } } } diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java index 3c1edece6fc1..fa55ee40b27f 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/ml/dataframe/Regression.java @@ -48,6 +48,7 @@ public static Builder builder(String dependentVariable) { static final 
ParseField FEATURE_BAG_FRACTION = new ParseField("feature_bag_fraction"); static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name"); static final ParseField TRAINING_PERCENT = new ParseField("training_percent"); + static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed"); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -61,7 +62,8 @@ public static Builder builder(String dependentVariable) { (Integer) a[4], (Double) a[5], (String) a[6], - (Double) a[7])); + (Double) a[7], + (Long) a[8])); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), DEPENDENT_VARIABLE); @@ -72,6 +74,7 @@ public static Builder builder(String dependentVariable) { PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), FEATURE_BAG_FRACTION); PARSER.declareString(ConstructingObjectParser.optionalConstructorArg(), PREDICTION_FIELD_NAME); PARSER.declareDouble(ConstructingObjectParser.optionalConstructorArg(), TRAINING_PERCENT); + PARSER.declareLong(ConstructingObjectParser.optionalConstructorArg(), RANDOMIZE_SEED); } private final String dependentVariable; @@ -82,10 +85,11 @@ public static Builder builder(String dependentVariable) { private final Double featureBagFraction; private final String predictionFieldName; private final Double trainingPercent; + private final Long randomizeSeed; private Regression(String dependentVariable, @Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta, @Nullable Integer maximumNumberTrees, @Nullable Double featureBagFraction, @Nullable String predictionFieldName, - @Nullable Double trainingPercent) { + @Nullable Double trainingPercent, @Nullable Long randomizeSeed) { this.dependentVariable = Objects.requireNonNull(dependentVariable); this.lambda = lambda; this.gamma = gamma; @@ -94,6 +98,7 @@ private Regression(String dependentVariable, @Nullable Double lambda, @Nullable this.featureBagFraction = featureBagFraction; this.predictionFieldName = predictionFieldName; this.trainingPercent = trainingPercent; + this.randomizeSeed = randomizeSeed; } @Override @@ -133,6 +138,10 @@ public Double getTrainingPercent() { return trainingPercent; } + public Long getRandomizeSeed() { + return randomizeSeed; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); @@ -158,6 +167,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (trainingPercent != null) { builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); } + if (randomizeSeed != null) { + builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); + } builder.endObject(); return builder; } @@ -165,7 +177,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @Override public int hashCode() { return Objects.hash(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName, - trainingPercent); + trainingPercent, randomizeSeed); } @Override @@ -180,7 +192,8 @@ public boolean equals(Object o) { && Objects.equals(maximumNumberTrees, that.maximumNumberTrees) && Objects.equals(featureBagFraction, that.featureBagFraction) && Objects.equals(predictionFieldName, that.predictionFieldName) - && Objects.equals(trainingPercent, that.trainingPercent); + && Objects.equals(trainingPercent, that.trainingPercent) + && Objects.equals(randomizeSeed, that.randomizeSeed); } @Override @@ -197,6 +210,7 @@ public static class Builder { 
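// All of the Builder's analysis parameters below are optional; fields left null are omitted from the request body.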
private Double featureBagFraction; private String predictionFieldName; private Double trainingPercent; + private Long randomizeSeed; private Builder(String dependentVariable) { this.dependentVariable = Objects.requireNonNull(dependentVariable); @@ -237,9 +251,14 @@ public Builder setTrainingPercent(Double trainingPercent) { return this; } + public Builder setRandomizeSeed(Long randomizeSeed) { + this.randomizeSeed = randomizeSeed; + return this; + } + public Regression build() { return new Regression(dependentVariable, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, predictionFieldName, - trainingPercent); + trainingPercent, randomizeSeed); } } } diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java index 6ed3734831aa..29e69c5095cb 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java @@ -1291,6 +1291,7 @@ public void testPutDataFrameAnalyticsConfig_GivenRegression() throws Exception { .setAnalysis(org.elasticsearch.client.ml.dataframe.Regression.builder("my_dependent_variable") .setPredictionFieldName("my_dependent_variable_prediction") .setTrainingPercent(80.0) + .setRandomizeSeed(42L) .build()) .setDescription("this is a regression") .build(); @@ -1326,6 +1327,7 @@ public void testPutDataFrameAnalyticsConfig_GivenClassification() throws Excepti .setAnalysis(org.elasticsearch.client.ml.dataframe.Classification.builder("my_dependent_variable") .setPredictionFieldName("my_dependent_variable_prediction") .setTrainingPercent(80.0) + .setRandomizeSeed(42L) .setNumTopClasses(1) .build()) .setDescription("this is a classification") diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index 1d9a151cf8ae..13185e221633 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -2975,7 +2975,8 @@ public void testPutDataFrameAnalytics() throws Exception { .setFeatureBagFraction(0.4) // <6> .setPredictionFieldName("my_prediction_field_name") // <7> .setTrainingPercent(50.0) // <8> - .setNumTopClasses(1) // <9> + .setRandomizeSeed(1234L) // <9> + .setNumTopClasses(1) // <10> .build(); // end::put-data-frame-analytics-classification @@ -2988,6 +2989,7 @@ public void testPutDataFrameAnalytics() throws Exception { .setFeatureBagFraction(0.4) // <6> .setPredictionFieldName("my_prediction_field_name") // <7> .setTrainingPercent(50.0) // <8> + .setRandomizeSeed(1234L) // <9> .build(); // end::put-data-frame-analytics-regression diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java index 98f060cc8534..5ef8fdaef5a2 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/ml/dataframe/ClassificationTests.java @@ -34,6 +34,7 @@ public static Classification randomClassification() { .setFeatureBagFraction(randomBoolean() ? 
null : randomDoubleBetween(0.0, 1.0, false)) .setPredictionFieldName(randomBoolean() ? null : randomAlphaOfLength(10)) .setTrainingPercent(randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true)) + .setRandomizeSeed(randomBoolean() ? null : randomLong()) .setNumTopClasses(randomBoolean() ? null : randomIntBetween(0, 10)) .build(); } diff --git a/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc b/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc index 91a97ad604ce..2152eff5c085 100644 --- a/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc +++ b/docs/java-rest/high-level/ml/put-data-frame-analytics.asciidoc @@ -119,7 +119,8 @@ include-tagged::{doc-tests-file}[{api}-classification] <6> The fraction of features which will be used when selecting a random bag for each candidate split. A double in (0, 1]. <7> The name of the prediction field in the results object. <8> The percentage of training-eligible rows to be used in training. Defaults to 100%. -<9> The number of top classes to be reported in the results. Defaults to 2. +<9> The seed to be used by the random generator that picks which rows are used in training. +<10> The number of top classes to be reported in the results. Defaults to 2. ===== Regression @@ -138,6 +139,7 @@ include-tagged::{doc-tests-file}[{api}-regression] <6> The fraction of features which will be used when selecting a random bag for each candidate split. A double in (0, 1]. <7> The name of the prediction field in the results object. <8> The percentage of training-eligible rows to be used in training. Defaults to 100%. +<9> The seed to be used by the random generator that picks which rows are used in training. ==== Analyzed fields diff --git a/docs/reference/ml/df-analytics/apis/dfanalyticsresources.asciidoc b/docs/reference/ml/df-analytics/apis/dfanalyticsresources.asciidoc index e8ee463c66af..111953b8321a 100644 --- a/docs/reference/ml/df-analytics/apis/dfanalyticsresources.asciidoc +++ b/docs/reference/ml/df-analytics/apis/dfanalyticsresources.asciidoc @@ -204,6 +204,8 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=prediction_field_name] include::{docdir}/ml/ml-shared.asciidoc[tag=training_percent] +include::{docdir}/ml/ml-shared.asciidoc[tag=randomize_seed] + [float] [[regression-resources-advanced]] @@ -252,6 +254,8 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=prediction_field_name] include::{docdir}/ml/ml-shared.asciidoc[tag=training_percent] +include::{docdir}/ml/ml-shared.asciidoc[tag=randomize_seed] + [float] [[classification-resources-advanced]] diff --git a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc index 5b0987e41c4b..123eb6633e37 100644 --- a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc +++ b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc @@ -397,7 +397,8 @@ PUT _ml/data_frame/analytics/student_performance_mathematics_0.3 { "regression": { "dependent_variable": "G3", - "training_percent": 70 <1> + "training_percent": 70, <1> + "randomize_seed": 19673948271 <2> } } } @@ -406,6 +407,7 @@ PUT _ml/data_frame/analytics/student_performance_mathematics_0.3 <1> The `training_percent` defines the percentage of the data set that will be used for training the model. +<2> The `randomize_seed` is the seed used to randomly pick which data is used for training. 
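The same configuration can also be produced through the Java high-level REST client. The sketch below is illustrative only: it assumes the `Regression.builder` additions from this patch, and the source and destination index names are hypothetical placeholders rather than part of the documented example.

[source,java]
----
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.client.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.client.ml.dataframe.Regression;

public class RandomizeSeedExample {
    public static DataFrameAnalyticsConfig regressionWithFixedSeed() {
        // Fixing the seed means re-running the job trains on the same documents.
        Regression regression = Regression.builder("G3")
            .setTrainingPercent(70.0)
            .setRandomizeSeed(19673948271L)
            .build();
        return DataFrameAnalyticsConfig.builder()
            .setId("student_performance_mathematics_0.3")
            .setSource(DataFrameAnalyticsSource.builder()
                .setIndex("student_performance_mathematics") // hypothetical source index
                .build())
            .setDest(DataFrameAnalyticsDest.builder()
                .setIndex("student_performance_mathematics_reg") // hypothetical destination index
                .build())
            .setAnalysis(regression)
            .build();
    }
}
----

Reusing the same `randomize_seed` (together with the same `source` and `analyzed_fields`) is what makes the training split reproducible across jobs.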
[[ml-put-dfanalytics-example-c]] diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc index 11e062796afa..bea970078d06 100644 --- a/docs/reference/ml/ml-shared.asciidoc +++ b/docs/reference/ml/ml-shared.asciidoc @@ -681,6 +681,15 @@ those that contain arrays) won’t be included in the calculation for used percentage. Defaults to `100`. end::training_percent[] +tag::randomize_seed[] +`randomize_seed`:: +(Optional, long) Defines the seed for the random generator that is used to pick +which documents will be used for training. By default, it is randomly generated. +Set it to a specific value to ensure the same documents are used for training, +provided that other related parameters (for example, `source` and `analyzed_fields`) are the same. +end::randomize_seed[] + + tag::use-null[] Defines whether a new series is used as the null series when there is no value for the by or partition fields. The default value is `false`. diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java index 9fd7f8aa86fc..1142b5411fb0 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfig.java @@ -225,7 +225,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(DEST.getPreferredName(), dest); builder.startObject(ANALYSIS.getPreferredName()); - builder.field(analysis.getWriteableName(), analysis); + builder.field(analysis.getWriteableName(), analysis, + new MapParams(Collections.singletonMap(VERSION.getPreferredName(), version == null ?
null : version.toString()))); builder.endObject(); if (params.paramAsBoolean(ToXContentParams.FOR_INTERNAL_STORAGE, false)) { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/BoostedTreeParams.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/BoostedTreeParams.java index ed3cff7d73c0..0f06b08444f5 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/BoostedTreeParams.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/BoostedTreeParams.java @@ -49,7 +49,7 @@ static void declareFields(AbstractObjectParser parser) { private final Integer maximumNumberTrees; private final Double featureBagFraction; - BoostedTreeParams(@Nullable Double lambda, + public BoostedTreeParams(@Nullable Double lambda, @Nullable Double gamma, @Nullable Double eta, @Nullable Integer maximumNumberTrees, @@ -76,7 +76,7 @@ static void declareFields(AbstractObjectParser parser) { this.featureBagFraction = featureBagFraction; } - BoostedTreeParams() { + public BoostedTreeParams() { this(null, null, null, null, null); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java index b4b258ea161f..cd96b815fc11 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java @@ -5,8 +5,10 @@ */ package org.elasticsearch.xpack.core.ml.dataframe.analyses; +import org.elasticsearch.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Randomness; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ConstructingObjectParser; @@ -35,6 +37,7 @@ public class Classification implements DataFrameAnalysis { public static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name"); public static final ParseField NUM_TOP_CLASSES = new ParseField("num_top_classes"); public static final ParseField TRAINING_PERCENT = new ParseField("training_percent"); + public static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed"); private static final ConstructingObjectParser LENIENT_PARSER = createParser(true); private static final ConstructingObjectParser STRICT_PARSER = createParser(false); @@ -48,12 +51,14 @@ private static ConstructingObjectParser createParser(boole new BoostedTreeParams((Double) a[1], (Double) a[2], (Double) a[3], (Integer) a[4], (Double) a[5]), (String) a[6], (Integer) a[7], - (Double) a[8])); + (Double) a[8], + (Long) a[9])); parser.declareString(constructorArg(), DEPENDENT_VARIABLE); BoostedTreeParams.declareFields(parser); parser.declareString(optionalConstructorArg(), PREDICTION_FIELD_NAME); parser.declareInt(optionalConstructorArg(), NUM_TOP_CLASSES); parser.declareDouble(optionalConstructorArg(), TRAINING_PERCENT); + parser.declareLong(optionalConstructorArg(), RANDOMIZE_SEED); return parser; } @@ -82,12 +87,14 @@ public static Classification fromXContent(XContentParser parser, boolean ignoreU private final String predictionFieldName; private final int numTopClasses; private final double trainingPercent; + private final long 
randomizeSeed; public Classification(String dependentVariable, BoostedTreeParams boostedTreeParams, @Nullable String predictionFieldName, @Nullable Integer numTopClasses, - @Nullable Double trainingPercent) { + @Nullable Double trainingPercent, + @Nullable Long randomizeSeed) { if (numTopClasses != null && (numTopClasses < 0 || numTopClasses > 1000)) { throw ExceptionsHelper.badRequestException("[{}] must be an integer in [0, 1000]", NUM_TOP_CLASSES.getPreferredName()); } @@ -99,10 +106,11 @@ public Classification(String dependentVariable, this.predictionFieldName = predictionFieldName == null ? dependentVariable + "_prediction" : predictionFieldName; this.numTopClasses = numTopClasses == null ? DEFAULT_NUM_TOP_CLASSES : numTopClasses; this.trainingPercent = trainingPercent == null ? 100.0 : trainingPercent; + this.randomizeSeed = randomizeSeed == null ? Randomness.get().nextLong() : randomizeSeed; } public Classification(String dependentVariable) { - this(dependentVariable, new BoostedTreeParams(), null, null, null); + this(dependentVariable, new BoostedTreeParams(), null, null, null, null); } public Classification(StreamInput in) throws IOException { @@ -111,12 +119,21 @@ public Classification(StreamInput in) throws IOException { predictionFieldName = in.readOptionalString(); numTopClasses = in.readOptionalVInt(); trainingPercent = in.readDouble(); + if (in.getVersion().onOrAfter(Version.CURRENT)) { + randomizeSeed = in.readOptionalLong(); + } else { + randomizeSeed = Randomness.get().nextLong(); + } } public String getDependentVariable() { return dependentVariable; } + public BoostedTreeParams getBoostedTreeParams() { + return boostedTreeParams; + } + public String getPredictionFieldName() { return predictionFieldName; } @@ -129,6 +146,11 @@ public double getTrainingPercent() { return trainingPercent; } + @Nullable + public Long getRandomizeSeed() { + return randomizeSeed; + } + @Override public String getWriteableName() { return NAME.getPreferredName(); @@ -141,10 +163,15 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(predictionFieldName); out.writeOptionalVInt(numTopClasses); out.writeDouble(trainingPercent); + if (out.getVersion().onOrAfter(Version.CURRENT)) { + out.writeOptionalLong(randomizeSeed); + } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + Version version = Version.fromString(params.param("version", Version.CURRENT.toString())); + builder.startObject(); builder.field(DEPENDENT_VARIABLE.getPreferredName(), dependentVariable); boostedTreeParams.toXContent(builder, params); @@ -153,6 +180,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(PREDICTION_FIELD_NAME.getPreferredName(), predictionFieldName); } builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); + if (version.onOrAfter(Version.CURRENT)) { + builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); + } builder.endObject(); return builder; } @@ -238,11 +268,12 @@ public boolean equals(Object o) { && Objects.equals(boostedTreeParams, that.boostedTreeParams) && Objects.equals(predictionFieldName, that.predictionFieldName) && Objects.equals(numTopClasses, that.numTopClasses) - && trainingPercent == that.trainingPercent; + && trainingPercent == that.trainingPercent + && randomizeSeed == that.randomizeSeed; } @Override public int hashCode() { - return Objects.hash(dependentVariable, boostedTreeParams, predictionFieldName, numTopClasses, trainingPercent); 
+ return Objects.hash(dependentVariable, boostedTreeParams, predictionFieldName, numTopClasses, trainingPercent, randomizeSeed); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java index 01388f01d807..dd8f6a91272c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java @@ -5,8 +5,10 @@ */ package org.elasticsearch.xpack.core.ml.dataframe.analyses; +import org.elasticsearch.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Randomness; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ConstructingObjectParser; @@ -32,6 +34,7 @@ public class Regression implements DataFrameAnalysis { public static final ParseField DEPENDENT_VARIABLE = new ParseField("dependent_variable"); public static final ParseField PREDICTION_FIELD_NAME = new ParseField("prediction_field_name"); public static final ParseField TRAINING_PERCENT = new ParseField("training_percent"); + public static final ParseField RANDOMIZE_SEED = new ParseField("randomize_seed"); private static final ConstructingObjectParser LENIENT_PARSER = createParser(true); private static final ConstructingObjectParser STRICT_PARSER = createParser(false); @@ -44,11 +47,13 @@ private static ConstructingObjectParser createParser(boolean l (String) a[0], new BoostedTreeParams((Double) a[1], (Double) a[2], (Double) a[3], (Integer) a[4], (Double) a[5]), (String) a[6], - (Double) a[7])); + (Double) a[7], + (Long) a[8])); parser.declareString(constructorArg(), DEPENDENT_VARIABLE); BoostedTreeParams.declareFields(parser); parser.declareString(optionalConstructorArg(), PREDICTION_FIELD_NAME); parser.declareDouble(optionalConstructorArg(), TRAINING_PERCENT); + parser.declareLong(optionalConstructorArg(), RANDOMIZE_SEED); return parser; } @@ -60,11 +65,13 @@ public static Regression fromXContent(XContentParser parser, boolean ignoreUnkno private final BoostedTreeParams boostedTreeParams; private final String predictionFieldName; private final double trainingPercent; + private final long randomizeSeed; public Regression(String dependentVariable, BoostedTreeParams boostedTreeParams, @Nullable String predictionFieldName, - @Nullable Double trainingPercent) { + @Nullable Double trainingPercent, + @Nullable Long randomizeSeed) { if (trainingPercent != null && (trainingPercent < 1.0 || trainingPercent > 100.0)) { throw ExceptionsHelper.badRequestException("[{}] must be a double in [1, 100]", TRAINING_PERCENT.getPreferredName()); } @@ -72,10 +79,11 @@ public Regression(String dependentVariable, this.boostedTreeParams = ExceptionsHelper.requireNonNull(boostedTreeParams, BoostedTreeParams.NAME); this.predictionFieldName = predictionFieldName == null ? dependentVariable + "_prediction" : predictionFieldName; this.trainingPercent = trainingPercent == null ? 100.0 : trainingPercent; + this.randomizeSeed = randomizeSeed == null ? 
Randomness.get().nextLong() : randomizeSeed; } public Regression(String dependentVariable) { - this(dependentVariable, new BoostedTreeParams(), null, null); + this(dependentVariable, new BoostedTreeParams(), null, null, null); } public Regression(StreamInput in) throws IOException { @@ -83,12 +91,21 @@ public Regression(StreamInput in) throws IOException { boostedTreeParams = new BoostedTreeParams(in); predictionFieldName = in.readOptionalString(); trainingPercent = in.readDouble(); + if (in.getVersion().onOrAfter(Version.CURRENT)) { + randomizeSeed = in.readOptionalLong(); + } else { + randomizeSeed = Randomness.get().nextLong(); + } } public String getDependentVariable() { return dependentVariable; } + public BoostedTreeParams getBoostedTreeParams() { + return boostedTreeParams; + } + public String getPredictionFieldName() { return predictionFieldName; } @@ -97,6 +114,11 @@ public double getTrainingPercent() { return trainingPercent; } + @Nullable + public Long getRandomizeSeed() { + return randomizeSeed; + } + @Override public String getWriteableName() { return NAME.getPreferredName(); } @@ -108,10 +130,15 @@ public void writeTo(StreamOutput out) throws IOException { boostedTreeParams.writeTo(out); out.writeOptionalString(predictionFieldName); out.writeDouble(trainingPercent); + if (out.getVersion().onOrAfter(Version.CURRENT)) { + out.writeOptionalLong(randomizeSeed); + } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + Version version = Version.fromString(params.param("version", Version.CURRENT.toString())); + builder.startObject(); builder.field(DEPENDENT_VARIABLE.getPreferredName(), dependentVariable); boostedTreeParams.toXContent(builder, params); @@ -119,6 +146,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(PREDICTION_FIELD_NAME.getPreferredName(), predictionFieldName); } builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); + if (version.onOrAfter(Version.CURRENT)) { + builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); + } builder.endObject(); return builder; } @@ -177,11 +207,12 @@ public boolean equals(Object o) { return Objects.equals(dependentVariable, that.dependentVariable) && Objects.equals(boostedTreeParams, that.boostedTreeParams) && Objects.equals(predictionFieldName, that.predictionFieldName) - && trainingPercent == that.trainingPercent; + && trainingPercent == that.trainingPercent + && randomizeSeed == that.randomizeSeed; } @Override public int hashCode() { - return Objects.hash(dependentVariable, boostedTreeParams, predictionFieldName, trainingPercent); + return Objects.hash(dependentVariable, boostedTreeParams, predictionFieldName, trainingPercent, randomizeSeed); } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java index d6b2c077388e..880bea888465 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java @@ -9,6 +9,7 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.Version; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
index d6b2c077388e..880bea888465 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/DataFrameAnalyticsConfigTests.java
@@ -9,6 +9,7 @@
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.Version;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.io.stream.Writeable;
@@ -20,17 +21,20 @@
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.common.xcontent.ObjectParser;
 import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.XContentParseException;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.XContentType;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
 import org.elasticsearch.index.query.MatchAllQueryBuilder;
 import org.elasticsearch.search.SearchModule;
 import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
 import org.elasticsearch.test.AbstractSerializingTestCase;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.MlDataFrameAnalysisNamedXContentProvider;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetectionTests;
+import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression;
 import org.elasticsearch.xpack.core.ml.utils.ToXContentParams;
 import org.junit.Before;
@@ -42,10 +46,13 @@
 import java.util.List;
 import java.util.Map;
 
-import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.hasEntry;
 import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.startsWith;
 
 public class DataFrameAnalyticsConfigTests extends AbstractSerializingTestCase<DataFrameAnalyticsConfig> {
@@ -339,6 +346,44 @@ public void testPreventVersionInjection() throws IOException {
         }
     }
 
+    public void testToXContent_GivenAnalysisWithRandomizeSeedAndVersionIsCurrent() throws IOException {
+        Regression regression = new Regression("foo");
+        assertThat(regression.getRandomizeSeed(), is(notNullValue()));
+
+        DataFrameAnalyticsConfig config = new DataFrameAnalyticsConfig.Builder()
+            .setVersion(Version.CURRENT)
+            .setId("test_config")
+            .setSource(new DataFrameAnalyticsSource(new String[] {"source_index"}, null, null))
+            .setDest(new DataFrameAnalyticsDest("dest_index", null))
+            .setAnalysis(regression)
+            .build();
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            config.toXContent(builder, ToXContent.EMPTY_PARAMS);
+            String json = Strings.toString(builder);
+            assertThat(json, containsString("randomize_seed"));
+        }
+    }
+
+    public void testToXContent_GivenAnalysisWithRandomizeSeedAndVersionIsBeforeItWasIntroduced() throws IOException {
+        Regression regression = new Regression("foo");
+        assertThat(regression.getRandomizeSeed(), is(notNullValue()));
+
+        DataFrameAnalyticsConfig config = new DataFrameAnalyticsConfig.Builder()
+            .setVersion(Version.V_7_5_0)
+            .setId("test_config")
+            .setSource(new DataFrameAnalyticsSource(new String[] {"source_index"}, null, null))
+            .setDest(new DataFrameAnalyticsDest("dest_index", null))
+            .setAnalysis(regression)
+            .build();
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            config.toXContent(builder, ToXContent.EMPTY_PARAMS);
+            String json = Strings.toString(builder);
+            assertThat(json, not(containsString("randomize_seed")));
+        }
+    }
+
     private static void assertTooSmall(ElasticsearchStatusException e) {
         assertThat(e.getMessage(), startsWith("model_memory_limit must be at least 1kb."));
     }
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java
index 61d6b4dfe3f7..8308ef8dad28 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/ClassificationTests.java
@@ -6,20 +6,28 @@
 package org.elasticsearch.xpack.core.ml.dataframe.analyses;
 
 import org.elasticsearch.ElasticsearchStatusException;
+import org.elasticsearch.Version;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
 import org.elasticsearch.index.mapper.BooleanFieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
 import org.elasticsearch.test.AbstractSerializingTestCase;
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Map;
 import java.util.Set;
 
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
 
 public class ClassificationTests extends AbstractSerializingTestCase<Classification> {
@@ -42,7 +50,9 @@ public static Classification createRandom() {
         String predictionFieldName = randomBoolean() ? null : randomAlphaOfLength(10);
         Integer numTopClasses = randomBoolean() ? null : randomIntBetween(0, 1000);
         Double trainingPercent = randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true);
-        return new Classification(dependentVariableName, boostedTreeParams, predictionFieldName, numTopClasses, trainingPercent);
+        Long randomizeSeed = randomBoolean() ? null : randomLong();
+        return new Classification(dependentVariableName, boostedTreeParams, predictionFieldName, numTopClasses, trainingPercent,
+            randomizeSeed);
     }
 
 @@ -52,71 +62,71 @@ protected Writeable.Reader<Classification> instanceReader() {
 
     public void testConstructor_GivenTrainingPercentIsLessThanOne() {
         ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
-            () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 0.999));
+            () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 0.999, randomLong()));
 
         assertThat(e.getMessage(), equalTo("[training_percent] must be a double in [1, 100]"));
     }
 
     public void testConstructor_GivenTrainingPercentIsGreaterThan100() {
         ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
-            () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 100.0001));
+            () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 100.0001, randomLong()));
 
         assertThat(e.getMessage(), equalTo("[training_percent] must be a double in [1, 100]"));
     }
 
     public void testConstructor_GivenNumTopClassesIsLessThanZero() {
         ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
-            () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", -1, 1.0));
+            () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", -1, 1.0, randomLong()));
 
         assertThat(e.getMessage(), equalTo("[num_top_classes] must be an integer in [0, 1000]"));
     }
 
     public void testConstructor_GivenNumTopClassesIsGreaterThan1000() {
         ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
-            () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 1001, 1.0));
+            () -> new Classification("foo", BOOSTED_TREE_PARAMS, "result", 1001, 1.0, randomLong()));
 
         assertThat(e.getMessage(), equalTo("[num_top_classes] must be an integer in [0, 1000]"));
     }
 
     public void testGetPredictionFieldName() {
-        Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 50.0);
+        Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 50.0, randomLong());
         assertThat(classification.getPredictionFieldName(), equalTo("result"));
 
-        classification = new Classification("foo", BOOSTED_TREE_PARAMS, null, 3, 50.0);
+        classification = new Classification("foo", BOOSTED_TREE_PARAMS, null, 3, 50.0, randomLong());
         assertThat(classification.getPredictionFieldName(), equalTo("foo_prediction"));
     }
 
     public void testGetNumTopClasses() {
-        Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 7, 1.0);
+        Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 7, 1.0, randomLong());
         assertThat(classification.getNumTopClasses(), equalTo(7));
 
         // Boundary condition: num_top_classes == 0
-        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 0, 1.0);
+        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 0, 1.0, randomLong());
         assertThat(classification.getNumTopClasses(), equalTo(0));
 
         // Boundary condition: num_top_classes == 1000
-        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 1000, 1.0);
+        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 1000, 1.0, randomLong());
         assertThat(classification.getNumTopClasses(), equalTo(1000));
 
         // num_top_classes == null, default applied
-        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", null, 1.0);
+        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", null, 1.0, randomLong());
         assertThat(classification.getNumTopClasses(), equalTo(2));
     }
 
     public void testGetTrainingPercent() {
-        Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 50.0);
+        Classification classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 50.0, randomLong());
         assertThat(classification.getTrainingPercent(), equalTo(50.0));
 
         // Boundary condition: training_percent == 1.0
-        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 1.0);
+        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 1.0, randomLong());
         assertThat(classification.getTrainingPercent(), equalTo(1.0));
 
         // Boundary condition: training_percent == 100.0
-        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 100.0);
+        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, 100.0, randomLong());
         assertThat(classification.getTrainingPercent(), equalTo(100.0));
 
         // training_percent == null, default applied
-        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, null);
+        classification = new Classification("foo", BOOSTED_TREE_PARAMS, "result", 3, null, randomLong());
         assertThat(classification.getTrainingPercent(), equalTo(100.0));
     }
 
@@ -155,4 +165,48 @@ public void testGetParams() {
     public void testFieldCardinalityLimitsIsNonNull() {
         assertThat(createTestInstance().getFieldCardinalityLimits(), is(not(nullValue())));
     }
+
+    public void testToXContent_GivenVersionBeforeRandomizeSeedWasIntroduced() throws IOException {
+        Classification classification = createRandom();
+        assertThat(classification.getRandomizeSeed(), is(notNullValue()));
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", "7.5.0")));
+            String json = Strings.toString(builder);
+            assertThat(json, not(containsString("randomize_seed")));
+        }
+    }
+
+    public void testToXContent_GivenVersionAfterRandomizeSeedWasIntroduced() throws IOException {
+        Classification classification = createRandom();
+        assertThat(classification.getRandomizeSeed(), is(notNullValue()));
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", Version.CURRENT.toString())));
+            String json = Strings.toString(builder);
+            assertThat(json, containsString("randomize_seed"));
+        }
+    }
+
+    public void testToXContent_GivenVersionIsNull() throws IOException {
+        Classification classification = createRandom();
+        assertThat(classification.getRandomizeSeed(), is(notNullValue()));
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            classification.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", null)));
+            String json = Strings.toString(builder);
+            assertThat(json, containsString("randomize_seed"));
+        }
+    }
+
+    public void testToXContent_GivenEmptyParams() throws IOException {
+        Classification classification = createRandom();
+        assertThat(classification.getRandomizeSeed(), is(notNullValue()));
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            classification.toXContent(builder, ToXContent.EMPTY_PARAMS);
+            String json = Strings.toString(builder);
+            assertThat(json, containsString("randomize_seed"));
+        }
+    }
 }
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java
index f3d5312280e8..58e19f6ef6a2 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/RegressionTests.java
@@ -6,16 +6,24 @@
 package org.elasticsearch.xpack.core.ml.dataframe.analyses;
 
 import org.elasticsearch.ElasticsearchStatusException;
+import org.elasticsearch.Version;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.json.JsonXContent;
 import org.elasticsearch.test.AbstractSerializingTestCase;
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Map;
 
+import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.notNullValue;
 import static org.hamcrest.Matchers.nullValue;
 
 public class RegressionTests extends AbstractSerializingTestCase<Regression> {
@@ -37,7 +45,8 @@ public static Regression createRandom() {
     BoostedTreeParams boostedTreeParams = BoostedTreeParamsTests.createRandom();
         String predictionFieldName = randomBoolean() ? null : randomAlphaOfLength(10);
         Double trainingPercent = randomBoolean() ? null : randomDoubleBetween(1.0, 100.0, true);
-        return new Regression(dependentVariableName, boostedTreeParams, predictionFieldName, trainingPercent);
+        Long randomizeSeed = randomBoolean() ? null : randomLong();
+        return new Regression(dependentVariableName, boostedTreeParams, predictionFieldName, trainingPercent, randomizeSeed);
     }
 
 @@ -47,40 +56,40 @@ protected Writeable.Reader<Regression> instanceReader() {
 
     public void testConstructor_GivenTrainingPercentIsLessThanOne() {
         ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
-            () -> new Regression("foo", BOOSTED_TREE_PARAMS, "result", 0.999));
+            () -> new Regression("foo", BOOSTED_TREE_PARAMS, "result", 0.999, randomLong()));
 
         assertThat(e.getMessage(), equalTo("[training_percent] must be a double in [1, 100]"));
     }
 
     public void testConstructor_GivenTrainingPercentIsGreaterThan100() {
         ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class,
-            () -> new Regression("foo", BOOSTED_TREE_PARAMS, "result", 100.0001));
+            () -> new Regression("foo", BOOSTED_TREE_PARAMS, "result", 100.0001, randomLong()));
 
         assertThat(e.getMessage(), equalTo("[training_percent] must be a double in [1, 100]"));
     }
 
     public void testGetPredictionFieldName() {
-        Regression regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 50.0);
+        Regression regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 50.0, randomLong());
         assertThat(regression.getPredictionFieldName(), equalTo("result"));
 
-        regression = new Regression("foo", BOOSTED_TREE_PARAMS, null, 50.0);
+        regression = new Regression("foo", BOOSTED_TREE_PARAMS, null, 50.0, randomLong());
         assertThat(regression.getPredictionFieldName(), equalTo("foo_prediction"));
     }
 
     public void testGetTrainingPercent() {
-        Regression regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 50.0);
+        Regression regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 50.0, randomLong());
         assertThat(regression.getTrainingPercent(), equalTo(50.0));
 
         // Boundary condition: training_percent == 1.0
-        regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 1.0);
+        regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 1.0, randomLong());
         assertThat(regression.getTrainingPercent(), equalTo(1.0));
 
         // Boundary condition: training_percent == 100.0
-        regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 100.0);
+        regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", 100.0, randomLong());
         assertThat(regression.getTrainingPercent(), equalTo(100.0));
 
         // training_percent == null, default applied
-        regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", null);
+        regression = new Regression("foo", BOOSTED_TREE_PARAMS, "result", null, randomLong());
         assertThat(regression.getTrainingPercent(), equalTo(100.0));
     }
 
@@ -100,4 +109,48 @@ public void testGetStateDocId() {
         String randomId = randomAlphaOfLength(10);
         assertThat(regression.getStateDocId(randomId), equalTo(randomId + "_regression_state#1"));
     }
+
+    public void testToXContent_GivenVersionBeforeRandomizeSeedWasIntroduced() throws IOException {
+        Regression regression = createRandom();
+        assertThat(regression.getRandomizeSeed(), is(notNullValue()));
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            regression.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", "7.5.0")));
+            String json = Strings.toString(builder);
+            assertThat(json, not(containsString("randomize_seed")));
+        }
+    }
+
+    public void testToXContent_GivenVersionAfterRandomizeSeedWasIntroduced() throws IOException {
+        Regression regression = createRandom();
+        assertThat(regression.getRandomizeSeed(), is(notNullValue()));
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            regression.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", Version.CURRENT.toString())));
+            String json = Strings.toString(builder);
+            assertThat(json, containsString("randomize_seed"));
+        }
+    }
+
+    public void testToXContent_GivenVersionIsNull() throws IOException {
+        Regression regression = createRandom();
+        assertThat(regression.getRandomizeSeed(), is(notNullValue()));
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            regression.toXContent(builder, new ToXContent.MapParams(Collections.singletonMap("version", null)));
+            String json = Strings.toString(builder);
+            assertThat(json, containsString("randomize_seed"));
+        }
+    }
+
+    public void testToXContent_GivenEmptyParams() throws IOException {
+        Regression regression = createRandom();
+        assertThat(regression.getRandomizeSeed(), is(notNullValue()));
+
+        try (XContentBuilder builder = JsonXContent.contentBuilder()) {
+            regression.toXContent(builder, ToXContent.EMPTY_PARAMS);
+            String json = Strings.toString(builder);
+            assertThat(json, containsString("randomize_seed"));
+        }
+    }
 }
diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
index f5db9ae690a9..e7c0ccd0e055 100644
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
@@ -20,6 +20,7 @@
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction;
 import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParams;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParamsTests;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification;
 import org.elasticsearch.xpack.core.ml.dataframe.evaluation.classification.Accuracy;
@@ -31,6 +32,7 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import static java.util.stream.Collectors.toList;
 import static org.hamcrest.Matchers.allOf;
@@ -158,7 +160,7 @@ public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty(
             sourceIndex,
             destIndex,
             null,
-            new Classification(dependentVariable, BoostedTreeParamsTests.createRandom(), null, numTopClasses, 50.0));
+            new Classification(dependentVariable, BoostedTreeParamsTests.createRandom(), null, numTopClasses, 50.0, null));
 
         registerAnalytics(config);
         putAnalytics(config);
@@ -269,6 +271,44 @@ public void testDependentVariableCardinalityTooHighButWithQueryMakesItWithinRange() throws Exception {
         assertProgress(jobId, 100, 100, 100, 100);
     }
 
+    public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception {
+        String sourceIndex = "classification_two_jobs_with_same_randomize_seed_source";
+        String dependentVariable = KEYWORD_FIELD;
+        indexData(sourceIndex, 10, 0, dependentVariable);
+
+        String firstJobId = "classification_two_jobs_with_same_randomize_seed_1";
+        String firstJobDestIndex = firstJobId + "_dest";
+
+        BoostedTreeParams boostedTreeParams = new BoostedTreeParams(1.0, 1.0, 1.0, 1, 1.0);
+
+        DataFrameAnalyticsConfig firstJob = buildAnalytics(firstJobId, sourceIndex, firstJobDestIndex, null,
+            new Classification(dependentVariable, boostedTreeParams, null, 1, 50.0, null));
+        registerAnalytics(firstJob);
+        putAnalytics(firstJob);
+
+        String secondJobId = "classification_two_jobs_with_same_randomize_seed_2";
+        String secondJobDestIndex = secondJobId + "_dest";
+
+        long randomizeSeed = ((Classification) firstJob.getAnalysis()).getRandomizeSeed();
+        DataFrameAnalyticsConfig secondJob = buildAnalytics(secondJobId, sourceIndex, secondJobDestIndex, null,
+            new Classification(dependentVariable, boostedTreeParams, null, 1, 50.0, randomizeSeed));
+
+        registerAnalytics(secondJob);
+        putAnalytics(secondJob);
+
+        // Let's run both jobs in parallel and wait until they are finished
+        startAnalytics(firstJobId);
+        startAnalytics(secondJobId);
+        waitUntilAnalyticsIsStopped(firstJobId);
+        waitUntilAnalyticsIsStopped(secondJobId);
+
+        // Now we compare they both used the same training rows
+        Set<String> firstRunTrainingRowsIds = getTrainingRowsIds(firstJobDestIndex);
+        Set<String> secondRunTrainingRowsIds = getTrainingRowsIds(secondJobDestIndex);
+
+        assertThat(secondRunTrainingRowsIds, equalTo(firstRunTrainingRowsIds));
+    }
+
     private void initialize(String jobId) {
         this.jobId = jobId;
         this.sourceIndex = jobId + "_source_index";
@@ -340,10 +380,10 @@ private static Map<String, Object> getMlResultsObjectFromDestDoc(Map<String, Object> destDoc) {
 
     private static <T> void assertTopClasses(
-        Map<String, Object> resultsObject,
-        int numTopClasses,
-        String dependentVariable,
-        List<T> dependentVariableValues) {
+            Map<String, Object> resultsObject,
+            int numTopClasses,
+            String dependentVariable,
+            List<T> dependentVariableValues) {
         assertThat(resultsObject.containsKey("top_classes"), is(true));
         List<Map<String, Object>> topClasses = (List<Map<String, Object>>) resultsObject.get("top_classes");
         assertThat(topClasses, hasSize(numTopClasses));
diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java
index 29ef54d3f752..99223247d730 100644
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java
@@ -18,6 +18,7 @@
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.xpack.core.ml.action.DeleteDataFrameAnalyticsAction;
 import org.elasticsearch.xpack.core.ml.action.EvaluateDataFrameAction;
@@ -45,7 +46,10 @@
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
@@ -252,4 +256,22 @@ private static List<String> fetchAllAuditMessages(String dataFrameAnalyticsId) {
             .map(hit -> (String) hit.getSourceAsMap().get("message"))
             .collect(Collectors.toList());
     }
+
+    protected static Set<String> getTrainingRowsIds(String index) {
+        Set<String> trainingRowsIds = new HashSet<>();
+        SearchResponse hits = client().prepareSearch(index).get();
+        for (SearchHit hit : hits.getHits()) {
+            Map<String, Object> sourceAsMap = hit.getSourceAsMap();
+            assertThat(sourceAsMap.containsKey("ml"), is(true));
+            @SuppressWarnings("unchecked")
+            Map<String, Object> resultsObject = (Map<String, Object>) sourceAsMap.get("ml");
+
+            assertThat(resultsObject.containsKey("is_training"), is(true));
+            if (Boolean.TRUE.equals(resultsObject.get("is_training"))) {
+                trainingRowsIds.add(hit.getId());
+            }
+        }
+        assertThat(trainingRowsIds.isEmpty(), is(false));
+        return trainingRowsIds;
+    }
 }
diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java
index 71ea840c53ea..84d408daacc6 100644
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/RegressionIT.java
@@ -16,6 +16,7 @@
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
 import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState;
+import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParams;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParamsTests;
 import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression;
 import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
@@ -25,6 +26,7 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import static org.hamcrest.Matchers.anyOf;
 import static org.hamcrest.Matchers.equalTo;
@@ -139,7 +141,7 @@ public void testWithOnlyTrainingRowsAndTrainingPercentIsFifty() throws Exception {
             sourceIndex,
             destIndex,
             null,
-            new Regression(DEPENDENT_VARIABLE_FIELD, BoostedTreeParamsTests.createRandom(), null, 50.0));
+            new Regression(DEPENDENT_VARIABLE_FIELD, BoostedTreeParamsTests.createRandom(), null, 50.0, null));
 
         registerAnalytics(config);
         putAnalytics(config);
@@ -235,6 +237,43 @@ public void testStopAndRestart() throws Exception {
         assertInferenceModelPersisted(jobId);
     }
 
+    public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception {
+        String sourceIndex = "regression_two_jobs_with_same_randomize_seed_source";
+        indexData(sourceIndex, 10, 0);
+
+        String firstJobId = "regression_two_jobs_with_same_randomize_seed_1";
+        String firstJobDestIndex = firstJobId + "_dest";
+
+        BoostedTreeParams boostedTreeParams = new BoostedTreeParams(1.0, 1.0, 1.0, 1, 1.0);
+
+        DataFrameAnalyticsConfig firstJob = buildAnalytics(firstJobId, sourceIndex, firstJobDestIndex, null,
+            new Regression(DEPENDENT_VARIABLE_FIELD, boostedTreeParams, null, 50.0, null));
+        registerAnalytics(firstJob);
+        putAnalytics(firstJob);
+
+        String secondJobId = "regression_two_jobs_with_same_randomize_seed_2";
+        String secondJobDestIndex = secondJobId + "_dest";
+
+        long randomizeSeed = ((Regression) firstJob.getAnalysis()).getRandomizeSeed();
+        DataFrameAnalyticsConfig secondJob = buildAnalytics(secondJobId, sourceIndex, secondJobDestIndex, null,
+            new Regression(DEPENDENT_VARIABLE_FIELD, boostedTreeParams, null, 50.0, randomizeSeed));
+
+        registerAnalytics(secondJob);
+        putAnalytics(secondJob);
+
+        // Let's run both jobs in parallel and wait until they are finished
+        startAnalytics(firstJobId);
+        startAnalytics(secondJobId);
+        waitUntilAnalyticsIsStopped(firstJobId);
+        waitUntilAnalyticsIsStopped(secondJobId);
+
+        // Now we compare they both used the same training rows
+        Set<String> firstRunTrainingRowsIds = getTrainingRowsIds(firstJobDestIndex);
+        Set<String> secondRunTrainingRowsIds = getTrainingRowsIds(secondJobDestIndex);
+
+        assertThat(secondRunTrainingRowsIds, equalTo(firstRunTrainingRowsIds));
+    }
+
     private void initialize(String jobId) {
         this.jobId = jobId;
         this.sourceIndex = jobId + "_source_index";
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDataFrameAnalyticsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDataFrameAnalyticsAction.java
index 2884cd331779..1cbed7ed7661 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDataFrameAnalyticsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportPutDataFrameAnalyticsAction.java
@@ -111,7 +111,7 @@ protected ClusterBlockException checkBlock(PutDataFrameAnalyticsAction.Request request, ClusterState state) {
     protected void masterOperation(Task task, PutDataFrameAnalyticsAction.Request request, ClusterState state,
                                    ActionListener<PutDataFrameAnalyticsAction.Response> listener) {
         validateConfig(request.getConfig());
-        DataFrameAnalyticsConfig memoryCappedConfig =
+        DataFrameAnalyticsConfig preparedForPutConfig =
             new DataFrameAnalyticsConfig.Builder(request.getConfig(), maxModelMemoryLimit)
                 .setCreateTime(Instant.now())
                 .setVersion(Version.CURRENT)
@@ -120,11 +120,11 @@ protected void masterOperation(Task task, PutDataFrameAnalyticsAction.Request request, ClusterState state,
         if (licenseState.isAuthAllowed()) {
             final String username = securityContext.getUser().principal();
             RoleDescriptor.IndicesPrivileges sourceIndexPrivileges = RoleDescriptor.IndicesPrivileges.builder()
-                .indices(memoryCappedConfig.getSource().getIndex())
+                .indices(preparedForPutConfig.getSource().getIndex())
                 .privileges("read")
                 .build();
             RoleDescriptor.IndicesPrivileges destIndexPrivileges = RoleDescriptor.IndicesPrivileges.builder()
-                .indices(memoryCappedConfig.getDest().getIndex())
+                .indices(preparedForPutConfig.getDest().getIndex())
                 .privileges("read", "index", "create_index")
                 .build();
 
@@ -135,16 +135,16 @@ protected void masterOperation(Task task, PutDataFrameAnalyticsAction.Request request, ClusterState state,
             privRequest.indexPrivileges(sourceIndexPrivileges, destIndexPrivileges);
 
             ActionListener<HasPrivilegesResponse> privResponseListener = ActionListener.wrap(
-                r -> handlePrivsResponse(username, memoryCappedConfig, r, listener),
+                r -> handlePrivsResponse(username, preparedForPutConfig, r, listener),
                 listener::onFailure);
 
             client.execute(HasPrivilegesAction.INSTANCE, privRequest, privResponseListener);
         } else {
             updateDocMappingAndPutConfig(
-                memoryCappedConfig,
+                preparedForPutConfig,
                 threadPool.getThreadContext().getHeaders(),
                 ActionListener.wrap(
-                    indexResponse -> listener.onResponse(new PutDataFrameAnalyticsAction.Response(memoryCappedConfig)),
+                    indexResponse -> listener.onResponse(new PutDataFrameAnalyticsAction.Response(preparedForPutConfig)),
                     listener::onFailure
                 ));
         }
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/CustomProcessorFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/CustomProcessorFactory.java
index fd52a3fd8da5..77f0b127a263 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/CustomProcessorFactory.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/CustomProcessorFactory.java
@@ -24,12 +24,12 @@ public CustomProcessor create(DataFrameAnalysis analysis) {
         if (analysis instanceof Regression) {
             Regression regression = (Regression) analysis;
             return new DatasetSplittingCustomProcessor(
-                fieldNames, regression.getDependentVariable(), regression.getTrainingPercent());
+                fieldNames, regression.getDependentVariable(), regression.getTrainingPercent(), regression.getRandomizeSeed());
         }
         if (analysis instanceof Classification) {
             Classification classification = (Classification) analysis;
             return new DatasetSplittingCustomProcessor(
-                fieldNames, classification.getDependentVariable(), classification.getTrainingPercent());
+                fieldNames, classification.getDependentVariable(), classification.getTrainingPercent(), classification.getRandomizeSeed());
         }
         return row -> {};
     }
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessor.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessor.java
index ed42cf519885..bf6284aa7a5c 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessor.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessor.java
@@ -5,7 +5,6 @@
  */
 package org.elasticsearch.xpack.ml.dataframe.process.customprocessing;
 
-import org.elasticsearch.common.Randomness;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
 
 import java.util.List;
@@ -23,12 +22,13 @@ class DatasetSplittingCustomProcessor implements CustomProcessor {
 
     private final int dependentVariableIndex;
     private final double trainingPercent;
-    private final Random random = Randomness.get();
+    private final Random random;
     private boolean isFirstRow = true;
 
-    DatasetSplittingCustomProcessor(List<String> fieldNames, String dependentVariable, double trainingPercent) {
+    DatasetSplittingCustomProcessor(List<String> fieldNames, String dependentVariable, double trainingPercent, long randomizeSeed) {
         this.dependentVariableIndex = findDependentVariableIndex(fieldNames, dependentVariable);
         this.trainingPercent = trainingPercent;
+        this.random = new Random(randomizeSeed);
     }
 
     private static int findDependentVariableIndex(List<String> fieldNames, String dependentVariable) {
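The split becomes reproducible because the processor's Random is now a pure function of the config's seed. A freestanding illustration of the JDK guarantee the tests in this patch lean on:

    // Equal seeds produce identical decision sequences, so two jobs that share
    // randomize_seed (and training_percent) assign the same rows to training.
    java.util.Random r1 = new java.util.Random(42L);
    java.util.Random r2 = new java.util.Random(42L);
    for (int i = 0; i < 1000; i++) {
        assert r1.nextDouble() == r2.nextDouble();
    }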
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessorTests.java
index d5973f878246..d18adc3dcdb4 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessorTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/customprocessing/DatasetSplittingCustomProcessorTests.java
@@ -24,6 +24,7 @@ public class DatasetSplittingCustomProcessorTests extends ESTestCase {
     private List<String> fields;
     private int dependentVariableIndex;
     private String dependentVariable;
+    private long randomizeSeed;
 
     @Before
     public void setUpTests() {
@@ -34,10 +35,11 @@ public void setUpTests() {
         }
         dependentVariableIndex = randomIntBetween(0, fieldCount - 1);
         dependentVariable = fields.get(dependentVariableIndex);
+        randomizeSeed = randomLong();
     }
 
     public void testProcess_GivenRowsWithoutDependentVariableValue() {
-        CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 50.0);
+        CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 50.0, randomizeSeed);
 
         for (int i = 0; i < 100; i++) {
             String[] row = new String[fields.size()];
@@ -55,7 +57,7 @@ public void testProcess_GivenRowsWithoutDependentVariableValue() {
     }
 
     public void testProcess_GivenRowsWithDependentVariableValue_AndTrainingPercentIsHundred() {
-        CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 100.0);
+        CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 100.0, randomizeSeed);
 
         for (int i = 0; i < 100; i++) {
             String[] row = new String[fields.size()];
@@ -75,7 +77,7 @@ public void testProcess_GivenRowsWithDependentVariableValue_AndTrainingPercentIsHundred() {
     public void testProcess_GivenRowsWithDependentVariableValue_AndTrainingPercentIsRandom() {
         double trainingPercent = randomDoubleBetween(1.0, 100.0, true);
         double trainingFraction = trainingPercent / 100;
-        CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, trainingPercent);
+        CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, trainingPercent, randomizeSeed);
 
         int runCount = 20;
         int rowsCount = 1000;
@@ -121,7 +123,7 @@ public void testProcess_GivenRowsWithDependentVariableValue_AndTrainingPercentIsRandom() {
     }
 
     public void testProcess_ShouldHaveAtLeastOneTrainingRow() {
-        CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 1.0);
+        CustomProcessor customProcessor = new DatasetSplittingCustomProcessor(fields, dependentVariable, 1.0, randomizeSeed);
 
         // We have some non-training rows and then a training row to check
         // we maintain the first training row and not just the first row
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_crud.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_crud.yml
index a1d78b744405..4335a50382a9 100644
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_crud.yml
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_crud.yml
@@ -1456,7 +1456,8 @@ setup:
               "eta": 0.5,
               "maximum_number_trees": 400,
               "feature_bag_fraction": 0.3,
-              "training_percent": 60.3
+              "training_percent": 60.3,
+              "randomize_seed": 42
             }
           }
         }
@@ -1472,7 +1473,8 @@ setup:
             "maximum_number_trees": 400,
            "feature_bag_fraction": 0.3,
             "prediction_field_name": "foo_prediction",
-            "training_percent": 60.3
+            "training_percent": 60.3,
+            "randomize_seed": 42
           }
         }}
   - is_true: create_time
@@ -1796,7 +1798,8 @@ setup:
               "eta": 0.5,
               "maximum_number_trees": 400,
               "feature_bag_fraction": 0.3,
-              "training_percent": 60.3
+              "training_percent": 60.3,
+              "randomize_seed": 24
            }
          }
        }
@@ -1813,6 +1816,7 @@ setup:
             "feature_bag_fraction": 0.3,
             "prediction_field_name": "foo_prediction",
             "training_percent": 60.3,
+            "randomize_seed": 24,
             "num_top_classes": 2
           }
         }}
@@ -1836,7 +1840,8 @@ setup:
           },
           "analysis": {
             "regression": {
-              "dependent_variable": "foo"
+              "dependent_variable": "foo",
+              "randomize_seed": 42
             }
           }
         }
@@ -1848,7 +1853,8 @@ setup:
           "regression":{
             "dependent_variable": "foo",
             "prediction_field_name": "foo_prediction",
-            "training_percent": 100.0
+            "training_percent": 100.0,
+            "randomize_seed": 42
           }
         }}
   - is_true: create_time

From f400b1897d84be568961518bcef1df19c60847e3 Mon Sep 17 00:00:00 2001
From: Dimitris Athanasiou
Date: Fri, 29 Nov 2019 14:38:06 +0200
Subject: [PATCH 03/22] [ML] Mute data frame analytics BWC tests

Until #49990 is backported to 7.x
---
 .../test/mixed_cluster/90_ml_data_frame_analytics_crud.yml    | 5 +++++
 .../test/old_cluster/90_ml_data_frame_analytics_crud.yml      | 3 +++
 .../upgraded_cluster/90_ml_data_frame_analytics_crud.yml      | 5 +++++
 3 files changed, 13 insertions(+)

diff --git a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml
index b0cb91c4c0f5..808214716071 100644
--- a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml
+++ b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml
@@ -1,3 +1,8 @@
+setup:
+  - skip:
+      version: "all"
+      reason: "Until backport of https://github.com/elastic/elasticsearch/issues/49690"
+
 ---
 "Get old outlier_detection job":
 
diff --git a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/90_ml_data_frame_analytics_crud.yml b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/90_ml_data_frame_analytics_crud.yml
index fe160bba15f2..ba2cf4041167 100644
--- a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/90_ml_data_frame_analytics_crud.yml
+++ b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/90_ml_data_frame_analytics_crud.yml
@@ -1,4 +1,7 @@
 setup:
+  - skip:
+      version: "all"
+      reason: "Until backport of https://github.com/elastic/elasticsearch/issues/49690"
 
   - do:
       index:
diff --git a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml
index 28ec80c6373a..462a1fd76c01 100644
--- a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml
+++ b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml
@@ -1,3 +1,8 @@
+setup:
+  - skip:
+      version: "all"
+      reason: "Until backport of https://github.com/elastic/elasticsearch/issues/49690"
+
 ---
 "Get old cluster outlier_detection job":

From 678aeb747ea0a09c2ac96f4fae6697ec3b8a8adf Mon Sep 17 00:00:00 2001
From: Yannick Welsch
Date: Tue, 10 Dec 2019 09:45:27 +0100
Subject: [PATCH 04/22] Make elasticsearch-node tools custom metadata-aware
 (#48390)

The elasticsearch-node tools allow manipulating the on-disk cluster state. The tool
is currently unaware of plugins and will therefore drop custom metadata from the
cluster state once the state is written out again (as it skips over the custom
metadata that it can't read). This commit preserves unknown customs when editing
on-disk metadata through the elasticsearch-node command-line tools.
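Concretely, the fix swaps the strict metadata reader for one that captures unparseable customs instead of skipping them. A sketch of the loading side, using the two APIs this commit introduces below (logger, manifest and dataPaths stand in for the tool's actual state):

    // Unknown customs are retained as opaque UnknownGatewayOnlyCustom instances
    // rather than dropped, so writing the state back preserves them verbatim.
    MetaData metaData = MetaData.FORMAT_PRESERVE_CUSTOMS.loadGeneration(
        logger, NamedXContentRegistry.EMPTY, manifest.getGlobalGeneration(), dataPaths);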
---
 .../testclusters/ElasticsearchNode.java       | 12 +--
 .../common/xcontent/XContentBuilder.java      | 19 +++--
 .../cli/EnvironmentAwareCommand.java          | 13 ++-
 .../ElasticsearchNodeCommand.java             |  9 +--
 .../UnsafeBootstrapMasterCommand.java         |  3 +-
 .../cluster/metadata/IndexMetaData.java       |  3 -
 .../cluster/metadata/MetaData.java            | 81 +++++++++++++++----
 .../env/NodeRepurposeCommand.java             |  5 +-
 .../env/OverrideNodeVersionCommand.java       |  3 +-
 .../cluster/metadata/MetaDataTests.java       |  2 +-
 .../metadata/ToAndFromJsonMetaDataTests.java  |  2 +-
 .../gateway/MetaDataStateFormatTests.java     | 51 ++++++++++--
 .../LicensesMetaDataSerializationTests.java   |  2 +-
 .../WatcherMetaDataSerializationTests.java    |  2 +-
 14 files changed, 153 insertions(+), 54 deletions(-)

diff --git a/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java b/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java
index de8e02ed0c7d..2f258733e757 100644
--- a/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java
+++ b/buildSrc/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java
@@ -424,7 +424,7 @@ public synchronized void start() {
 
         if (plugins.isEmpty() == false) {
             logToProcessStdout("Installing " + plugins.size() + " plugins");
-            plugins.forEach(plugin -> runElaticsearchBinScript(
+            plugins.forEach(plugin -> runElasticsearchBinScript(
                 "elasticsearch-plugin",
                 "install", "--batch", plugin.toString())
             );
@@ -432,7 +432,7 @@ public synchronized void start() {
 
         if (getVersion().before("6.3.0") && testDistribution == TestDistribution.DEFAULT) {
             LOGGER.info("emulating the {} flavor for {} by installing x-pack", testDistribution, getVersion());
-            runElaticsearchBinScript(
+            runElasticsearchBinScript(
                 "elasticsearch-plugin",
                 "install", "--batch", "x-pack"
             );
@@ -440,7 +440,7 @@ public synchronized void start() {
 
         if (keystoreSettings.isEmpty() == false || keystoreFiles.isEmpty() == false) {
             logToProcessStdout("Adding " + keystoreSettings.size() + " keystore settings and " + keystoreFiles.size() + " keystore files");
-            runElaticsearchBinScript("elasticsearch-keystore", "create");
+            runElasticsearchBinScript("elasticsearch-keystore", "create");
 
             keystoreSettings.forEach((key, value) ->
                 runElasticsearchBinScriptWithInput(value.toString(), "elasticsearch-keystore", "add", "-x", key)
@@ -452,7 +452,7 @@ public synchronized void start() {
                 if (file.exists() == false) {
                     throw new TestClustersException("supplied keystore file " + file + " does not exist, require for " + this);
                 }
-                runElaticsearchBinScript("elasticsearch-keystore", "add-file", entry.getKey(), file.getAbsolutePath());
+                runElasticsearchBinScript("elasticsearch-keystore", "add-file", entry.getKey(), file.getAbsolutePath());
             }
         }
 
@@ -467,7 +467,7 @@ public synchronized void start() {
 
         if (credentials.isEmpty() == false) {
             logToProcessStdout("Setting up " + credentials.size() + " users");
-            credentials.forEach(paramMap -> runElaticsearchBinScript(
+            credentials.forEach(paramMap -> runElasticsearchBinScript(
                 getVersion().onOrAfter("6.3.0") ? "elasticsearch-users" : "x-pack/users",
                 paramMap.entrySet().stream()
                     .flatMap(entry -> Stream.of(entry.getKey(), entry.getValue()))
@@ -663,7 +663,7 @@ private void runElasticsearchBinScriptWithInput(String input, String tool, String... args) {
         }
     }
 
-    private void runElaticsearchBinScript(String tool, String... args) {
+    private void runElasticsearchBinScript(String tool, String... args) {
         runElasticsearchBinScriptWithInput("", tool, args);
     }
 
diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java
index 51a4f86a0d3b..20fde0891b6f 100644
--- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java
+++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java
@@ -819,7 +819,7 @@ private void unknownValue(Object value, boolean ensureNoSelfReferences) throws IOException {
         } else if (value instanceof Map) {
             @SuppressWarnings("unchecked")
             final Map<String, ?> valueMap = (Map<String, ?>) value;
-            map(valueMap, ensureNoSelfReferences);
+            map(valueMap, ensureNoSelfReferences, true);
         } else if (value instanceof Iterable) {
             value((Iterable<?>) value, ensureNoSelfReferences);
         } else if (value instanceof Object[]) {
@@ -867,10 +867,15 @@ public XContentBuilder field(String name, Map<String, Object> values) throws IOException {
     }
 
     public XContentBuilder map(Map<String, Object> values) throws IOException {
-        return map(values, true);
+        return map(values, true, true);
     }
 
-    private XContentBuilder map(Map<String, Object> values, boolean ensureNoSelfReferences) throws IOException {
+    /** writes a map without the start object and end object headers */
+    public XContentBuilder mapContents(Map<String, Object> values) throws IOException {
+        return map(values, true, false);
+    }
+
+    private XContentBuilder map(Map<String, Object> values, boolean ensureNoSelfReferences, boolean writeStartAndEndHeaders) throws IOException {
         if (values == null) {
             return nullValue();
         }
@@ -881,13 +886,17 @@ private XContentBuilder map(Map<String, Object> values, boolean ensureNoSelfReferences, boolean writeStartAndEndHeaders) throws IOException {
             ensureNoSelfReferences(values);
         }
 
-        startObject();
+        if (writeStartAndEndHeaders) {
+            startObject();
+        }
         for (Map.Entry<String, ?> value : values.entrySet()) {
             field(value.getKey());
             // pass ensureNoSelfReferences=false as we already performed the check at a higher level
             unknownValue(value.getValue(), false);
         }
-        endObject();
+        if (writeStartAndEndHeaders) {
+            endObject();
+        }
         return this;
     }
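mapContents exists so a caller can splice a map's fields into an object it has already opened, whereas map would add a second level of nesting. A small sketch of the difference (keys and values are illustrative):

    // map() writes its own object headers:      {"k":"v"}
    builder.map(Collections.singletonMap("k", "v"));
    // mapContents() writes only the entries, for use inside an open object:
    builder.startObject("my_custom");
    builder.mapContents(Collections.singletonMap("k", "v"));   // "my_custom":{"k":"v"}
    builder.endObject();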
diff --git a/server/src/main/java/org/elasticsearch/cli/EnvironmentAwareCommand.java b/server/src/main/java/org/elasticsearch/cli/EnvironmentAwareCommand.java
index 6fc3349c7623..1d3a31f0a72d 100644
--- a/server/src/main/java/org/elasticsearch/cli/EnvironmentAwareCommand.java
+++ b/server/src/main/java/org/elasticsearch/cli/EnvironmentAwareCommand.java
@@ -88,14 +88,19 @@ protected void execute(Terminal terminal, OptionSet options) throws Exception {
 
     /** Create an {@link Environment} for the command to use. Overrideable for tests. */
     protected Environment createEnv(final Map<String, String> settings) throws UserException {
+        return createEnv(Settings.EMPTY, settings);
+    }
+
+    /** Create an {@link Environment} for the command to use. Overrideable for tests. */
+    protected final Environment createEnv(final Settings baseSettings, final Map<String, String> settings) throws UserException {
         final String esPathConf = System.getProperty("es.path.conf");
         if (esPathConf == null) {
             throw new UserException(ExitCodes.CONFIG, "the system property [es.path.conf] must be set");
         }
-        return InternalSettingsPreparer.prepareEnvironment(Settings.EMPTY, settings,
-            getConfigPath(esPathConf),
-            // HOSTNAME is set by elasticsearch-env and elasticsearch-env.bat so it is always available
-            () -> System.getenv("HOSTNAME"));
+        return InternalSettingsPreparer.prepareEnvironment(baseSettings, settings,
+            getConfigPath(esPathConf),
+            // HOSTNAME is set by elasticsearch-env and elasticsearch-env.bat so it is always available
+            () -> System.getenv("HOSTNAME"));
     }
 
     @SuppressForbidden(reason = "need path to construct environment")
diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java
index a65934c76776..800269520e36 100644
--- a/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java
+++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ElasticsearchNodeCommand.java
@@ -26,7 +26,6 @@
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.cli.EnvironmentAwareCommand;
 import org.elasticsearch.cli.Terminal;
-import org.elasticsearch.cluster.ClusterModule;
 import org.elasticsearch.cluster.metadata.Manifest;
 import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.common.collect.Tuple;
@@ -42,7 +41,6 @@
 public abstract class ElasticsearchNodeCommand extends EnvironmentAwareCommand {
     private static final Logger logger = LogManager.getLogger(ElasticsearchNodeCommand.class);
-    protected final NamedXContentRegistry namedXContentRegistry;
     protected static final String DELIMITER = "------------------------------------------------------------------------\n";
 
     static final String STOP_WARNING_MSG =
@@ -61,7 +59,6 @@ public abstract class ElasticsearchNodeCommand extends EnvironmentAwareCommand {
 
     public ElasticsearchNodeCommand(String description) {
         super(description);
-        namedXContentRegistry = new NamedXContentRegistry(ClusterModule.getNamedXWriteables());
     }
 
     protected void processNodePaths(Terminal terminal, OptionSet options, Environment env) throws IOException {
@@ -80,7 +77,7 @@ protected void processNodePaths(Terminal terminal, OptionSet options, Environment env) throws IOException {
 
     protected Tuple<Manifest, MetaData> loadMetaData(Terminal terminal, Path[] dataPaths) throws IOException {
         terminal.println(Terminal.Verbosity.VERBOSE, "Loading manifest file");
-        final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths);
+        final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths);
 
         if (manifest == null) {
             throw new ElasticsearchException(NO_MANIFEST_FILE_FOUND_MSG);
@@ -89,8 +86,8 @@ protected Tuple<Manifest, MetaData> loadMetaData(Terminal terminal, Path[] dataPaths) throws IOException {
             throw new ElasticsearchException(GLOBAL_GENERATION_MISSING_MSG);
         }
         terminal.println(Terminal.Verbosity.VERBOSE, "Loading global metadata file");
-        final MetaData metaData = MetaData.FORMAT.loadGeneration(logger, namedXContentRegistry, manifest.getGlobalGeneration(),
-            dataPaths);
+        final MetaData metaData = MetaData.FORMAT_PRESERVE_CUSTOMS.loadGeneration(
+            logger, NamedXContentRegistry.EMPTY, manifest.getGlobalGeneration(), dataPaths);
         if (metaData == null) {
             throw new ElasticsearchException(NO_GLOBAL_METADATA_MSG + " [generation = " + manifest.getGlobalGeneration() + "]");
         }
diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java b/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java
index c15e832142ea..05bc0116c13c 100644
--- a/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java
+++ b/server/src/main/java/org/elasticsearch/cluster/coordination/UnsafeBootstrapMasterCommand.java
@@ -28,6 +28,7 @@
 import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.env.NodeMetaData;
 import org.elasticsearch.node.Node;
@@ -84,7 +85,7 @@ protected boolean validateBeforeLock(Terminal terminal, Environment env) {
     protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException {
         terminal.println(Terminal.Verbosity.VERBOSE, "Loading node metadata");
-        final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths);
+        final NodeMetaData nodeMetaData = NodeMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths);
         if (nodeMetaData == null) {
             throw new ElasticsearchException(NO_NODE_METADATA_FOUND_MSG);
         }
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java
index d8fd88696e9f..f8e1b48c6dd8 100644
--- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java
+++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java
@@ -45,7 +45,6 @@
 import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Setting.Property;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.common.xcontent.ToXContent;
 import org.elasticsearch.common.xcontent.ToXContentFragment;
 import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -1421,8 +1420,6 @@ public void toXContent(XContentBuilder builder, IndexMetaData state) throws IOException {
 
         @Override
         public IndexMetaData fromXContent(XContentParser parser) throws IOException {
-            assert parser.getXContentRegistry() != NamedXContentRegistry.EMPTY
-                : "loading index metadata requires a working named xcontent registry";
             return Builder.fromXContent(parser);
         }
     };
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaData.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaData.java
index ce67e5b72f1d..482d57bed54b 100644
--- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaData.java
+++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaData.java
@@ -755,7 +755,7 @@ public static Diff<MetaData> readDiffFrom(StreamInput in) throws IOException {
     }
 
     public static MetaData fromXContent(XContentParser parser) throws IOException {
-        return Builder.fromXContent(parser);
+        return Builder.fromXContent(parser, false);
     }
 
     @Override
@@ -1277,7 +1277,7 @@ public static void toXContent(MetaData metaData, XContentBuilder builder, ToXContent.Params params) throws IOException {
         builder.endObject();
     }
 
-    public static MetaData fromXContent(XContentParser parser) throws IOException {
+    public static MetaData fromXContent(XContentParser parser, boolean preserveUnknownCustoms) throws IOException {
         Builder builder = new Builder();
 
         // we might get here after the meta-data element, or on a fresh parser
@@ -1327,8 +1327,13 @@ public static MetaData fromXContent(XContentParser parser, boolean preserveUnknownCustoms) throws IOException {
                         Custom custom = parser.namedObject(Custom.class, currentFieldName, null);
                         builder.putCustom(custom.getWriteableName(), custom);
                     } catch (NamedObjectNotFoundException ex) {
-                        logger.warn("Skipping unknown custom object with type {}", currentFieldName);
-                        parser.skipChildren();
+                        if (preserveUnknownCustoms) {
+                            logger.warn("Adding unknown custom object with type {}", currentFieldName);
+                            builder.putCustom(currentFieldName, new UnknownGatewayOnlyCustom(parser.mapOrdered()));
+                        } else {
+                            logger.warn("Skipping unknown custom object with type {}", currentFieldName);
+                            parser.skipChildren();
+                        }
                     }
                 }
             } else if (token.isValue()) {
@@ -1349,6 +1354,45 @@ public static MetaData fromXContent(XContentParser parser, boolean preserveUnknownCustoms) throws IOException {
         }
     }
 
+    public static class UnknownGatewayOnlyCustom implements Custom {
+
+        private final Map<String, Object> contents;
+
+        UnknownGatewayOnlyCustom(Map<String, Object> contents) {
+            this.contents = contents;
+        }
+
+        @Override
+        public EnumSet<MetaData.XContentContext> context() {
+            return EnumSet.of(MetaData.XContentContext.API, MetaData.XContentContext.GATEWAY);
+        }
+
+        @Override
+        public Diff<Custom> diff(Custom previousState) {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public String getWriteableName() {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public Version getMinimalSupportedVersion() {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public void writeTo(StreamOutput out) throws IOException {
+            throw new UnsupportedOperationException();
+        }
+
+        @Override
+        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+            return builder.mapContents(contents);
+        }
+    }
+
     private static final ToXContent.Params FORMAT_PARAMS;
     static {
         Map<String, String> params = new HashMap<>(2);
@@ -1360,16 +1404,25 @@ public static MetaData fromXContent(XContentParser parser, boolean preserveUnknownCustoms) throws IOException {
     /**
      * State format for {@link MetaData} to write to and load from disk
      */
-    public static final MetaDataStateFormat<MetaData> FORMAT = new MetaDataStateFormat<MetaData>(GLOBAL_STATE_FILE_PREFIX) {
+    public static final MetaDataStateFormat<MetaData> FORMAT = createMetaDataStateFormat(false);
 
-        @Override
-        public void toXContent(XContentBuilder builder, MetaData state) throws IOException {
-            Builder.toXContent(state, builder, FORMAT_PARAMS);
-        }
+    /**
+     * Special state format for {@link MetaData} to write to and load from disk, preserving unknown customs
+     */
+    public static final MetaDataStateFormat<MetaData> FORMAT_PRESERVE_CUSTOMS = createMetaDataStateFormat(true);
 
-        @Override
-        public MetaData fromXContent(XContentParser parser) throws IOException {
-            return Builder.fromXContent(parser);
-        }
-    };
+    private static MetaDataStateFormat<MetaData> createMetaDataStateFormat(boolean preserveUnknownCustoms) {
+        return new MetaDataStateFormat<MetaData>(GLOBAL_STATE_FILE_PREFIX) {
+
+            @Override
+            public void toXContent(XContentBuilder builder, MetaData state) throws IOException {
+                Builder.toXContent(state, builder, FORMAT_PARAMS);
+            }
+
+            @Override
+            public MetaData fromXContent(XContentParser parser) throws IOException {
+                return Builder.fromXContent(parser, preserveUnknownCustoms);
+            }
+        };
+    }
 }
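A minimal round-trip sketch of the new flag (the "my_plugin" custom and the abbreviated envelope are hypothetical; real on-disk metadata carries more fields):

    String json = "{\"meta-data\":{\"my_plugin\":{\"k\":\"v\"}}}";
    try (XContentParser parser = JsonXContent.jsonXContent.createParser(
            NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) {
        // true -> the unknown custom is kept as an UnknownGatewayOnlyCustom and is
        // re-emitted verbatim on write; false -> it is skipped, as before.
        MetaData metaData = MetaData.Builder.fromXContent(parser, true);
    }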
a/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java +++ b/server/src/main/java/org/elasticsearch/env/NodeRepurposeCommand.java @@ -29,6 +29,7 @@ import org.elasticsearch.cluster.metadata.Manifest; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.gateway.WriteStateException; @@ -165,7 +166,7 @@ private String toIndexName(NodeEnvironment.NodePath[] nodePaths, String uuid) { indexPaths[i] = nodePaths[i].resolve(uuid); } try { - IndexMetaData metaData = IndexMetaData.FORMAT.loadLatestState(logger, namedXContentRegistry, indexPaths); + IndexMetaData metaData = IndexMetaData.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, indexPaths); return metaData.getIndex().getName(); } catch (Exception e) { return "no name for uuid: " + uuid + ": " + e; @@ -194,7 +195,7 @@ private void rewriteManifest(Terminal terminal, Manifest manifest, Path[] dataPa private Manifest loadManifest(Terminal terminal, Path[] dataPaths) throws IOException { terminal.println(Terminal.Verbosity.VERBOSE, "Loading manifest"); - final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths); + final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths); if (manifest == null) { terminal.println(Terminal.Verbosity.SILENT, PRE_V7_MESSAGE); diff --git a/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java b/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java index 34c7e9599e07..f50bdf081ef8 100644 --- a/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java +++ b/server/src/main/java/org/elasticsearch/env/OverrideNodeVersionCommand.java @@ -25,6 +25,7 @@ import org.elasticsearch.Version; import org.elasticsearch.cli.Terminal; import org.elasticsearch.cluster.coordination.ElasticsearchNodeCommand; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; import java.io.IOException; import java.nio.file.Path; @@ -74,7 +75,7 @@ public OverrideNodeVersionCommand() { protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException { final Path[] nodePaths = Arrays.stream(toNodePaths(dataPaths)).map(p -> p.path).toArray(Path[]::new); final NodeMetaData nodeMetaData - = new NodeMetaData.NodeMetaDataStateFormat(true).loadLatestState(logger, namedXContentRegistry, nodePaths); + = new NodeMetaData.NodeMetaDataStateFormat(true).loadLatestState(logger, NamedXContentRegistry.EMPTY, nodePaths); if (nodeMetaData == null) { throw new ElasticsearchException(NO_METADATA_MESSAGE); } diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetaDataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetaDataTests.java index 36a78119c766..7d2b10beb327 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetaDataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetaDataTests.java @@ -365,7 +365,7 @@ public void testUnknownFieldClusterMetaData() throws IOException { .endObject() .endObject()); try (XContentParser parser = createParser(JsonXContent.jsonXContent, metadata)) { - MetaData.Builder.fromXContent(parser); + MetaData.Builder.fromXContent(parser, randomBoolean()); fail(); } catch (IllegalArgumentException e) { assertEquals("Unexpected field [random]", e.getMessage()); 
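To make the intent of the new preserveUnknownCustoms flag concrete: a command-line tool that parses on-disk state with NamedXContentRegistry.EMPTY keeps unknown customs as opaque maps instead of silently dropping them, so the state can be rewritten without losing plugin data. A minimal sketch, not part of the patch, where "my-plugin-custom" is a hypothetical custom type used only for illustration:

    import java.io.IOException;
    import java.nio.file.Path;
    import org.apache.logging.log4j.Logger;
    import org.elasticsearch.cluster.metadata.MetaData;
    import org.elasticsearch.common.xcontent.NamedXContentRegistry;

    class PreserveCustomsSketch {
        // round-trip global state without any plugin-provided named xcontent parsers
        static void rewritePreservingCustoms(Logger logger, Path... dataPaths) throws IOException {
            // unknown customs are parsed into UnknownGatewayOnlyCustom rather than skipped
            MetaData metaData = MetaData.FORMAT_PRESERVE_CUSTOMS
                .loadLatestState(logger, NamedXContentRegistry.EMPTY, dataPaths);
            assert metaData.custom("my-plugin-custom") instanceof MetaData.UnknownGatewayOnlyCustom; // hypothetical type
            // toXContent on the preserved custom re-emits the original map, so the rewrite is lossless
            MetaData.FORMAT_PRESERVE_CUSTOMS.write(metaData, dataPaths);
        }
    }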
diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java index e2d0fcf5188a..0338a64b6fe7 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/ToAndFromJsonMetaDataTests.java @@ -140,7 +140,7 @@ public void testSimpleJsonFromAndTo() throws IOException { String metaDataSource = MetaData.Builder.toXContent(metaData); - MetaData parsedMetaData = MetaData.Builder.fromXContent(createParser(JsonXContent.jsonXContent, metaDataSource)); + MetaData parsedMetaData = MetaData.Builder.fromXContent(createParser(JsonXContent.jsonXContent, metaDataSource), false); IndexMetaData indexMetaData = parsedMetaData.index("test1"); assertThat(indexMetaData.primaryTerm(0), equalTo(1L)); diff --git a/server/src/test/java/org/elasticsearch/gateway/MetaDataStateFormatTests.java b/server/src/test/java/org/elasticsearch/gateway/MetaDataStateFormatTests.java index 40f3bd8a0162..c7dab0dc4d4a 100644 --- a/server/src/test/java/org/elasticsearch/gateway/MetaDataStateFormatTests.java +++ b/server/src/test/java/org/elasticsearch/gateway/MetaDataStateFormatTests.java @@ -61,6 +61,8 @@ import java.util.stream.StreamSupport; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; @@ -80,7 +82,7 @@ public void toXContent(XContentBuilder builder, MetaData state) { @Override public MetaData fromXContent(XContentParser parser) throws IOException { - return MetaData.Builder.fromXContent(parser); + return MetaData.Builder.fromXContent(parser, false); } }; Path tmp = createTempDir(); @@ -233,7 +235,23 @@ public static void corruptFile(Path fileToCorrupt, Logger logger) throws IOExcep } } - public void testLoadState() throws IOException { + public void testLoadStateWithoutMissingCustoms() throws IOException { + runLoadStateTest(false, false); + } + + public void testLoadStateWithoutMissingCustomsButPreserved() throws IOException { + runLoadStateTest(false, true); + } + + public void testLoadStateWithMissingCustomsButPreserved() throws IOException { + runLoadStateTest(true, true); + } + + public void testLoadStateWithMissingCustomsAndNotPreserved() throws IOException { + runLoadStateTest(true, false); + } + + private void runLoadStateTest(boolean hasMissingCustoms, boolean preserveUnknownCustoms) throws IOException { final Path[] dirs = new Path[randomIntBetween(1, 5)]; int numStates = randomIntBetween(1, 5); List meta = new ArrayList<>(); @@ -241,7 +259,7 @@ public void testLoadState() throws IOException { meta.add(randomMeta()); } Set corruptedFiles = new HashSet<>(); - MetaDataStateFormat format = metaDataFormat(); + MetaDataStateFormat format = metaDataFormat(preserveUnknownCustoms); for (int i = 0; i < dirs.length; i++) { dirs[i] = createTempDir(); Files.createDirectories(dirs[i].resolve(MetaDataStateFormat.STATE_DIR_NAME)); @@ -258,11 +276,12 @@ public void testLoadState() throws IOException { } List dirList = Arrays.asList(dirs); Collections.shuffle(dirList, random()); - MetaData loadedMetaData = format.loadLatestState(logger, xContentRegistry(), dirList.toArray(new Path[0])); + MetaData loadedMetaData = format.loadLatestState(logger, hasMissingCustoms ? 
+ NamedXContentRegistry.EMPTY : xContentRegistry(), dirList.toArray(new Path[0])); MetaData latestMetaData = meta.get(numStates-1); assertThat(loadedMetaData.clusterUUID(), not(equalTo("_na_"))); assertThat(loadedMetaData.clusterUUID(), equalTo(latestMetaData.clusterUUID())); - ImmutableOpenMap indices = loadedMetaData.indices(); + ImmutableOpenMap indices = loadedMetaData.indices(); assertThat(indices.size(), equalTo(latestMetaData.indices().size())); for (IndexMetaData original : latestMetaData) { IndexMetaData deserialized = indices.get(original.getIndex().getName()); @@ -275,7 +294,23 @@ public void testLoadState() throws IOException { } // make sure the index tombstones are the same too - assertThat(loadedMetaData.indexGraveyard(), equalTo(latestMetaData.indexGraveyard())); + if (hasMissingCustoms) { + if (preserveUnknownCustoms) { + assertNotNull(loadedMetaData.custom(IndexGraveyard.TYPE)); + assertThat(loadedMetaData.custom(IndexGraveyard.TYPE), instanceOf(MetaData.UnknownGatewayOnlyCustom.class)); + + // check that we reserialize unknown metadata correctly again + final Path tempdir = createTempDir(); + metaDataFormat(randomBoolean()).write(loadedMetaData, tempdir); + final MetaData reloadedMetaData = metaDataFormat(randomBoolean()).loadLatestState(logger, xContentRegistry(), tempdir); + assertThat(reloadedMetaData.indexGraveyard(), equalTo(latestMetaData.indexGraveyard())); + } else { + assertNotNull(loadedMetaData.indexGraveyard()); + assertThat(loadedMetaData.indexGraveyard().getTombstones(), hasSize(0)); + } + } else { + assertThat(loadedMetaData.indexGraveyard(), equalTo(latestMetaData.indexGraveyard())); + } // now corrupt all the latest ones and make sure we fail to load the state for (int i = 0; i < dirs.length; i++) { @@ -419,7 +454,7 @@ public void testFailRandomlyAndReadAnyState() throws IOException { writeAndReadStateSuccessfully(format, paths); } - private static MetaDataStateFormat metaDataFormat() { + private static MetaDataStateFormat metaDataFormat(boolean preserveUnknownCustoms) { return new MetaDataStateFormat(MetaData.GLOBAL_STATE_FILE_PREFIX) { @Override public void toXContent(XContentBuilder builder, MetaData state) throws IOException { @@ -428,7 +463,7 @@ public void toXContent(XContentBuilder builder, MetaData state) throws IOExcepti @Override public MetaData fromXContent(XContentParser parser) throws IOException { - return MetaData.Builder.fromXContent(parser); + return MetaData.Builder.fromXContent(parser, preserveUnknownCustoms); } }; } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicensesMetaDataSerializationTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicensesMetaDataSerializationTests.java index d7799959f6cc..084d965a6e74 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicensesMetaDataSerializationTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/license/LicensesMetaDataSerializationTests.java @@ -80,7 +80,7 @@ public void testLicenseMetadataParsingDoesNotSwallowOtherMetaData() throws Excep builder = metaDataBuilder.build().toXContent(builder, params); builder.endObject(); // deserialize metadata again - MetaData metaData = MetaData.Builder.fromXContent(createParser(builder)); + MetaData metaData = MetaData.Builder.fromXContent(createParser(builder), randomBoolean()); // check that custom metadata still present assertThat(metaData.custom(licensesMetaData.getWriteableName()), notNullValue()); 
assertThat(metaData.custom(repositoriesMetaData.getWriteableName()), notNullValue()); diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherMetaDataSerializationTests.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherMetaDataSerializationTests.java index 0556b8535e42..75e5bc1073e6 100644 --- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherMetaDataSerializationTests.java +++ b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/WatcherMetaDataSerializationTests.java @@ -64,7 +64,7 @@ public void testWatcherMetadataParsingDoesNotSwallowOtherMetaData() throws Excep builder = metaDataBuilder.build().toXContent(builder, params); builder.endObject(); // deserialize metadata again - MetaData metaData = MetaData.Builder.fromXContent(createParser(builder)); + MetaData metaData = MetaData.Builder.fromXContent(createParser(builder), randomBoolean()); // check that custom metadata still present assertThat(metaData.custom(watcherMetaData.getWriteableName()), notNullValue()); assertThat(metaData.custom(repositoriesMetaData.getWriteableName()), notNullValue()); From 2605c7c821b5a01a65ad835fc06f53c376fb0759 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 10 Dec 2019 11:04:30 +0100 Subject: [PATCH 05/22] Improve Snapshot Finalization Ex. Handling (#49995) * Improve Snapshot Finalization Ex. Handling Like in #49989 we can get into a situation where the setting of the repository generation (during snapshot finalization) in the cluster state fails due to master failing over. In this case we should not try to execute the next cluster state update that will remove the snapshot from the cluster state. Closes #49989 --- .../elasticsearch/snapshots/SnapshotsService.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index c38462e24074..a48f893dfd40 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -35,6 +35,7 @@ import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateApplier; import org.elasticsearch.cluster.ClusterStateUpdateTask; +import org.elasticsearch.cluster.NotMasterException; import org.elasticsearch.cluster.RepositoryCleanupInProgress; import org.elasticsearch.cluster.RestoreInProgress; import org.elasticsearch.cluster.SnapshotDeletionsInProgress; @@ -42,6 +43,7 @@ import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus; import org.elasticsearch.cluster.SnapshotsInProgress.ShardState; import org.elasticsearch.cluster.SnapshotsInProgress.State; +import org.elasticsearch.cluster.coordination.FailedToCommitClusterStateException; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.MetaData; @@ -1051,8 +1053,16 @@ protected void doRun() { @Override public void onFailure(final Exception e) { Snapshot snapshot = entry.snapshot(); - logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e); - removeSnapshotFromClusterState(snapshot, null, e); + if (ExceptionsHelper.unwrap(e, NotMasterException.class, FailedToCommitClusterStateException.class) != null) { + // Failure due to not being master any more, don't 
try to remove snapshot from cluster state; the next master + // will try ending this snapshot again + logger.debug(() -> new ParameterizedMessage( + "[{}] failed to update cluster state during snapshot finalization", snapshot), e); + endingSnapshots.remove(snapshot); + } else { + logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e); + removeSnapshotFromClusterState(snapshot, null, e); + } } }); } From fcae55ae1ec31d95d1984a150c26e4d904558c4c Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 10 Dec 2019 10:44:31 +0000 Subject: [PATCH 06/22] Fix query analyzer logic for mixed conjunctions of terms and ranges (#49803) When the query analyzer examines a conjunction containing both terms and ranges, it should only include ranges in the minimum_should_match calculation if there are no other range queries on that same field within the conjunction. This is because we cannot build a selection query over disjoint ranges on the same field, and it is not easy to check if two range queries have an overlap. The current logic to calculate this just sets minimum_should_match to 1 or 0, depending on whether the current range is over a field that has already been seen. However, this can be incorrect in the case that there are terms in the same match group which adjust the minimum_should_match downwards. Instead, the logic should be changed to match the terms extraction, whereby we adjust minimum_should_match downwards if we have already seen a range field. Fixes #49684 --- .../percolator/QueryAnalyzer.java | 66 +++++---- .../percolator/QueryAnalyzerTests.java | 132 ++++++++++++++++++ 2 files changed, 168 insertions(+), 30 deletions(-) diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index 362c8870f652..f08600cdfd0e 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -232,7 +232,7 @@ private static Result handleConjunction(List conjunctionsWithUnknowns) { List conjunctions = conjunctionsWithUnknowns.stream().filter(r -> r.isUnknown() == false).collect(Collectors.toList()); if (conjunctions.isEmpty()) { if (conjunctionsWithUnknowns.isEmpty()) { - throw new IllegalArgumentException("Must have at least on conjunction sub result"); + throw new IllegalArgumentException("Must have at least one conjunction sub result"); } return conjunctionsWithUnknowns.get(0); // all conjunctions are unknown, so just return the first one } @@ -247,47 +247,53 @@ private static Result handleConjunction(List conjunctionsWithUnknowns) { int msm = 0; boolean verified = conjunctionsWithUnknowns.size() == conjunctions.size(); boolean matchAllDocs = true; - boolean hasDuplicateTerms = false; Set extractions = new HashSet<>(); Set seenRangeFields = new HashSet<>(); for (Result result : conjunctions) { - // In case that there are duplicate query extractions we need to be careful with - // incrementing msm, - // because that could lead to valid matches not becoming candidate matches: - // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3) - // doc: field: val1 val2 val3 - // So lets be protective and decrease the msm: + int resultMsm = result.minimumShouldMatch; for (QueryExtraction queryExtraction : result.extractions) { if (queryExtraction.range != null) { // In case of range queries each extraction does not simply 
increment the - // minimum_should_match - // for that percolator query like for a term based extraction, so that can lead - // to more false - // positives for percolator queries with range queries than term based queries. - // The is because the way number fields are extracted from the document to be - // percolated. - // Per field a single range is extracted and if a percolator query has two or - // more range queries - // on the same field, then the minimum should match can be higher than clauses - // in the CoveringQuery. - // Therefore right now the minimum should match is incremented once per number - // field when processing - // the percolator query at index time. - if (seenRangeFields.add(queryExtraction.range.fieldName)) { - resultMsm = 1; - } else { - resultMsm = 0; + // minimum_should_match for that percolator query like for a term based extraction, + // so that can lead to more false positives for percolator queries with range queries + // than term based queries. + // This is because the way number fields are extracted from the document to be + // percolated. Per field a single range is extracted and if a percolator query has two or + // more range queries on the same field, then the minimum should match can be higher than clauses + // in the CoveringQuery. Therefore right now the minimum should match is only incremented once per + // number field when processing the percolator query at index time. + // For multiple ranges within a single extraction (ie from an existing conjunction or disjunction) + // then this will already have been taken care of, so we only check against fieldnames from + // previously processed extractions, and don't add to the seenRangeFields list until all + // extractions from this result are processed + if (seenRangeFields.contains(queryExtraction.range.fieldName)) { + resultMsm = Math.max(0, resultMsm - 1); + verified = false; } } - - if (extractions.contains(queryExtraction)) { - resultMsm = Math.max(0, resultMsm - 1); - verified = false; + else { + // In case that there are duplicate term query extractions we need to be careful with + // incrementing msm, because that could lead to valid matches not becoming candidate matches: + // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3) + // doc: field: val1 val2 val3 + // So lets be protective and decrease the msm: + if (extractions.contains(queryExtraction)) { + resultMsm = Math.max(0, resultMsm - 1); + verified = false; + } } } msm += resultMsm; + // add range fields from this Result to the seenRangeFields set so that minimumShouldMatch is correctly + // calculated for subsequent Results + result.extractions.stream() + .map(e -> e.range) + .filter(Objects::nonNull) + .map(e -> e.fieldName) + .forEach(seenRangeFields::add); + if (result.verified == false // If some inner extractions are optional, the result can't be verified || result.minimumShouldMatch < result.extractions.size()) { @@ -299,7 +305,7 @@ private static Result handleConjunction(List conjunctionsWithUnknowns) { if (matchAllDocs) { return new Result(matchAllDocs, verified); } else { - return new Result(verified, extractions, hasDuplicateTerms ? 
1 : msm); + return new Result(verified, extractions, msm); } } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java index 91c815c40322..1c00d0555b41 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java @@ -78,6 +78,7 @@ import static org.elasticsearch.percolator.QueryAnalyzer.analyze; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; +import static org.hamcrest.collection.IsCollectionWithSize.hasSize; public class QueryAnalyzerTests extends ESTestCase { @@ -1208,4 +1209,135 @@ public void testIntervalQueries() { assertTermsEqual(result.extractions, new Term("field", "a")); } + public void testCombinedRangeAndTermWithMinimumShouldMatch() { + + Query disj = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 0, 10), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + + Result r = analyze(disj, Version.CURRENT); + assertThat(r.minimumShouldMatch, equalTo(1)); + assertThat(r.extractions, hasSize(2)); + assertFalse(r.matchAllDocs); + assertFalse(r.verified); + + Query q = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 0, 10), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.FILTER) + .setMinimumNumberShouldMatch(2) + .build(); + + Result result = analyze(q, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.extractions.size(), equalTo(2)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + q = new BooleanQuery.Builder() + .add(q, Occur.MUST) + .add(q, Occur.MUST) + .build(); + + result = analyze(q, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.extractions.size(), equalTo(2)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + Query q2 = new BooleanQuery.Builder() + .add(new TermQuery(new Term("f", "v1")), Occur.FILTER) + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + + result = analyze(q2, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(3)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple range queries on different fields + Query q3 = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(IntPoint.newRangeQuery("i2", 15, 20), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + result = analyze(q3, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple disjoint range queries on the same field + Query q4 = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(IntPoint.newRangeQuery("i", 25, 30), 
Occur.SHOULD) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + result = analyze(q4, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(5)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple conjunction range queries on the same field + Query q5 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 25, 30), Occur.MUST) + .build(), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.MUST) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .build(); + result = analyze(q5, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple conjunction range queries on different fields + Query q6 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.MUST) + .add(IntPoint.newRangeQuery("i2", 25, 30), Occur.MUST) + .build(), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.MUST) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .build(); + result = analyze(q6, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(3)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // mixed term and range conjunctions + Query q7 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 1, 2), Occur.MUST) + .add(new TermQuery(new Term("f", "1")), Occur.MUST) + .build(), Occur.MUST) + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 1, 2), Occur.MUST) + .add(new TermQuery(new Term("f", "2")), Occur.MUST) + .build(), Occur.MUST) + .build(); + result = analyze(q7, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(3)); + assertThat(result.extractions, hasSize(3)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + } + } From 90c59a1dbfd22190acfae4aaa29ea2342f6e7cec Mon Sep 17 00:00:00 2001 From: Przemyslaw Gomulka Date: Tue, 10 Dec 2019 14:22:10 +0100 Subject: [PATCH 07/22] Allow skipping ranges of versions (#50014) Multiple version ranges are allowed to be used in section skip in yml tests. This is useful when a bugfix was backported to latest versions and all previous releases contain a wire breaking bug. 
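To make the new syntax concrete, a single skip section can now name several disjoint ranges in one version string. The following is only an illustrative sketch with made-up version numbers and reason text, not a test taken from this patch:

    - skip:
        version: "6.1.0 - 6.3.0, 7.1.0 - 7.5.0"
        reason: fix only backported to 6.4.0+ and 7.6.0+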
examples: "6.1.0 - 6.3.0, 6.6.0 - 6.7.9, 7.0 -" and "- 7.2, 8.0.0 -" (an empty lower or upper bound defaults to the first known version or to the current version, respectively). --- .../search/180_locale_dependent_mapping.yml | 3 - .../test/rest/yaml/section/DoSection.java | 7 ++- .../test/rest/yaml/section/SkipSection.java | 58 ++++++++++--------- .../test/rest/yaml/section/VersionRange.java | 49 ++++++++++++++++ .../rest/yaml/section/SkipSectionTests.java | 21 +++++++ 5 files changed, 104 insertions(+), 34 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/VersionRange.java diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml index e9ba863675df..c4815304e079 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml @@ -1,8 +1,5 @@ --- "Test Index and Search locale dependent mappings / dates": - - skip: - version: " - 6.1.99" - reason: JDK9 only supports this with a special sysproperty added in 6.2.0 - do: indices.create: index: test_index diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java index ce94adf73bcd..1b588f554fa5 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/DoSection.java @@ -422,7 +422,7 @@ private static NodeSelector parseVersionSelector(XContentParser parser) throws I if (false == parser.currentToken().isValue()) { throw new XContentParseException(parser.getTokenLocation(), "expected [version] to be a value"); } - Version[] range = SkipSection.parseVersionRange(parser.text()); + List skipVersionRanges = SkipSection.parseVersionRanges(parser.text()); return new NodeSelector() { @Override public void select(Iterable nodes) { @@ -433,7 +433,8 @@ public void select(Iterable nodes) { + node); } Version version = Version.fromString(node.getVersion()); - if (false == (version.onOrAfter(range[0]) && version.onOrBefore(range[1]))) { + boolean skip = skipVersionRanges.stream().anyMatch(v -> v.contains(version)); + if (false == skip) { itr.remove(); } } @@ -441,7 +442,7 @@ public void select(Iterable nodes) { @Override public String toString() { - return "version between [" + range[0] + "] and [" + range[1] + "]"; + return "version ranges "+skipVersionRanges; } }; } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/SkipSection.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/SkipSection.java index e487f8e74da3..81eb47089201 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/SkipSection.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/SkipSection.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; /** @@ -98,33 +99,30 @@ public static SkipSection parse(XContentParser parser) throws IOException { public static final SkipSection EMPTY = new SkipSection(); - private final Version lowerVersion; - private final Version upperVersion; + private final List versionRanges; private final List features; private final String reason; private SkipSection() { - this.lowerVersion = null; - this.upperVersion = null; + 
this.versionRanges = new ArrayList<>(); this.features = new ArrayList<>(); this.reason = null; } public SkipSection(String versionRange, List features, String reason) { assert features != null; - Version[] versions = parseVersionRange(versionRange); - this.lowerVersion = versions[0]; - this.upperVersion = versions[1]; + this.versionRanges = parseVersionRanges(versionRange); + assert versionRanges.isEmpty() == false; this.features = features; this.reason = reason; } public Version getLowerVersion() { - return lowerVersion; + return versionRanges.get(0).getLower(); } public Version getUpperVersion() { - return upperVersion; + return versionRanges.get(versionRanges.size() - 1).getUpper(); } public List getFeatures() { @@ -139,10 +137,8 @@ public boolean skip(Version currentVersion) { if (isEmpty()) { return false; } - boolean skip = lowerVersion != null && upperVersion != null && currentVersion.onOrAfter(lowerVersion) - && currentVersion.onOrBefore(upperVersion); - skip |= Features.areAllSupported(features) == false; - return skip; + boolean skip = versionRanges.stream().anyMatch(range -> range.contains(currentVersion)); + return skip || Features.areAllSupported(features) == false; } public boolean isVersionCheck() { @@ -153,24 +149,30 @@ public boolean isEmpty() { return EMPTY.equals(this); } - static Version[] parseVersionRange(String versionRange) { - if (versionRange == null) { - return new Version[] { null, null }; + static List parseVersionRanges(String rawRanges) { + if (rawRanges == null) { + return Collections.singletonList(new VersionRange(null, null)); } - if (versionRange.trim().equals("all")) { - return new Version[]{VersionUtils.getFirstVersion(), Version.CURRENT}; - } - String[] skipVersions = versionRange.split("-"); - if (skipVersions.length > 2) { - throw new IllegalArgumentException("version range malformed: " + versionRange); + if (rawRanges.trim().equals("all")) { + return Collections.singletonList(new VersionRange(VersionUtils.getFirstVersion(), Version.CURRENT)); } + String[] ranges = rawRanges.split(","); + List versionRanges = new ArrayList<>(); + for (String rawRange : ranges) { + String[] skipVersions = rawRange.split("-", -1); + if (skipVersions.length > 2) { + throw new IllegalArgumentException("version range malformed: " + rawRanges); + } - String lower = skipVersions[0].trim(); - String upper = skipVersions[1].trim(); - return new Version[] { - lower.isEmpty() ? VersionUtils.getFirstVersion() : Version.fromString(lower), - upper.isEmpty() ? Version.CURRENT : Version.fromString(upper) - }; + String lower = skipVersions[0].trim(); + String upper = skipVersions[1].trim(); + VersionRange versionRange = new VersionRange( + lower.isEmpty() ? VersionUtils.getFirstVersion() : Version.fromString(lower), + upper.isEmpty() ? Version.CURRENT : Version.fromString(upper) + ); + versionRanges.add(versionRange); + } + return versionRanges; } public String getSkipMessage(String description) { diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/VersionRange.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/VersionRange.java new file mode 100644 index 000000000000..f1b1df2a1a16 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/section/VersionRange.java @@ -0,0 +1,49 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.test.rest.yaml.section; + +import org.elasticsearch.Version; + +public class VersionRange { + private final Version lower; + private final Version upper; + + public VersionRange(Version lower, Version upper) { + this.lower = lower; + this.upper = upper; + } + + public Version getLower() { + return lower; + } + + public Version getUpper() { + return upper; + } + + public boolean contains(Version currentVersion) { + return lower != null && upper != null && currentVersion.onOrAfter(lower) + && currentVersion.onOrBefore(upper); + } + + @Override + public String toString() { + return "[" + lower + " - " + upper + "]"; + } +} diff --git a/test/framework/src/test/java/org/elasticsearch/test/rest/yaml/section/SkipSectionTests.java b/test/framework/src/test/java/org/elasticsearch/test/rest/yaml/section/SkipSectionTests.java index e92ef2ce1357..45273912f1d5 100644 --- a/test/framework/src/test/java/org/elasticsearch/test/rest/yaml/section/SkipSectionTests.java +++ b/test/framework/src/test/java/org/elasticsearch/test/rest/yaml/section/SkipSectionTests.java @@ -33,6 +33,27 @@ public class SkipSectionTests extends AbstractClientYamlTestFragmentParserTestCase { + public void testSkipMultiRange() { + SkipSection section = new SkipSection("6.0.0 - 6.1.0, 7.1.0 - 7.5.0", + Collections.emptyList() , "foobar"); + + assertFalse(section.skip(Version.CURRENT)); + assertFalse(section.skip(Version.fromString("6.2.0"))); + assertFalse(section.skip(Version.fromString("7.0.0"))); + assertFalse(section.skip(Version.fromString("7.6.0"))); + + assertTrue(section.skip(Version.fromString("6.0.0"))); + assertTrue(section.skip(Version.fromString("6.1.0"))); + assertTrue(section.skip(Version.fromString("7.1.0"))); + assertTrue(section.skip(Version.fromString("7.5.0"))); + + section = new SkipSection("- 7.1.0, 7.2.0 - 7.5.0, 8.0.0 -", + Collections.emptyList() , "foobar"); + assertTrue(section.skip(Version.fromString("7.0.0"))); + assertTrue(section.skip(Version.fromString("7.3.0"))); + assertTrue(section.skip(Version.fromString("8.0.0"))); + } + public void testSkip() { SkipSection section = new SkipSection("6.0.0 - 6.1.0", randomBoolean() ? Collections.emptyList() : Collections.singletonList("warnings"), "foobar"); From 0062d5f301a49b0a0c076acfaf2da57753739376 Mon Sep 17 00:00:00 2001 From: James Rodewig Date: Tue, 10 Dec 2019 09:30:04 -0500 Subject: [PATCH 08/22] [DOCS] Remove shadow replica reference (#50029) Removes a reference to shadow replicas from the cat shards API docs and a comment in cluster/routing/UnassignedInfo.java. Shadow replicas were removed with #23906. 
--- docs/reference/cat/shards.asciidoc | 2 +- .../java/org/elasticsearch/cluster/routing/UnassignedInfo.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/cat/shards.asciidoc b/docs/reference/cat/shards.asciidoc index 61b1c869f042..b26472812f51 100644 --- a/docs/reference/cat/shards.asciidoc +++ b/docs/reference/cat/shards.asciidoc @@ -257,7 +257,7 @@ Reason the shard is unassigned. Returned values are: * `NEW_INDEX_RESTORED`: Unassigned as a result of restoring into a new index. * `NODE_LEFT`: Unassigned as a result of the node hosting it leaving the cluster. * `REALLOCATED_REPLICA`: A better replica location is identified and causes the existing replica allocation to be cancelled. -* `REINITIALIZED`: When a shard moves from started back to initializing, for example, with shadow replicas. +* `REINITIALIZED`: When a shard moves from started back to initializing. * `REPLICA_ADDED`: Unassigned as a result of explicit addition of a replica. * `REROUTE_CANCELLED`: Unassigned as a result of explicit cancel reroute command. diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java b/server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java index 42b3fde5e0c9..9c862c7a0005 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/UnassignedInfo.java @@ -104,7 +104,7 @@ public enum Reason { */ REROUTE_CANCELLED, /** - * When a shard moves from started back to initializing, for example, during shadow replica + * When a shard moves from started back to initializing. */ REINITIALIZED, /** From 3e6dc03de6af97a825e3f1a2d18328c5d7340622 Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Tue, 10 Dec 2019 08:03:43 -0800 Subject: [PATCH 09/22] [DOCS] Removes realm type security setting (#50001) --- docs/reference/settings/security-settings.asciidoc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/reference/settings/security-settings.asciidoc b/docs/reference/settings/security-settings.asciidoc index 6fb5084b94a9..e5fc39ea9036 100644 --- a/docs/reference/settings/security-settings.asciidoc +++ b/docs/reference/settings/security-settings.asciidoc @@ -188,7 +188,7 @@ namespace in `elasticsearch.yml`. For example: ---------------------------------------- xpack.security.authc.realms: - native.realm1: + native.realm1: <1> order: 0 ... @@ -201,6 +201,9 @@ xpack.security.authc.realms: ... ... ---------------------------------------- +<1> Specifies the type of realm (for example, `native`, `ldap`, +`active_directory`, `pki`, `file`, `kerberos`, `saml`) and the realm name. This +information is required. The valid settings vary depending on the realm type. For more information, see <>. @@ -209,9 +212,6 @@ information, see <>. [[ref-realm-settings]] ===== Settings valid for all realms -`type`:: -The type of the realm: `native`, `ldap`, `active_directory`, `pki`, or `file`. Required. - `order`:: The priority of the realm within the realm chain. Realms with a lower order are consulted first. Although not required, use of this setting is strongly From 1329acc094c07a9ba39c7af65b29172fef7052ef Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 10 Dec 2019 17:09:36 +0100 Subject: [PATCH 10/22] Upgrade to lucene 8.4.0-snapshot-662c455. (#50016) Lucene 8.4 is about to be released so we should check it doesn't cause problems with Elasticsearch. 
--- buildSrc/version.properties | 2 +- .../reference/mapping/params/normalizer.asciidoc | 6 +++--- docs/reference/search/explain.asciidoc | 8 ++++---- docs/reference/search/request-body.asciidoc | 4 ++-- .../search/request/highlighting.asciidoc | 8 ++++---- .../reference/search/request/inner-hits.asciidoc | 8 ++++---- docs/reference/search/search.asciidoc | 4 ++-- docs/reference/search/uri-request.asciidoc | 4 ++-- ...e-expressions-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...pressions-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...analyzers-icu-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...yzers-icu-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...zers-kuromoji-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...-kuromoji-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...nalyzers-nori-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...zers-nori-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...zers-phonetic-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...-phonetic-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...yzers-smartcn-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...s-smartcn-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...yzers-stempel-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...s-stempel-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...rs-morfologik-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...orfologik-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...lyzers-common-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...rs-common-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...ckward-codecs-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...rd-codecs-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - .../lucene-core-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...cene-core-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...cene-grouping-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...-grouping-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...e-highlighter-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...ghlighter-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - .../lucene-join-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...cene-join-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...lucene-memory-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...ne-memory-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - .../lucene-misc-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...cene-misc-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...ucene-queries-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...e-queries-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...e-queryparser-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...eryparser-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...ucene-sandbox-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...e-sandbox-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...ucene-spatial-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...e-spatial-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...patial-extras-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...al-extras-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...ene-spatial3d-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...spatial3d-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - ...ucene-suggest-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...e-suggest-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - .../uhighlight/CustomUnifiedHighlighter.java | 3 +-- .../org/elasticsearch/common/lucene/Lucene.java | 2 +- .../elasticsearch/index/codec/CodecService.java | 6 +++--- .../codec/PerFieldMappingPostingFormatCodec.java | 4 ++-- .../index/mapper/CompletionFieldMapper.java | 4 ++-- .../elasticsearch/indices/IndicesQueryCache.java | 6 +++--- .../elasticsearch/index/codec/CodecTests.java | 16 ++++++++-------- .../lucene-core-8.4.0-snapshot-662c455.jar.sha1 | 1 + ...cene-core-8.4.0-snapshot-e648d601efb.jar.sha1 | 1 - 63 files changed, 66 insertions(+), 67 deletions(-) create mode 100644 
modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-analyzers-common-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-analyzers-common-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-backward-codecs-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-backward-codecs-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-grouping-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-grouping-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-highlighter-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-highlighter-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-join-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-join-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-memory-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-memory-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-misc-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-misc-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-queries-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-queries-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-queryparser-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-queryparser-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-sandbox-8.4.0-snapshot-662c455.jar.sha1 delete mode 
100644 server/licenses/lucene-sandbox-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-spatial-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-spatial-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-spatial-extras-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-spatial-extras-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-spatial3d-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-spatial3d-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 server/licenses/lucene-suggest-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 server/licenses/lucene-suggest-8.4.0-snapshot-e648d601efb.jar.sha1 create mode 100644 x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 delete mode 100644 x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 6c7d6798a65c..ad486276f082 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -1,5 +1,5 @@ elasticsearch = 8.0.0 -lucene = 8.4.0-snapshot-e648d601efb +lucene = 8.4.0-snapshot-662c455 bundled_jdk_vendor = adoptopenjdk bundled_jdk = 13.0.1+9 diff --git a/docs/reference/mapping/params/normalizer.asciidoc b/docs/reference/mapping/params/normalizer.asciidoc index 1e7e6870c302..b218d311c720 100644 --- a/docs/reference/mapping/params/normalizer.asciidoc +++ b/docs/reference/mapping/params/normalizer.asciidoc @@ -90,12 +90,12 @@ both index and query time. "value": 2, "relation": "eq" }, - "max_score": 0.47000363, + "max_score": 0.4700036, "hits": [ { "_index": "index", "_id": "1", - "_score": 0.47000363, + "_score": 0.4700036, "_source": { "foo": "BÀR" } @@ -103,7 +103,7 @@ both index and query time. 
{ "_index": "index", "_id": "2", - "_score": 0.47000363, + "_score": 0.4700036, "_source": { "foo": "bar" } diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index a9d431e70228..91654f32adcc 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -106,12 +106,12 @@ The API returns the following response: "_id":"0", "matched":true, "explanation":{ - "value":1.6943597, + "value":1.6943598, "description":"weight(message:elasticsearch in 0) [PerFieldSimilarity], result of:", "details":[ { - "value":1.6943597, - "description":"score(freq=1.0), product of:", + "value":1.6943598, + "description":"score(freq=1.0), computed as boost * idf * tf from:", "details":[ { "value":2.2, @@ -135,7 +135,7 @@ The API returns the following response: ] }, { - "value":0.5555555, + "value":0.5555556, "description":"tf, computed as freq / (freq + k1 * (1 - b + b * dl / avgdl)) from:", "details":[ { diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index d236a83c8eac..c3a9fe71e16b 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -118,12 +118,12 @@ The API returns the following response: "value": 1, "relation": "eq" }, - "max_score": 1.3862944, + "max_score": 1.3862942, "hits" : [ { "_index" : "twitter", "_id" : "0", - "_score": 1.3862944, + "_score": 1.3862942, "_source" : { "user" : "kimchy", "message": "trying out Elasticsearch", diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index e8171d43b17f..cb9b84ad3378 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -840,12 +840,12 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 1.601195, + "max_score": 1.6011951, "hits": [ { "_index": "twitter", "_id": "1", - "_score": 1.601195, + "_score": 1.6011951, "_source": { "user": "test", "message": "some message with the number 1", @@ -897,12 +897,12 @@ Response: "value": 1, "relation": "eq" }, - "max_score": 1.601195, + "max_score": 1.6011951, "hits": [ { "_index": "twitter", "_id": "1", - "_score": 1.601195, + "_score": 1.6011951, "_source": { "user": "test", "message": "some message with the number 1", diff --git a/docs/reference/search/request/inner-hits.asciidoc b/docs/reference/search/request/inner-hits.asciidoc index b356c2cfc2d7..53ae303e484a 100644 --- a/docs/reference/search/request/inner-hits.asciidoc +++ b/docs/reference/search/request/inner-hits.asciidoc @@ -379,12 +379,12 @@ Which would look like: "value": 1, "relation": "eq" }, - "max_score": 0.6931472, + "max_score": 0.6931471, "hits": [ { "_index": "test", "_id": "1", - "_score": 0.6931472, + "_score": 0.6931471, "_source": ..., "inner_hits": { "comments.votes": { <1> @@ -393,7 +393,7 @@ Which would look like: "value": 1, "relation": "eq" }, - "max_score": 0.6931472, + "max_score": 0.6931471, "hits": [ { "_index": "test", @@ -406,7 +406,7 @@ Which would look like: "offset": 0 } }, - "_score": 0.6931472, + "_score": 0.6931471, "_source": { "value": 1, "voter": "kimchy" diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 85c97f6c74d5..ccec2345a0e5 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -360,12 +360,12 @@ The API returns the following response: "value" : 1, "relation" : "eq" }, - "max_score" : 1.3862944, + 
"max_score" : 1.3862942, "hits" : [ { "_index" : "twitter", "_id" : "0", - "_score" : 1.3862944, + "_score" : 1.3862942, "_source" : { "date" : "2009-11-15T14:12:12", "likes" : 0, diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index ff234f415a3d..695c4a6ada11 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -134,12 +134,12 @@ The API returns the following response: "value": 1, "relation": "eq" }, - "max_score": 1.3862944, + "max_score": 1.3862942, "hits" : [ { "_index" : "twitter", "_id" : "0", - "_score": 1.3862944, + "_score": 1.3862942, "_source" : { "user" : "kimchy", "date" : "2009-11-15T14:12:12", diff --git a/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-662c455.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..1c4c5ce2b62d --- /dev/null +++ b/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +4041db9db7c394584571b45812734732912ef8e2 \ No newline at end of file diff --git a/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-e648d601efb.jar.sha1 b/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 7a75661f63f6..000000000000 --- a/modules/lang-expression/licenses/lucene-expressions-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -43b9178f582373f4fcee61837404c0cc8636043e \ No newline at end of file diff --git a/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..0fc96bc500ef --- /dev/null +++ b/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +d5bddd6b7660439e29bbce26ded283931c756d75 \ No newline at end of file diff --git a/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 2765cfafb052..000000000000 --- a/plugins/analysis-icu/licenses/lucene-analyzers-icu-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8ee342fa6e6306e56b583251639a661250fada46 \ No newline at end of file diff --git a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..388bc9748b7f --- /dev/null +++ b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +4303858c346c51bbbc68c32eb25f7f372b09331c \ No newline at end of file diff --git a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index f653bf5c3b5d..000000000000 --- a/plugins/analysis-kuromoji/licenses/lucene-analyzers-kuromoji-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -7e31f2a38d1434eb50781efc65b0e028f08d7821 \ No newline at end of file diff --git a/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-662c455.jar.sha1 
new file mode 100644 index 000000000000..07ff7fd907a2 --- /dev/null +++ b/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +b1a9182ed1b92a121c1587fe9710aa7a41f3f77a \ No newline at end of file diff --git a/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 2c3ee0313a9c..000000000000 --- a/plugins/analysis-nori/licenses/lucene-analyzers-nori-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9079d81a8ea2c7190ef09ca06a987d1cab2fdf17 \ No newline at end of file diff --git a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..95e603ec1888 --- /dev/null +++ b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +4df747b25286baecf5e790bf76bc40038c059691 \ No newline at end of file diff --git a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 5de2626b6ad2..000000000000 --- a/plugins/analysis-phonetic/licenses/lucene-analyzers-phonetic-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -f253f59d4e8bb6e55eb307b011ddb81ba0ebab92 \ No newline at end of file diff --git a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..4eaf91f30839 --- /dev/null +++ b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +88d3f8f9134b95884f3b80280b09aa2513b71297 \ No newline at end of file diff --git a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index fcb579806bfe..000000000000 --- a/plugins/analysis-smartcn/licenses/lucene-analyzers-smartcn-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -36547378493e6e84f63dc744df8d414cb2add1a4 \ No newline at end of file diff --git a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-662c455.jar.sha1 b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..e28b8d87cd55 --- /dev/null +++ b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +9ddccf575ee03a1329c8d1eb2e4ee7a6e3f3f56f \ No newline at end of file diff --git a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index d26f99ab24e7..000000000000 --- a/plugins/analysis-stempel/licenses/lucene-analyzers-stempel-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8b15a376efa7d4289b697144f34a819a9f8772f1 \ No newline at end of file diff --git a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-662c455.jar.sha1 
b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..1b8ec8c5831c --- /dev/null +++ b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +e115e562a42c12a3292fb138607855c1fdfb0772 \ No newline at end of file diff --git a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-e648d601efb.jar.sha1 b/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 43a7650c70d7..000000000000 --- a/plugins/analysis-ukrainian/licenses/lucene-analyzers-morfologik-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -d1bc4170e6981ca9af71d7a4ce46a3feb2f7b613 \ No newline at end of file diff --git a/server/licenses/lucene-analyzers-common-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-analyzers-common-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..d6f8049f7b1e --- /dev/null +++ b/server/licenses/lucene-analyzers-common-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +061fb94ab616492721f8868dcaec3fbc989733be \ No newline at end of file diff --git a/server/licenses/lucene-analyzers-common-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-analyzers-common-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index c2ec1128c174..000000000000 --- a/server/licenses/lucene-analyzers-common-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1cb225781b19e758d216987e363b77fa4b041174 \ No newline at end of file diff --git a/server/licenses/lucene-backward-codecs-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-backward-codecs-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..243c4420beab --- /dev/null +++ b/server/licenses/lucene-backward-codecs-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +503f3d516889a99e1c0e2dbdba7bf9cc9900c54c \ No newline at end of file diff --git a/server/licenses/lucene-backward-codecs-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-backward-codecs-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index b6486fb3eeba..000000000000 --- a/server/licenses/lucene-backward-codecs-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -cbbf849e24ef0cc61312579acf6d6c5b72c99cf5 \ No newline at end of file diff --git a/server/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..d1657fccc5ee --- /dev/null +++ b/server/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +8ca36adea0a904ec725d57f509a62652a53ecff8 \ No newline at end of file diff --git a/server/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 4b736046f3ad..000000000000 --- a/server/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -aa74590851b6fcf536976f75448be52f6ca18a4a \ No newline at end of file diff --git a/server/licenses/lucene-grouping-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-grouping-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..f1f0684d9b38 --- /dev/null +++ b/server/licenses/lucene-grouping-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +f176fdcf8fc574f4cb1c549aaa4da0301afd34ba \ No newline at end of file diff --git 
a/server/licenses/lucene-grouping-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-grouping-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 97a3c7b927b8..000000000000 --- a/server/licenses/lucene-grouping-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1bd113010c183168d79fbc10a6b590fdacc3fa35 \ No newline at end of file diff --git a/server/licenses/lucene-highlighter-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-highlighter-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..a9ad6fb95cb8 --- /dev/null +++ b/server/licenses/lucene-highlighter-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +db5ea7b647309e5d29fa92bcbb6b11286d11436d \ No newline at end of file diff --git a/server/licenses/lucene-highlighter-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-highlighter-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index f2dd654d8d64..000000000000 --- a/server/licenses/lucene-highlighter-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4e44a435e14d12113ca9193182a302677fda155e \ No newline at end of file diff --git a/server/licenses/lucene-join-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-join-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..6ef1d079f63f --- /dev/null +++ b/server/licenses/lucene-join-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +36329bc2ea6a5640d4128206221456656de7bbe2 \ No newline at end of file diff --git a/server/licenses/lucene-join-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-join-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 9e8d72cc13fc..000000000000 --- a/server/licenses/lucene-join-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -eb8eacd015ef81ef2055ada357a92c9751308ef1 \ No newline at end of file diff --git a/server/licenses/lucene-memory-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-memory-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..eeb424851022 --- /dev/null +++ b/server/licenses/lucene-memory-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +083f492781b3d2c1d470bd1439c875ebf74a14eb \ No newline at end of file diff --git a/server/licenses/lucene-memory-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-memory-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index e6048ffd9122..000000000000 --- a/server/licenses/lucene-memory-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4dc565203bb1eab0222c52215891e207e7032209 \ No newline at end of file diff --git a/server/licenses/lucene-misc-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-misc-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..6f5d479c76d6 --- /dev/null +++ b/server/licenses/lucene-misc-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +9cd5ea7bc08d93053ca993bd6fc1c9cd0a1b91fd \ No newline at end of file diff --git a/server/licenses/lucene-misc-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-misc-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 480dcc632907..000000000000 --- a/server/licenses/lucene-misc-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -ef596e6d2a7ac9c7dfc6196dad75dc719c81ce85 \ No newline at end of file diff --git a/server/licenses/lucene-queries-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-queries-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..30733a5a5776 --- /dev/null +++ 
b/server/licenses/lucene-queries-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +89e39f65d1c42b5849ccf3a8e6cc9b3b277c08a6 \ No newline at end of file diff --git a/server/licenses/lucene-queries-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-queries-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 2524672e062b..000000000000 --- a/server/licenses/lucene-queries-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b0c963e68dd71444f09336258c8f63425514426a \ No newline at end of file diff --git a/server/licenses/lucene-queryparser-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-queryparser-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..98b065176a41 --- /dev/null +++ b/server/licenses/lucene-queryparser-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +651f6a0075ee30b814c8b56020d95155424c0e67 \ No newline at end of file diff --git a/server/licenses/lucene-queryparser-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-queryparser-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 4ab7a7fe6f64..000000000000 --- a/server/licenses/lucene-queryparser-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -bfab3e9b0467662a8ff969da215dc4a999b73076 \ No newline at end of file diff --git a/server/licenses/lucene-sandbox-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-sandbox-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..484ce6b5c00f --- /dev/null +++ b/server/licenses/lucene-sandbox-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +935968488cc2bbcd3ced9c254f690e7c90447d9e \ No newline at end of file diff --git a/server/licenses/lucene-sandbox-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-sandbox-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 9361e9252f21..000000000000 --- a/server/licenses/lucene-sandbox-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -dadfc90e4cd032f8a4db5cc1e0bdddecea635edb \ No newline at end of file diff --git a/server/licenses/lucene-spatial-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-spatial-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..1bb42417cb14 --- /dev/null +++ b/server/licenses/lucene-spatial-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +0bbdd0002d8d87e54b5caff6c77a1627bf449d38 \ No newline at end of file diff --git a/server/licenses/lucene-spatial-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-spatial-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index ce5a13ec8d6b..000000000000 --- a/server/licenses/lucene-spatial-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e72dd79d30781e4d05bc8397ae61d0b51d7ad522 \ No newline at end of file diff --git a/server/licenses/lucene-spatial-extras-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-spatial-extras-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..2bdbd889b445 --- /dev/null +++ b/server/licenses/lucene-spatial-extras-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +255b547571dcec118ff1a0560bb16e259f96b76a \ No newline at end of file diff --git a/server/licenses/lucene-spatial-extras-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-spatial-extras-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 4530b17e84e2..000000000000 --- a/server/licenses/lucene-spatial-extras-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -e6b6dbd0526287f25d98d7fe354d5e290c875b8a \ No newline at end of file 
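Each of these one-line files records the SHA-1 digest of the jar it names, stored without a trailing newline (hence the repeated "\ No newline at end of file" markers); the wholesale swap from the e648d601efb suffix to 662c455 tracks the Lucene 8.4.0 snapshot bump. As a minimal sketch of this style of verification, assuming hypothetical class and method names (the real check lives in the build's dependency-license machinery, not in this code):

    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.security.MessageDigest;

    final class Sha1Check {
        /** True if the jar's SHA-1 matches the one-line .jar.sha1 file checked in beside it. */
        static boolean matches(Path jar, Path shaFile) throws Exception {
            MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
            try (InputStream in = Files.newInputStream(jar)) {
                byte[] buf = new byte[8192];
                for (int n; (n = in.read(buf)) != -1; ) {
                    sha1.update(buf, 0, n); // stream the jar through the digest
                }
            }
            StringBuilder hex = new StringBuilder();
            for (byte b : sha1.digest()) { // lowercase hex, matching the committed files
                hex.append(Character.forDigit((b >> 4) & 0xf, 16)).append(Character.forDigit(b & 0xf, 16));
            }
            return hex.toString().equals(Files.readString(shaFile).trim());
        }
    }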
diff --git a/server/licenses/lucene-spatial3d-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-spatial3d-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..e7036243119a --- /dev/null +++ b/server/licenses/lucene-spatial3d-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +739af6d9876f6aa7f2a3d46fa3f236a5d6ee3653 \ No newline at end of file diff --git a/server/licenses/lucene-spatial3d-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-spatial3d-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index a96977cf1340..000000000000 --- a/server/licenses/lucene-spatial3d-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6351edfc6dde2aefd8f6d8ef33ae5a6e08f88321 \ No newline at end of file diff --git a/server/licenses/lucene-suggest-8.4.0-snapshot-662c455.jar.sha1 b/server/licenses/lucene-suggest-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..72c92c101b05 --- /dev/null +++ b/server/licenses/lucene-suggest-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +20fa11a541a7ca3a50caa443a9abf0276b1194ea \ No newline at end of file diff --git a/server/licenses/lucene-suggest-8.4.0-snapshot-e648d601efb.jar.sha1 b/server/licenses/lucene-suggest-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 090cf9ee734c..000000000000 --- a/server/licenses/lucene-suggest-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -921dd4ab493b9d70a0b1bf7b0fe8a6790b7e8036 \ No newline at end of file diff --git a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 2d35de522b5f..db79122fa3da 100644 --- a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -31,7 +31,6 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; @@ -136,7 +135,7 @@ protected FieldHighlighter getFieldHighlighter(String field, Query query, Set highlightFlags = getFlags(field); PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags); - CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags); + LabelledCharArrayMatcher[] automata = getAutomata(field, query, highlightFlags); UHComponents components = new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, false , highlightFlags); OffsetSource offsetSource = getOptimizedOffsetSource(components); BreakIterator breakIterator = new SplittingBreakIterator(getBreakIterator(field), diff --git a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java index fcab160108b2..ce00c7755205 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -107,7 +107,7 @@ public class Lucene { public static final String LATEST_DOC_VALUES_FORMAT = "Lucene70"; public static final String LATEST_POSTINGS_FORMAT = "Lucene50"; - public static final String LATEST_CODEC = "Lucene80"; + public static final String LATEST_CODEC = "Lucene84"; static { Deprecated annotation = 
PostingsFormat.forName(LATEST_POSTINGS_FORMAT).getClass().getAnnotation(Deprecated.class); diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java index 485c40d5d9bb..0b1c96a6911d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -22,7 +22,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; -import org.apache.lucene.codecs.lucene80.Lucene80Codec; +import org.apache.lucene.codecs.lucene84.Lucene84Codec; import org.elasticsearch.common.Nullable; import org.elasticsearch.index.mapper.MapperService; @@ -47,8 +47,8 @@ public class CodecService { public CodecService(@Nullable MapperService mapperService, Logger logger) { final var codecs = new HashMap(); if (mapperService == null) { - codecs.put(DEFAULT_CODEC, new Lucene80Codec()); - codecs.put(BEST_COMPRESSION_CODEC, new Lucene80Codec(Mode.BEST_COMPRESSION)); + codecs.put(DEFAULT_CODEC, new Lucene84Codec()); + codecs.put(BEST_COMPRESSION_CODEC, new Lucene84Codec(Mode.BEST_COMPRESSION)); } else { codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java index 4a154abd8ead..ccaa873af279 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMappingPostingFormatCodec.java @@ -23,7 +23,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; -import org.apache.lucene.codecs.lucene80.Lucene80Codec; +import org.apache.lucene.codecs.lucene84.Lucene84Codec; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.index.mapper.CompletionFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; @@ -37,7 +37,7 @@ * per index in real time via the mapping API. If no specific postings format is * configured for a specific field the default postings format is used. 
*/ -public class PerFieldMappingPostingFormatCodec extends Lucene80Codec { +public class PerFieldMappingPostingFormatCodec extends Lucene84Codec { private final Logger logger; private final MapperService mapperService; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java index 5fd06633bcfc..5f6b71d6522c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java @@ -24,7 +24,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.suggest.document.Completion50PostingsFormat; +import org.apache.lucene.search.suggest.document.Completion84PostingsFormat; import org.apache.lucene.search.suggest.document.CompletionAnalyzer; import org.apache.lucene.search.suggest.document.CompletionQuery; import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery; @@ -265,7 +265,7 @@ public boolean preservePositionIncrements() { */ public static synchronized PostingsFormat postingsFormat() { if (postingsFormat == null) { - postingsFormat = new Completion50PostingsFormat(); + postingsFormat = new Completion84PostingsFormat(); } return postingsFormat; } diff --git a/server/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java b/server/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java index dc054f8b51d3..9183b1a82656 100644 --- a/server/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java +++ b/server/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java @@ -80,7 +80,7 @@ public IndicesQueryCache(Settings settings) { logger.debug("using [node] query cache with size [{}] max filter count [{}]", size, count); if (INDICES_QUERIES_CACHE_ALL_SEGMENTS_SETTING.get(settings)) { - cache = new ElasticsearchLRUQueryCache(count, size.getBytes(), context -> true); + cache = new ElasticsearchLRUQueryCache(count, size.getBytes(), context -> true, 1f); } else { cache = new ElasticsearchLRUQueryCache(count, size.getBytes()); } @@ -250,8 +250,8 @@ public void onClose(ShardId shardId) { private class ElasticsearchLRUQueryCache extends LRUQueryCache { - ElasticsearchLRUQueryCache(int maxSize, long maxRamBytesUsed, Predicate leavesToCache) { - super(maxSize, maxRamBytesUsed, leavesToCache); + ElasticsearchLRUQueryCache(int maxSize, long maxRamBytesUsed, Predicate leavesToCache, float skipFactor) { + super(maxSize, maxRamBytesUsed, leavesToCache, skipFactor); } ElasticsearchLRUQueryCache(int maxSize, long maxRamBytesUsed) { diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index fa775a84c72a..dc5b8031a6c5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -19,11 +19,16 @@ package org.elasticsearch.index.codec; +import static org.hamcrest.Matchers.instanceOf; + +import java.io.IOException; +import java.util.Collections; + import org.apache.logging.log4j.LogManager; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.Mode; -import org.apache.lucene.codecs.lucene80.Lucene80Codec; +import org.apache.lucene.codecs.lucene84.Lucene84Codec; 
import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; @@ -42,19 +47,14 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; -import java.io.IOException; -import java.util.Collections; - -import static org.hamcrest.Matchers.instanceOf; - @SuppressCodecs("*") // we test against default codec so never get a random one here! public class CodecTests extends ESTestCase { public void testResolveDefaultCodecs() throws Exception { CodecService codecService = createCodecService(); assertThat(codecService.codec("default"), instanceOf(PerFieldMappingPostingFormatCodec.class)); - assertThat(codecService.codec("default"), instanceOf(Lucene80Codec.class)); - assertThat(codecService.codec("Lucene80"), instanceOf(Lucene80Codec.class)); + assertThat(codecService.codec("default"), instanceOf(Lucene84Codec.class)); + assertThat(codecService.codec("Lucene84"), instanceOf(Lucene84Codec.class)); } public void testDefault() throws Exception { diff --git a/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 b/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 new file mode 100644 index 000000000000..d1657fccc5ee --- /dev/null +++ b/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-662c455.jar.sha1 @@ -0,0 +1 @@ +8ca36adea0a904ec725d57f509a62652a53ecff8 \ No newline at end of file diff --git a/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 b/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 deleted file mode 100644 index 4b736046f3ad..000000000000 --- a/x-pack/plugin/sql/sql-action/licenses/lucene-core-8.4.0-snapshot-e648d601efb.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -aa74590851b6fcf536976f75448be52f6ca18a4a \ No newline at end of file From 263f5bd6b6a977464eb102af3fb05f389b91c2a8 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Tue, 10 Dec 2019 17:32:42 +0100 Subject: [PATCH 11/22] =?UTF-8?q?[Docs]=C2=A0Fix=20typo=20in=20function-sc?= =?UTF-8?q?ore-query.asciidoc=20(#50030)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/reference/query-dsl/function-score-query.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/query-dsl/function-score-query.asciidoc b/docs/reference/query-dsl/function-score-query.asciidoc index 371ba5e63814..c4924ebd3672 100644 --- a/docs/reference/query-dsl/function-score-query.asciidoc +++ b/docs/reference/query-dsl/function-score-query.asciidoc @@ -415,7 +415,7 @@ GET /_search `offset`:: If an `offset` is defined, the decay function will only compute the - decay function for documents with a distance greater that the defined + decay function for documents with a distance greater than the defined `offset`. The default is 0. 
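To see how offset feeds into the computation, take the gauss function documented on this page; the formula below restates it, and the worked numbers are an illustration rather than part of the patch:

    \[
    S(d) = \exp\Bigl(-\frac{\max(0,\ |d - \mathrm{origin}| - \mathrm{offset})^{2}}{2\sigma^{2}}\Bigr),
    \qquad \sigma^{2} = -\frac{\mathrm{scale}^{2}}{2\ln(\mathrm{decay})}
    \]

With origin = 0, offset = 5, scale = 10 and decay = 0.5, a document at distance 15 is scored on max(0, 15 - 5) = 10, giving sigma^2 = -100 / (2 ln 0.5) ≈ 72.1 and S = exp(-100 / 144.3) = exp(-ln 2) = 0.5; the score reaches exactly decay at distance offset + scale, while any document within distance 5 of the origin keeps the full score of 1.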
`decay`:: From f544a27c43be5761cc8b8364b8bceadafb8212dc Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Tue, 10 Dec 2019 14:18:33 -0500 Subject: [PATCH 12/22] muting test UUIDTests.testCompression (#50050) --- server/src/test/java/org/elasticsearch/common/UUIDTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/test/java/org/elasticsearch/common/UUIDTests.java b/server/src/test/java/org/elasticsearch/common/UUIDTests.java index dcc440acbcd1..1d23570064fe 100644 --- a/server/src/test/java/org/elasticsearch/common/UUIDTests.java +++ b/server/src/test/java/org/elasticsearch/common/UUIDTests.java @@ -116,6 +116,7 @@ public void testUUIDThreaded(UUIDGenerator uuidSource) { assertEquals(count*uuids, globalSet.size()); } + @AwaitsFix(bugUrl="https://github.com/elastic/elasticsearch/issues/50048") public void testCompression() throws Exception { Logger logger = LogManager.getLogger(UUIDTests.class); // Low number so that the test runs quickly, but the results are more interesting with larger numbers From de4f701a19d451ead345c17f6581e96d61c8c108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20Krawaczy=C5=84ski?= Date: Tue, 10 Dec 2019 20:23:14 +0100 Subject: [PATCH 13/22] [DOCS] Document `index.queries.cache.enabled` as a static setting (#49886) --- docs/reference/modules/indices/query_cache.asciidoc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/reference/modules/indices/query_cache.asciidoc b/docs/reference/modules/indices/query_cache.asciidoc index aaa1ab174284..b61c2a6eee04 100644 --- a/docs/reference/modules/indices/query_cache.asciidoc +++ b/docs/reference/modules/indices/query_cache.asciidoc @@ -18,7 +18,8 @@ the cluster: either a percentage value, like `5%`, or an exact value, like `512mb`. The following setting is an _index_ setting that can be configured on a -per-index basis: +per-index basis. Can only be set at index creation time or on a +<>: `index.queries.cache.enabled`:: From e800d7af3396d4af33c3371fa1c72b36b7e18556 Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Tue, 10 Dec 2019 12:55:06 -0800 Subject: [PATCH 14/22] Update Painless AST Catch Node (#50044) This makes two changes to the catch node: 1. Use SDeclaration to replace independent variable usage. 2. Use a DType to set a "minimum" exception type - this allows us to require users to continue using Exception as "minimum" type for catch blocks, but for us to internally catch Error/Throwable. This is a required step to removing custom try/catch blocks from SClass. 
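To make the mechanism concrete, here is a condensed sketch of how a user-level catch clause is now modeled; the helper name buildCatch is hypothetical, and the real tree building is in Walker.visitTrap with the check in SCatch.analyze in the hunks below:

    // Sketch only: the user-declared type travels as an SDeclaration, while
    // Exception is pinned separately as the internal "minimum" base type.
    SCatch buildCatch(Location loc, String userType, String name, SBlock block) {
        DType minimum = new DResolvedType(loc, Exception.class);
        SDeclaration decl = new SDeclaration(loc, new DUnresolvedType(loc, userType), name, null);
        return new SCatch(loc, minimum, decl, block);
    }

At analysis time SCatch resolves both types and rejects the clause with a ClassCastException unless baseType.isAssignableFrom(type) holds, so user scripts remain restricted to Exception subtypes while the framework itself can later instantiate the same node with Error or Throwable as the base.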
--- .../elasticsearch/painless/antlr/Walker.java | 8 +++- .../painless/node/DResolvedType.java | 2 +- .../elasticsearch/painless/node/SCatch.java | 38 +++++++++---------- .../painless/node/SDeclaration.java | 2 +- .../painless/node/NodeToStringTests.java | 14 ++++--- 5 files changed, 36 insertions(+), 28 deletions(-) diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java index 53c98f7589ef..9b4c52dad77b 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/antlr/Walker.java @@ -109,6 +109,7 @@ import org.elasticsearch.painless.node.AExpression; import org.elasticsearch.painless.node.ANode; import org.elasticsearch.painless.node.AStatement; +import org.elasticsearch.painless.node.DResolvedType; import org.elasticsearch.painless.node.DUnresolvedType; import org.elasticsearch.painless.node.EAssignment; import org.elasticsearch.painless.node.EBinary; @@ -232,6 +233,10 @@ private Location location(ParserRuleContext ctx) { return new Location(sourceName, ctx.getStart().getStartIndex()); } + private Location location(TerminalNode tn) { + return new Location(sourceName, tn.getSymbol().getStartIndex()); + } + @Override public ANode visitSource(SourceContext ctx) { List functions = new ArrayList<>(); @@ -503,7 +508,8 @@ public ANode visitTrap(TrapContext ctx) { String name = ctx.ID().getText(); SBlock block = (SBlock)visit(ctx.block()); - return new SCatch(location(ctx), type, name, block); + return new SCatch(location(ctx), new DResolvedType(location(ctx), Exception.class), + new SDeclaration(location(ctx.TYPE()), new DUnresolvedType(location(ctx.TYPE()), type), name, null), block); } @Override diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/DResolvedType.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/DResolvedType.java index 223b39068673..c1917944f260 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/DResolvedType.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/DResolvedType.java @@ -76,6 +76,6 @@ public Class getType() { @Override public String toString() { - return " (DResolvedType [" + PainlessLookupUtility.typeToCanonicalTypeName(type) + "])"; + return "(DResolvedType [" + PainlessLookupUtility.typeToCanonicalTypeName(type) + "])"; } } diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SCatch.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SCatch.java index ae5e421afa18..9fc6dc29fe21 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SCatch.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SCatch.java @@ -22,10 +22,10 @@ import org.elasticsearch.painless.ClassWriter; import org.elasticsearch.painless.Globals; import org.elasticsearch.painless.Locals; -import org.elasticsearch.painless.Locals.Variable; import org.elasticsearch.painless.Location; import org.elasticsearch.painless.MethodWriter; import org.elasticsearch.painless.ScriptRoot; +import org.elasticsearch.painless.lookup.PainlessLookupUtility; import org.objectweb.asm.Label; import org.objectweb.asm.Opcodes; @@ -37,27 +37,25 @@ */ public final class SCatch extends AStatement { - private final String type; - private final String name; + private final DType baseException; + 
private final SDeclaration declaration; private final SBlock block; - private Variable variable = null; Label begin = null; Label end = null; Label exception = null; - public SCatch(Location location, String type, String name, SBlock block) { + public SCatch(Location location, DType baseException, SDeclaration declaration, SBlock block) { super(location); - this.type = Objects.requireNonNull(type); - this.name = Objects.requireNonNull(name); + this.baseException = Objects.requireNonNull(baseException); + this.declaration = Objects.requireNonNull(declaration); this.block = block; } @Override void extractVariables(Set<String> variables) { - variables.add(name); + declaration.extractVariables(variables); if (block != null) { block.extractVariables(variables); @@ -66,18 +64,17 @@ void extractVariables(Set<String> variables) { @Override void analyze(ScriptRoot scriptRoot, Locals locals) { - Class<?> clazz = scriptRoot.getPainlessLookup().canonicalTypeNameToType(this.type); + declaration.analyze(scriptRoot, locals); - if (clazz == null) { - throw createError(new IllegalArgumentException("Not a type [" + this.type + "].")); - } + Class<?> baseType = baseException.resolveType(scriptRoot.getPainlessLookup()).getType(); + Class<?> type = declaration.variable.clazz; - if (!Exception.class.isAssignableFrom(clazz)) { - throw createError(new ClassCastException("Not an exception type [" + this.type + "].")); + if (baseType.isAssignableFrom(type) == false) { + throw createError(new ClassCastException( + "cannot cast from [" + PainlessLookupUtility.typeToCanonicalTypeName(type) + "] " + + "to [" + PainlessLookupUtility.typeToCanonicalTypeName(baseType) + "]")); } - variable = locals.addVariable(location, clazz, name, true); - if (block != null) { block.lastSource = lastSource; block.inLoop = inLoop; @@ -100,7 +97,8 @@ void write(ClassWriter classWriter, MethodWriter methodWriter, Globals globals) Label jump = new Label(); methodWriter.mark(jump); - methodWriter.visitVarInsn(MethodWriter.getType(variable.clazz).getOpcode(Opcodes.ISTORE), variable.getSlot()); + methodWriter.visitVarInsn( + MethodWriter.getType(declaration.variable.clazz).getOpcode(Opcodes.ISTORE), declaration.variable.getSlot()); if (block != null) { block.continu = continu; @@ -108,7 +106,7 @@ void write(ClassWriter classWriter, MethodWriter methodWriter, Globals globals) block.write(classWriter, methodWriter, globals); } - methodWriter.visitTryCatchBlock(begin, end, jump, MethodWriter.getType(variable.clazz).getInternalName()); + methodWriter.visitTryCatchBlock(begin, end, jump, MethodWriter.getType(declaration.variable.clazz).getInternalName()); if (exception != null && (block == null || !block.allEscape)) { methodWriter.goTo(exception); @@ -117,6 +115,6 @@ void write(ClassWriter classWriter, MethodWriter methodWriter, Globals globals) @Override public String toString() { - return singleLineToString(type, name, block); + return singleLineToString(baseException, declaration, block); } } diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SDeclaration.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SDeclaration.java index e5d8f1e88117..bcc2036aaffd 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SDeclaration.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/node/SDeclaration.java @@ -40,7 +40,7 @@ public final class SDeclaration extends AStatement { private final String name; private AExpression expression; - private Variable variable = null; +
Variable variable = null; public SDeclaration(Location location, DType type, String name, AExpression expression) { super(location); diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/node/NodeToStringTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/node/NodeToStringTests.java index 562b6e1e5e90..60310ab0c4cd 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/node/NodeToStringTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/node/NodeToStringTests.java @@ -853,7 +853,8 @@ public void testSFunction() { public void testSTryAndSCatch() { assertToString( "(SClass (STry (SBlock (SReturn (ENumeric 1)))\n" - + " (SCatch Exception e (SBlock (SReturn (ENumeric 2))))))", + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [Exception]) e) " + + "(SBlock (SReturn (ENumeric 2))))))", "try {\n" + " return 1\n" + "} catch (Exception e) {\n" @@ -863,7 +864,8 @@ public void testSTryAndSCatch() { "(SClass (STry (SBlock\n" + " (SDeclBlock (SDeclaration (DUnresolvedType [int]) i (ENumeric 1)))\n" + " (SReturn (ENumeric 1)))\n" - + " (SCatch Exception e (SBlock (SReturn (ENumeric 2))))))", + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [Exception]) e) " + + "(SBlock (SReturn (ENumeric 2))))))", "try {\n" + " int i = 1;" + " return 1\n" @@ -872,7 +874,7 @@ public void testSTryAndSCatch() { + "}"); assertToString( "(SClass (STry (SBlock (SReturn (ENumeric 1)))\n" - + " (SCatch Exception e (SBlock\n" + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [Exception]) e) (SBlock\n" + " (SDeclBlock (SDeclaration (DUnresolvedType [int]) i (ENumeric 1)))\n" + " (SReturn (ENumeric 2))))))", "try {\n" @@ -883,8 +885,10 @@ public void testSTryAndSCatch() { + "}"); assertToString( "(SClass (STry (SBlock (SReturn (ENumeric 1)))\n" - + " (SCatch NullPointerException e (SBlock (SReturn (ENumeric 2))))\n" - + " (SCatch Exception e (SBlock (SReturn (ENumeric 3))))))", + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [NullPointerException]) e) " + + "(SBlock (SReturn (ENumeric 2))))\n" + + " (SCatch (DResolvedType [java.lang.Exception]) (SDeclaration (DUnresolvedType [Exception]) e) " + + "(SBlock (SReturn (ENumeric 3))))))", "try {\n" + " return 1\n" + "} catch (NullPointerException e) {\n" From 133b34c8355639ae0f699a86ffd9f37d19f73bca Mon Sep 17 00:00:00 2001 From: Stuart Cam Date: Wed, 11 Dec 2019 13:23:00 +1100 Subject: [PATCH 15/22] Add the REST API specifications for SLM Status / Start / Stop endpoints. 
(#49759) Was originally missed in PR #47710 --- .../rest-api-spec/api/slm.get_status.json | 19 +++++++++++++++++++ .../rest-api-spec/api/slm.start.json | 19 +++++++++++++++++++ .../resources/rest-api-spec/api/slm.stop.json | 19 +++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/slm.get_status.json create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/slm.start.json create mode 100644 x-pack/plugin/src/test/resources/rest-api-spec/api/slm.stop.json diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.get_status.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.get_status.json new file mode 100644 index 000000000000..163ad5558c3d --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.get_status.json @@ -0,0 +1,19 @@ +{ + "slm.get_status":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/slm-get-status.html" + }, + "stability":"stable", + "url":{ + "paths":[ + { + "path":"/_slm/status", + "methods":[ + "GET" + ] + } + ] + }, + "params":{} + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.start.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.start.json new file mode 100644 index 000000000000..21ae3d509786 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.start.json @@ -0,0 +1,19 @@ +{ + "slm.start":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/slm-start.html" + }, + "stability":"stable", + "url":{ + "paths":[ + { + "path":"/_slm/start", + "methods":[ + "POST" + ] + } + ] + }, + "params":{} + } +} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.stop.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.stop.json new file mode 100644 index 000000000000..63b74ab9c2f7 --- /dev/null +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/slm.stop.json @@ -0,0 +1,19 @@ +{ + "slm.stop":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/current/slm-stop.html" + }, + "stability":"stable", + "url":{ + "paths":[ + { + "path":"/_slm/stop", + "methods":[ + "POST" + ] + } + ] + }, + "params":{} + } +} From 022e5f5e32cd18c128a980e94cf89c45ba8a4ece Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Witek?= Date: Wed, 11 Dec 2019 08:50:17 +0100 Subject: [PATCH 16/22] A few improvements to AnalyticsProcessManager class that make the code more readable. 
(#50026) --- .../dataframe/DataFrameAnalyticsManager.java | 10 +- .../process/AnalyticsProcessManager.java | 150 +++++++++--------- .../process/AnalyticsProcessManagerTests.java | 96 ++++++----- 3 files changed, 134 insertions(+), 122 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java index 76fc58802794..8e89113be7eb 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java @@ -233,15 +233,7 @@ private void startAnalytics(DataFrameAnalyticsTask task, DataFrameAnalyticsConfi DataFrameAnalyticsTaskState analyzingState = new DataFrameAnalyticsTaskState(DataFrameAnalyticsState.ANALYZING, task.getAllocationId(), null); task.updatePersistentTaskState(analyzingState, ActionListener.wrap( - updatedTask -> processManager.runJob(task, config, dataExtractorFactory, - error -> { - if (error != null) { - task.updateState(DataFrameAnalyticsState.FAILED, error.getMessage()); - } else { - auditor.info(config.getId(), Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_ANALYSIS); - task.markAsCompleted(); - } - }), + updatedTask -> processManager.runJob(task, config, dataExtractorFactory), error -> { if (ExceptionsHelper.unwrapCause(error) instanceof ResourceNotFoundException) { // Task has stopped diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManager.java index 815d8478a527..ce981ad17a98 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManager.java @@ -8,6 +8,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.util.SetOnce; import org.elasticsearch.action.admin.indices.refresh.RefreshAction; import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; import org.elasticsearch.action.search.SearchResponse; @@ -90,19 +91,19 @@ public AnalyticsProcessManager(Client client, this.trainedModelProvider = Objects.requireNonNull(trainedModelProvider); } - public void runJob(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, DataFrameDataExtractorFactory dataExtractorFactory, - Consumer finishHandler) { + public void runJob(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, DataFrameDataExtractorFactory dataExtractorFactory) { executorServiceForJob.execute(() -> { - ProcessContext processContext = new ProcessContext(config.getId()); + ProcessContext processContext = new ProcessContext(config); synchronized (processContextByAllocation) { if (task.isStopping()) { // The task was requested to stop before we created the process context - finishHandler.accept(null); + auditor.info(config.getId(), Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_ANALYSIS); + task.markAsCompleted(); return; } if (processContextByAllocation.putIfAbsent(task.getAllocationId(), processContext) != null) { - finishHandler.accept( - ExceptionsHelper.serverError("[" + config.getId() + "] Could not create process as one already exists")); + 
task.updateState( + DataFrameAnalyticsState.FAILED, "[" + config.getId() + "] Could not create process as one already exists"); return; } } @@ -113,13 +114,13 @@ public void runJob(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, // Fetch existing model state (if any) BytesReference state = getModelState(config); - if (processContext.startProcess(dataExtractorFactory, config, task, state)) { - executorServiceForProcess.execute(() -> processResults(processContext)); - executorServiceForProcess.execute(() -> processData(task, config, processContext.dataExtractor, - processContext.process, processContext.resultProcessor, finishHandler, state)); + if (processContext.startProcess(dataExtractorFactory, task, state)) { + executorServiceForProcess.execute(() -> processContext.resultProcessor.get().process(processContext.process.get())); + executorServiceForProcess.execute(() -> processData(task, processContext, state)); } else { processContextByAllocation.remove(task.getAllocationId()); - finishHandler.accept(null); + auditor.info(config.getId(), Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_ANALYSIS); + task.markAsCompleted(); } }); } @@ -140,26 +141,18 @@ private BytesReference getModelState(DataFrameAnalyticsConfig config) { } } - private void processResults(ProcessContext processContext) { + private void processData(DataFrameAnalyticsTask task, ProcessContext processContext, BytesReference state) { + DataFrameAnalyticsConfig config = processContext.config; + DataFrameDataExtractor dataExtractor = processContext.dataExtractor.get(); + AnalyticsProcess process = processContext.process.get(); + AnalyticsResultProcessor resultProcessor = processContext.resultProcessor.get(); try { - processContext.resultProcessor.process(processContext.process); - } catch (Exception e) { - processContext.setFailureReason(e.getMessage()); - } - } - - private void processData(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, DataFrameDataExtractor dataExtractor, - AnalyticsProcess process, AnalyticsResultProcessor resultProcessor, - Consumer finishHandler, BytesReference state) { - - try { - ProcessContext processContext = processContextByAllocation.get(task.getAllocationId()); writeHeaderRecord(dataExtractor, process); writeDataRows(dataExtractor, process, config.getAnalysis(), task.getProgressTracker()); process.writeEndOfDataMessage(); process.flushStream(); - restoreState(config, state, process, finishHandler); + restoreState(task, config, state, process); LOGGER.info("[{}] Waiting for result processor to complete", config.getId()); resultProcessor.awaitForCompletion(); @@ -168,26 +161,34 @@ private void processData(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig c refreshDest(config); LOGGER.info("[{}] Result processor has completed", config.getId()); } catch (Exception e) { - if (task.isStopping() == false) { - String errorMsg = new ParameterizedMessage("[{}] Error while processing data [{}]", config.getId(), e.getMessage()) - .getFormattedMessage(); + if (task.isStopping()) { + // Errors during task stopping are expected but we still want to log them just in case. 
+ String errorMsg = + new ParameterizedMessage( + "[{}] Error while processing data [{}]; task is stopping", config.getId(), e.getMessage()).getFormattedMessage(); + LOGGER.debug(errorMsg, e); + } else { + String errorMsg = + new ParameterizedMessage("[{}] Error while processing data [{}]", config.getId(), e.getMessage()).getFormattedMessage(); LOGGER.error(errorMsg, e); - processContextByAllocation.get(task.getAllocationId()).setFailureReason(errorMsg); + processContext.setFailureReason(errorMsg); } } finally { closeProcess(task); - ProcessContext processContext = processContextByAllocation.remove(task.getAllocationId()); + processContextByAllocation.remove(task.getAllocationId()); LOGGER.debug("Removed process context for task [{}]; [{}] processes still running", config.getId(), processContextByAllocation.size()); if (processContext.getFailureReason() == null) { // This results in marking the persistent task as complete LOGGER.info("[{}] Marking task completed", config.getId()); - finishHandler.accept(null); + auditor.info(config.getId(), Messages.DATA_FRAME_ANALYTICS_AUDIT_FINISHED_ANALYSIS); + task.markAsCompleted(); } else { LOGGER.error("[{}] Marking task failed; {}", config.getId(), processContext.getFailureReason()); task.updateState(DataFrameAnalyticsState.FAILED, processContext.getFailureReason()); + // Note: We are not marking the task as failed here as we want the user to be able to inspect the failure reason. } } } @@ -239,8 +240,8 @@ private void writeHeaderRecord(DataFrameDataExtractor dataExtractor, AnalyticsPr process.writeRecord(headerRecord); } - private void restoreState(DataFrameAnalyticsConfig config, @Nullable BytesReference state, AnalyticsProcess process, - Consumer failureHandler) { + private void restoreState(DataFrameAnalyticsTask task, DataFrameAnalyticsConfig config, @Nullable BytesReference state, + AnalyticsProcess process) { if (config.getAnalysis().persistsState() == false) { LOGGER.debug("[{}] Analysis does not support state", config.getId()); return; @@ -258,7 +259,7 @@ private void restoreState(DataFrameAnalyticsConfig config, @Nullable BytesRefere process.restoreState(state); } catch (Exception e) { LOGGER.error(new ParameterizedMessage("[{}] Failed to restore state", process.getConfig().jobId()), e); - failureHandler.accept(ExceptionsHelper.serverError("Failed to restore state", e)); + task.updateState(DataFrameAnalyticsState.FAILED, "Failed to restore state: " + e.getMessage()); } } @@ -293,9 +294,10 @@ private void closeProcess(DataFrameAnalyticsTask task) { ProcessContext processContext = processContextByAllocation.get(task.getAllocationId()); try { - processContext.process.close(); + processContext.process.get().close(); LOGGER.info("[{}] Closed process", configId); } catch (Exception e) { + LOGGER.error("[" + configId + "] Error closing data frame analyzer process", e); String errorMsg = new ParameterizedMessage( "[{}] Error closing data frame analyzer process [{}]", configId, e.getMessage()).getFormattedMessage(); processContext.setFailureReason(errorMsg); @@ -323,42 +325,41 @@ int getProcessContextCount() { class ProcessContext { - private final String id; - private volatile AnalyticsProcess process; - private volatile DataFrameDataExtractor dataExtractor; - private volatile AnalyticsResultProcessor resultProcessor; - private volatile boolean processKilled; - private volatile String failureReason; + private final DataFrameAnalyticsConfig config; + private final SetOnce> process = new SetOnce<>(); + private final SetOnce dataExtractor = new 
SetOnce<>(); + private final SetOnce resultProcessor = new SetOnce<>(); + private final SetOnce failureReason = new SetOnce<>(); - ProcessContext(String id) { - this.id = Objects.requireNonNull(id); + ProcessContext(DataFrameAnalyticsConfig config) { + this.config = Objects.requireNonNull(config); } - synchronized String getFailureReason() { - return failureReason; + String getFailureReason() { + return failureReason.get(); } - synchronized void setFailureReason(String failureReason) { - // Only set the new reason if there isn't one already as we want to keep the first reason - if (this.failureReason == null && failureReason != null) { - this.failureReason = failureReason; + void setFailureReason(String failureReason) { + if (failureReason == null) { + return; } + // Only set the new reason if there isn't one already as we want to keep the first reason (most likely the root cause). + this.failureReason.trySet(failureReason); } synchronized void stop() { - LOGGER.debug("[{}] Stopping process", id); - processKilled = true; - if (dataExtractor != null) { - dataExtractor.cancel(); + LOGGER.debug("[{}] Stopping process", config.getId()); + if (dataExtractor.get() != null) { + dataExtractor.get().cancel(); } - if (resultProcessor != null) { - resultProcessor.cancel(); + if (resultProcessor.get() != null) { + resultProcessor.get().cancel(); } - if (process != null) { + if (process.get() != null) { try { - process.kill(); + process.get().kill(); } catch (IOException e) { - LOGGER.error(new ParameterizedMessage("[{}] Failed to kill process", id), e); + LOGGER.error(new ParameterizedMessage("[{}] Failed to kill process", config.getId()), e); } } } @@ -366,16 +367,17 @@ synchronized void stop() { /** * @return {@code true} if the process was started or {@code false} if it was not because it was stopped in the meantime */ - synchronized boolean startProcess(DataFrameDataExtractorFactory dataExtractorFactory, DataFrameAnalyticsConfig config, - DataFrameAnalyticsTask task, @Nullable BytesReference state) { - if (processKilled) { + synchronized boolean startProcess(DataFrameDataExtractorFactory dataExtractorFactory, + DataFrameAnalyticsTask task, + @Nullable BytesReference state) { + if (task.isStopping()) { // The job was stopped before we started the process so no need to start it return false; } - dataExtractor = dataExtractorFactory.newExtractor(false); + dataExtractor.set(dataExtractorFactory.newExtractor(false)); AnalyticsProcessConfig analyticsProcessConfig = - createProcessConfig(config, dataExtractor, dataExtractorFactory.getExtractedFields()); + createProcessConfig(dataExtractor.get(), dataExtractorFactory.getExtractedFields()); LOGGER.trace("[{}] creating analytics process with config [{}]", config.getId(), Strings.toString(analyticsProcessConfig)); // If we have no rows, that means there is no data so no point in starting the native process // just finish the task @@ -383,19 +385,16 @@ synchronized boolean startProcess(DataFrameDataExtractorFactory dataExtractorFac LOGGER.info("[{}] no data found to analyze. 
Will not start analytics native process.", config.getId()); return false; } - process = createProcess(task, config, analyticsProcessConfig, state); - DataFrameRowsJoiner dataFrameRowsJoiner = new DataFrameRowsJoiner(config.getId(), client, - dataExtractorFactory.newExtractor(true)); - resultProcessor = new AnalyticsResultProcessor( - config, dataFrameRowsJoiner, task.getProgressTracker(), trainedModelProvider, auditor, dataExtractor.getFieldNames()); + process.set(createProcess(task, config, analyticsProcessConfig, state)); + resultProcessor.set(createResultProcessor(task, dataExtractorFactory)); return true; } - private AnalyticsProcessConfig createProcessConfig( - DataFrameAnalyticsConfig config, DataFrameDataExtractor dataExtractor, ExtractedFields extractedFields) { + private AnalyticsProcessConfig createProcessConfig(DataFrameDataExtractor dataExtractor, + ExtractedFields extractedFields) { DataFrameDataExtractor.DataSummary dataSummary = dataExtractor.collectDataSummary(); Set categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis()); - AnalyticsProcessConfig processConfig = new AnalyticsProcessConfig( + return new AnalyticsProcessConfig( config.getId(), dataSummary.rows, dataSummary.cols, @@ -405,7 +404,14 @@ private AnalyticsProcessConfig createProcessConfig( categoricalFields, config.getAnalysis(), extractedFields); - return processConfig; + } + + private AnalyticsResultProcessor createResultProcessor(DataFrameAnalyticsTask task, + DataFrameDataExtractorFactory dataExtractorFactory) { + DataFrameRowsJoiner dataFrameRowsJoiner = + new DataFrameRowsJoiner(config.getId(), client, dataExtractorFactory.newExtractor(true)); + return new AnalyticsResultProcessor( + config, dataFrameRowsJoiner, task.getProgressTracker(), trainedModelProvider, auditor, dataExtractor.get().getFieldNames()); } } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManagerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManagerTests.java index 4a0d5fa7f36d..915d6c29efb4 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManagerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/process/AnalyticsProcessManagerTests.java @@ -14,6 +14,7 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfigTests; +import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState; import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsTask; import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractor; import org.elasticsearch.xpack.ml.dataframe.extractor.DataFrameDataExtractorFactory; @@ -22,12 +23,10 @@ import org.elasticsearch.xpack.ml.inference.persistence.TrainedModelProvider; import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor; import org.junit.Before; -import org.mockito.ArgumentCaptor; import org.mockito.InOrder; import java.util.List; import java.util.concurrent.ExecutorService; -import java.util.function.Consumer; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; @@ -37,7 +36,6 @@ import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; import static 
org.mockito.Mockito.verifyNoMoreInteractions; import static org.mockito.Mockito.when; @@ -66,8 +64,6 @@ public class AnalyticsProcessManagerTests extends ESTestCase { private DataFrameAnalyticsConfig dataFrameAnalyticsConfig; private DataFrameDataExtractorFactory dataExtractorFactory; private DataFrameDataExtractor dataExtractor; - private Consumer finishHandler; - private ArgumentCaptor exceptionCaptor; private AnalyticsProcessManager processManager; @SuppressWarnings("unchecked") @@ -97,9 +93,6 @@ public void setUpMocks() { dataExtractorFactory = mock(DataFrameDataExtractorFactory.class); when(dataExtractorFactory.newExtractor(anyBoolean())).thenReturn(dataExtractor); when(dataExtractorFactory.getExtractedFields()).thenReturn(mock(ExtractedFields.class)); - finishHandler = mock(Consumer.class); - - exceptionCaptor = ArgumentCaptor.forClass(Exception.class); processManager = new AnalyticsProcessManager( client, executorServiceForJob, executorServiceForProcess, processFactory, auditor, trainedModelProvider); @@ -108,54 +101,68 @@ public void setUpMocks() { public void testRunJob_TaskIsStopping() { when(task.isStopping()).thenReturn(true); - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(0)); - verify(finishHandler).accept(null); - verifyNoMoreInteractions(finishHandler); + InOrder inOrder = inOrder(task); + inOrder.verify(task).isStopping(); + inOrder.verify(task).markAsCompleted(); + verifyNoMoreInteractions(task); } public void testRunJob_ProcessContextAlreadyExists() { - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(1)); - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(1)); - verify(finishHandler).accept(exceptionCaptor.capture()); - verifyNoMoreInteractions(finishHandler); - - Exception e = exceptionCaptor.getValue(); - assertThat(e.getMessage(), equalTo("[config-id] Could not create process as one already exists")); + InOrder inOrder = inOrder(task); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getProgressTracker(); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).updateState(DataFrameAnalyticsState.FAILED, "[config-id] Could not create process as one already exists"); + verifyNoMoreInteractions(task); } public void testRunJob_EmptyDataFrame() { when(dataExtractor.collectDataSummary()).thenReturn(new DataFrameDataExtractor.DataSummary(0, NUM_COLS)); - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(0)); // Make sure the process context did not leak - InOrder inOrder = inOrder(dataExtractor, executorServiceForProcess, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, executorServiceForProcess, process, task); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getAllocationId(); + 
inOrder.verify(task).isStopping(); inOrder.verify(dataExtractor).collectDataSummary(); inOrder.verify(dataExtractor).getCategoricalFields(dataFrameAnalyticsConfig.getAnalysis()); - inOrder.verify(finishHandler).accept(null); - verifyNoMoreInteractions(dataExtractor, executorServiceForProcess, process, finishHandler); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).markAsCompleted(); + verifyNoMoreInteractions(dataExtractor, executorServiceForProcess, process, task); } public void testRunJob_Ok() { - processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory, finishHandler); + processManager.runJob(task, dataFrameAnalyticsConfig, dataExtractorFactory); assertThat(processManager.getProcessContextCount(), equalTo(1)); - InOrder inOrder = inOrder(dataExtractor, executorServiceForProcess, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, executorServiceForProcess, process, task); + inOrder.verify(task).isStopping(); + inOrder.verify(task).getAllocationId(); + inOrder.verify(task).isStopping(); inOrder.verify(dataExtractor).collectDataSummary(); inOrder.verify(dataExtractor).getCategoricalFields(dataFrameAnalyticsConfig.getAnalysis()); inOrder.verify(process).isProcessAlive(); + inOrder.verify(task).getProgressTracker(); inOrder.verify(dataExtractor).getFieldNames(); inOrder.verify(executorServiceForProcess, times(2)).execute(any()); // 'processData' and 'processResults' threads - verifyNoMoreInteractions(dataExtractor, executorServiceForProcess, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, executorServiceForProcess, process, task); } public void testProcessContext_GetSetFailureReason() { - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); assertThat(processContext.getFailureReason(), is(nullValue())); processContext.setFailureReason("reason1"); @@ -167,50 +174,57 @@ public void testProcessContext_GetSetFailureReason() { processContext.setFailureReason("reason2"); assertThat(processContext.getFailureReason(), equalTo("reason1")); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, process, task); } - public void testProcessContext_StartProcess_ProcessAlreadyKilled() { - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); + public void testProcessContext_StartProcess_TaskAlreadyStopped() { + when(task.isStopping()).thenReturn(true); + + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); processContext.stop(); - assertThat(processContext.startProcess(dataExtractorFactory, dataFrameAnalyticsConfig, task, null), is(false)); + assertThat(processContext.startProcess(dataExtractorFactory, task, null), is(false)); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, process, task); + inOrder.verify(task).isStopping(); + verifyNoMoreInteractions(dataExtractor, process, task); } public void testProcessContext_StartProcess_EmptyDataFrame() { when(dataExtractor.collectDataSummary()).thenReturn(new DataFrameDataExtractor.DataSummary(0, NUM_COLS)); - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); - assertThat(processContext.startProcess(dataExtractorFactory, dataFrameAnalyticsConfig, task, 
null), is(false)); + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); + assertThat(processContext.startProcess(dataExtractorFactory, task, null), is(false)); - InOrder inOrder = inOrder(dataExtractor, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, process, task); + inOrder.verify(task).isStopping(); inOrder.verify(dataExtractor).collectDataSummary(); inOrder.verify(dataExtractor).getCategoricalFields(dataFrameAnalyticsConfig.getAnalysis()); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, process, task); } public void testProcessContext_StartAndStop() throws Exception { - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); - assertThat(processContext.startProcess(dataExtractorFactory, dataFrameAnalyticsConfig, task, null), is(true)); + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); + assertThat(processContext.startProcess(dataExtractorFactory, task, null), is(true)); processContext.stop(); - InOrder inOrder = inOrder(dataExtractor, process, finishHandler); + InOrder inOrder = inOrder(dataExtractor, process, task); // startProcess + inOrder.verify(task).isStopping(); inOrder.verify(dataExtractor).collectDataSummary(); inOrder.verify(dataExtractor).getCategoricalFields(dataFrameAnalyticsConfig.getAnalysis()); inOrder.verify(process).isProcessAlive(); + inOrder.verify(task).getProgressTracker(); inOrder.verify(dataExtractor).getFieldNames(); // stop inOrder.verify(dataExtractor).cancel(); inOrder.verify(process).kill(); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, process, task); } public void testProcessContext_Stop() { - AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(CONFIG_ID); + AnalyticsProcessManager.ProcessContext processContext = processManager.new ProcessContext(dataFrameAnalyticsConfig); processContext.stop(); - verifyNoMoreInteractions(dataExtractor, process, finishHandler); + verifyNoMoreInteractions(dataExtractor, process, task); } } From 6790cc2179fd75a84597e166b2969ba69bb1baa7 Mon Sep 17 00:00:00 2001 From: Dimitris Athanasiou Date: Wed, 11 Dec 2019 10:00:41 +0200 Subject: [PATCH 17/22] [ml] Fix randomize_seed versions and unmute BWC tests (#50027) ... now that #49990 has been backported. 
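The underlying issue is that gating a new wire field on `Version.CURRENT` only works
until the change is backported: `Version.CURRENT` is a different constant on each
branch, so a 7.6 node and an 8.0 node would disagree about whether the field is on
the wire. A minimal sketch of the intended pattern (the `StreamInput`/`StreamOutput`
calls are the real API; the surrounding class and the constant name are illustrative
only, not part of this patch):

    import java.io.IOException;
    import org.elasticsearch.Version;
    import org.elasticsearch.common.io.stream.StreamInput;
    import org.elasticsearch.common.io.stream.StreamOutput;

    class VersionGatedField {
        // Pin the concrete release that first carries the field, never Version.CURRENT.
        private static final Version FIELD_ADDED_IN = Version.V_7_6_0;

        private final Long randomizeSeed;

        VersionGatedField(StreamInput in) throws IOException {
            if (in.getVersion().onOrAfter(FIELD_ADDED_IN)) {
                randomizeSeed = in.readOptionalLong();  // the peer wrote the field
            } else {
                randomizeSeed = null;                   // old peer: field absent from the stream
            }
        }

        void writeTo(StreamOutput out) throws IOException {
            if (out.getVersion().onOrAfter(FIELD_ADDED_IN)) {
                out.writeOptionalLong(randomizeSeed);   // only send to peers that will read it
            }
        }
    }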
Relates #49990 --- .../xpack/core/ml/dataframe/analyses/Classification.java | 6 +++--- .../xpack/core/ml/dataframe/analyses/Regression.java | 6 +++--- .../test/mixed_cluster/90_ml_data_frame_analytics_crud.yml | 6 +----- .../test/old_cluster/90_ml_data_frame_analytics_crud.yml | 3 --- .../upgraded_cluster/90_ml_data_frame_analytics_crud.yml | 6 +----- 5 files changed, 8 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java index cd96b815fc11..ed4cb1fe18f8 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Classification.java @@ -119,7 +119,7 @@ public Classification(StreamInput in) throws IOException { predictionFieldName = in.readOptionalString(); numTopClasses = in.readOptionalVInt(); trainingPercent = in.readDouble(); - if (in.getVersion().onOrAfter(Version.CURRENT)) { + if (in.getVersion().onOrAfter(Version.V_7_6_0)) { randomizeSeed = in.readOptionalLong(); } else { randomizeSeed = Randomness.get().nextLong(); @@ -163,7 +163,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(predictionFieldName); out.writeOptionalVInt(numTopClasses); out.writeDouble(trainingPercent); - if (out.getVersion().onOrAfter(Version.CURRENT)) { + if (out.getVersion().onOrAfter(Version.V_7_6_0)) { out.writeOptionalLong(randomizeSeed); } } @@ -180,7 +180,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(PREDICTION_FIELD_NAME.getPreferredName(), predictionFieldName); } builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); - if (version.onOrAfter(Version.CURRENT)) { + if (version.onOrAfter(Version.V_7_6_0)) { builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed); } builder.endObject(); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java index dd8f6a91272c..8fffcd0f573d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/dataframe/analyses/Regression.java @@ -91,7 +91,7 @@ public Regression(StreamInput in) throws IOException { boostedTreeParams = new BoostedTreeParams(in); predictionFieldName = in.readOptionalString(); trainingPercent = in.readDouble(); - if (in.getVersion().onOrAfter(Version.CURRENT)) { + if (in.getVersion().onOrAfter(Version.V_7_6_0)) { randomizeSeed = in.readOptionalLong(); } else { randomizeSeed = Randomness.get().nextLong(); @@ -130,7 +130,7 @@ public void writeTo(StreamOutput out) throws IOException { boostedTreeParams.writeTo(out); out.writeOptionalString(predictionFieldName); out.writeDouble(trainingPercent); - if (out.getVersion().onOrAfter(Version.CURRENT)) { + if (out.getVersion().onOrAfter(Version.V_7_6_0)) { out.writeOptionalLong(randomizeSeed); } } @@ -146,7 +146,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(PREDICTION_FIELD_NAME.getPreferredName(), predictionFieldName); } builder.field(TRAINING_PERCENT.getPreferredName(), trainingPercent); - if (version.onOrAfter(Version.CURRENT)) { 
+        if (version.onOrAfter(Version.V_7_6_0)) {
             builder.field(RANDOMIZE_SEED.getPreferredName(), randomizeSeed);
         }
         builder.endObject();
diff --git a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml
index 808214716071..7780691b2bbb 100644
--- a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml
+++ b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/90_ml_data_frame_analytics_crud.yml
@@ -1,8 +1,3 @@
-setup:
-  - skip:
-      version: "all"
-      reason: "Until backport of https://github.com/elastic/elasticsearch/issues/49690"
-
 ---
 "Get old outlier_detection job":
@@ -65,6 +60,7 @@ setup:
   - match: { data_frame_analytics.0.dest.index: "old_cluster_regression_job_results" }
   - match: { data_frame_analytics.0.analysis.regression.dependent_variable: "foo" }
   - match: { data_frame_analytics.0.analysis.regression.training_percent: 100.0 }
+  - is_true: data_frame_analytics.0.analysis.regression.randomize_seed
 ---
 "Get old regression job stats":
diff --git a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/90_ml_data_frame_analytics_crud.yml b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/90_ml_data_frame_analytics_crud.yml
index ba2cf4041167..fe160bba15f2 100644
--- a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/90_ml_data_frame_analytics_crud.yml
+++ b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/90_ml_data_frame_analytics_crud.yml
@@ -1,7 +1,4 @@
 setup:
-  - skip:
-      version: "all"
-      reason: "Until backport of https://github.com/elastic/elasticsearch/issues/49690"
   - do:
       index:
diff --git a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml
index 462a1fd76c01..14438883f0da 100644
--- a/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml
+++ b/x-pack/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/upgraded_cluster/90_ml_data_frame_analytics_crud.yml
@@ -1,8 +1,3 @@
-setup:
-  - skip:
-      version: "all"
-      reason: "Until backport of https://github.com/elastic/elasticsearch/issues/49690"
-
 ---
 "Get old cluster outlier_detection job":
@@ -45,6 +40,7 @@
   - match: { data_frame_analytics.0.dest.index: "old_cluster_regression_job_results" }
   - match: { data_frame_analytics.0.analysis.regression.dependent_variable: "foo" }
   - match: { data_frame_analytics.0.analysis.regression.training_percent: 100.0 }
+  - is_true: data_frame_analytics.0.analysis.regression.randomize_seed
 ---
 "Get old cluster regression job stats":

From cdcf1326783df7c909ef2df54bdfd0fdf04b4189 Mon Sep 17 00:00:00 2001
From: Dimitris Athanasiou
Date: Wed, 11 Dec 2019 15:14:33 +0200
Subject: [PATCH 18/22] [ML] Avoid classification integ test training on single class (#50072)

The `ClassificationIT.testTwoJobsWithSameRandomizeSeedUseSameTrainingSet`
test was previously set up to have just 10 rows. With a `training_percent`
of 50%, only 5 rows will be used for training. There is a good chance that
all 5 rows will be of one class, which results in failure.

This commit increases the rows to 100.
Now 50 rows should be used for training and the chance of failure should be
very small.
---
 .../elasticsearch/xpack/ml/integration/ClassificationIT.java | 5 ++++-
 .../integration/MlNativeDataFrameAnalyticsIntegTestCase.java | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
index e7c0ccd0e055..0e49043fcfbe 100644
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/ClassificationIT.java
@@ -274,7 +274,10 @@ public void testDependentVariableCardinalityTooHighButWithQueryMakesItWithinRang
     public void testTwoJobsWithSameRandomizeSeedUseSameTrainingSet() throws Exception {
         String sourceIndex = "classification_two_jobs_with_same_randomize_seed_source";
         String dependentVariable = KEYWORD_FIELD;
-        indexData(sourceIndex, 10, 0, dependentVariable);
+
+        // We use 100 rows as we can't set this too low. If it is too low, it is possible
+        // we only train with rows of one of the two classes, which leads to failure.
+        indexData(sourceIndex, 100, 0, dependentVariable);

         String firstJobId = "classification_two_jobs_with_same_randomize_seed_1";
         String firstJobDestIndex = firstJobId + "_dest";
diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java
index 99223247d730..8ff82c28b36e 100644
--- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java
+++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/test/java/org/elasticsearch/xpack/ml/integration/MlNativeDataFrameAnalyticsIntegTestCase.java
@@ -259,7 +259,7 @@ private static List fetchAllAuditMessages(String dataFrameAnalyticsId) {

     protected static Set getTrainingRowsIds(String index) {
         Set trainingRowsIds = new HashSet<>();
-        SearchResponse hits = client().prepareSearch(index).get();
+        SearchResponse hits = client().prepareSearch(index).setSize(10000).get();
         for (SearchHit hit : hits.getHits()) {
             Map sourceAsMap = hit.getSourceAsMap();
             assertThat(sourceAsMap.containsKey("ml"), is(true));

From 64e1a774fcbf5061d981b3e8b5f8dda00fa0c93b Mon Sep 17 00:00:00 2001
From: Przemko Robakowski
Date: Wed, 11 Dec 2019 14:52:04 +0100
Subject: [PATCH 19/22] CSV ingest processor (#49509)

* CSV Processor for Ingest

This change adds a new ingest processor that breaks a line from a CSV file
into separate fields. By default it conforms to RFC 4180 but can be tweaked.
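As a rough illustration of the intent (a hypothetical pipeline, with made-up
pipeline id and field names; the docs and REST tests below are the
authoritative examples):

    PUT _ingest/pipeline/parse-csv-line
    {
      "processors": [
        {
          "csv": {
            "field": "message",
            "target_fields": ["source_ip", "http_method", "http_status"]
          }
        }
      ]
    }

A document indexed through this pipeline with `"message": "127.0.0.1,GET,200"`
would, under the defaults, end up with `source_ip`, `http_method` and
`http_status` as separate fields.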
Closes #49113
---
 docs/reference/ingest/ingest-node.asciidoc    |   1 +
 docs/reference/ingest/processors/csv.asciidoc |  33 +++
 .../ingest/common/CsvParser.java              | 206 ++++++++++++++++
 .../ingest/common/CsvProcessor.java           | 108 +++++++++
 .../ingest/common/IngestCommonPlugin.java     |   3 +-
 .../ingest/common/CsvProcessorTests.java      | 221 ++++++++++++++++++
 .../rest-api-spec/test/ingest/250_csv.yml     | 164 +++++++++++++
 7 files changed, 735 insertions(+), 1 deletion(-)
 create mode 100644 docs/reference/ingest/processors/csv.asciidoc
 create mode 100644 modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java
 create mode 100644 modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvProcessor.java
 create mode 100644 modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java
 create mode 100644 modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/250_csv.yml

diff --git a/docs/reference/ingest/ingest-node.asciidoc b/docs/reference/ingest/ingest-node.asciidoc
index 0da0fd19e16e..596bda67d3ed 100644
--- a/docs/reference/ingest/ingest-node.asciidoc
+++ b/docs/reference/ingest/ingest-node.asciidoc
@@ -825,6 +825,7 @@ include::processors/append.asciidoc[]
 include::processors/bytes.asciidoc[]
 include::processors/circle.asciidoc[]
 include::processors/convert.asciidoc[]
+include::processors/csv.asciidoc[]
 include::processors/date.asciidoc[]
 include::processors/date-index-name.asciidoc[]
 include::processors/dissect.asciidoc[]
diff --git a/docs/reference/ingest/processors/csv.asciidoc b/docs/reference/ingest/processors/csv.asciidoc
new file mode 100644
index 000000000000..c589c9eb4361
--- /dev/null
+++ b/docs/reference/ingest/processors/csv.asciidoc
@@ -0,0 +1,33 @@
+[[csv-processor]]
+=== CSV Processor
+Extracts fields from a CSV line contained in a single text field within a document. Any field that is empty in the CSV is skipped.
+
+[[csv-options]]
+.CSV Options
+[options="header"]
+|======
+| Name | Required | Default | Description
+| `field` | yes | - | The field to extract data from
+| `target_fields` | yes | - | The array of fields to assign extracted values to
+| `separator` | no | , | Separator used in the CSV, must be a single-character string
+| `quote` | no | " | Quote used in the CSV, must be a single-character string
+| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
+| `trim` | no | `false` | Trim whitespace in unquoted fields
+include::common-options.asciidoc[]
+|======
+
+[source,js]
+--------------------------------------------------
+{
+  "csv": {
+    "field": "my_field",
+    "target_fields": ["field1", "field2"]
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+If the `trim` option is enabled, any whitespace at the beginning and at the end of each unquoted field is trimmed.
+For example, with the configuration above (where `trim` is not enabled), a value of `A, B` results in field `field2`
+having value `{nbsp}B` (with a space at the beginning). With `trim` enabled, `A, B` results in field `field2`
+having value `B` (no whitespace). Quoted fields are left untouched.
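The parser added in `CsvParser.java` below is a single-pass, character-level
state machine (START / UNQUOTED / QUOTED / QUOTED_END) rather than a regex- or
split-based tokenizer, which is what lets it honour quoting and `""` escapes
without re-scanning the line. A stripped-down sketch of the same technique (a
hypothetical helper, much simpler than the real class, which also handles
trimming, custom separators, and writes directly into the `IngestDocument`):

    import java.util.ArrayList;
    import java.util.List;

    // Minimal sketch of a single-pass CSV state machine. Handles quoting and
    // "" escapes; unlike the real CsvParser it tolerates stray quotes inside
    // unquoted fields instead of rejecting them.
    final class MiniCsv {
        static List<String> split(String line, char sep, char quote) {
            List<String> fields = new ArrayList<>();
            StringBuilder cur = new StringBuilder();
            boolean quoted = false;
            for (int i = 0; i < line.length(); i++) {
                char c = line.charAt(i);
                if (quoted) {
                    if (c == quote) {
                        if (i + 1 < line.length() && line.charAt(i + 1) == quote) {
                            cur.append(quote); // escaped quote ("")
                            i++;
                        } else {
                            quoted = false;    // closing quote
                        }
                    } else {
                        cur.append(c);
                    }
                } else if (c == quote && cur.length() == 0) {
                    quoted = true;             // opening quote at field start
                } else if (c == sep) {
                    fields.add(cur.toString());
                    cur.setLength(0);
                } else {
                    cur.append(c);
                }
            }
            if (quoted) {
                throw new IllegalArgumentException("Unmatched quote");
            }
            fields.add(cur.toString());
            return fields;
        }
    }

For example, `MiniCsv.split("a,\"b,c\",d", ',', '"')` yields the three fields
`a`, `b,c` and `d`.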
diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java new file mode 100644 index 000000000000..077d12684e9a --- /dev/null +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvParser.java @@ -0,0 +1,206 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.ingest.common; + +import org.elasticsearch.ingest.IngestDocument; + +final class CsvParser { + + private static final char LF = '\n'; + private static final char CR = '\r'; + private static final char SPACE = ' '; + private static final char TAB = '\t'; + + private enum State { + START, UNQUOTED, QUOTED, QUOTED_END + } + + private final char quote; + private final char separator; + private final boolean trim; + private final String[] headers; + private final IngestDocument ingestDocument; + private final StringBuilder builder = new StringBuilder(); + private State state = State.START; + private String line; + private int currentHeader = 0; + private int startIndex = 0; + private int length; + private int currentIndex; + + CsvParser(IngestDocument ingestDocument, char quote, char separator, boolean trim, String[] headers) { + this.ingestDocument = ingestDocument; + this.quote = quote; + this.separator = separator; + this.trim = trim; + this.headers = headers; + } + + void process(String line) { + this.line = line; + length = line.length(); + for (currentIndex = 0; currentIndex < length; currentIndex++) { + switch (state) { + case START: + if (processStart()) { + return; + } + break; + case UNQUOTED: + if (processUnquoted()) { + return; + } + break; + case QUOTED: + processQuoted(); + break; + case QUOTED_END: + if (processQuotedEnd()) { + return; + } + break; + } + } + + //we've reached end of string, we need to handle last field + switch (state) { + case UNQUOTED: + setField(length); + break; + case QUOTED_END: + setField(length - 1); + break; + case QUOTED: + throw new IllegalArgumentException("Unmatched quote"); + } + } + + private boolean processStart() { + for (; currentIndex < length; currentIndex++) { + char c = currentChar(); + if (c == quote) { + state = State.QUOTED; + builder.setLength(0); + startIndex = currentIndex + 1; + return false; + } else if (c == separator) { + startIndex++; + if (nextHeader()) { + return true; + } + } else if (isWhitespace(c)) { + if (trim) { + startIndex++; + } + } else { + state = State.UNQUOTED; + builder.setLength(0); + return false; + } + } + return true; + } + + private boolean processUnquoted() { + int spaceCount = 0; + for (; currentIndex < length; currentIndex++) { + char c = currentChar(); + if (c == LF || c == CR || c == quote) { + throw new IllegalArgumentException("Illegal 
character inside unquoted field at " + currentIndex); + } else if (trim && isWhitespace(c)) { + spaceCount++; + } else if (c == separator) { + state = State.START; + if (setField(currentIndex - spaceCount)) { + return true; + } + startIndex = currentIndex + 1; + return false; + } else { + spaceCount = 0; + } + } + return false; + } + + private void processQuoted() { + for (; currentIndex < length; currentIndex++) { + if (currentChar() == quote) { + state = State.QUOTED_END; + break; + } + } + } + + private boolean processQuotedEnd() { + char c = currentChar(); + if (c == quote) { + builder.append(line, startIndex, currentIndex - 1).append(quote); + startIndex = currentIndex + 1; + state = State.QUOTED; + return false; + } + boolean shouldSetField = true; + for (; currentIndex < length; currentIndex++) { + c = currentChar(); + if (isWhitespace(c)) { + if (shouldSetField) { + if (setField(currentIndex - 1)) { + return true; + } + shouldSetField = false; + } + } else if (c == separator) { + if (shouldSetField && setField(currentIndex - 1)) { + return true; + } + startIndex = currentIndex + 1; + state = State.START; + return false; + } else { + throw new IllegalArgumentException("character '" + c + "' after quoted field at " + currentIndex); + } + } + return true; + } + + private char currentChar() { + return line.charAt(currentIndex); + } + + private boolean isWhitespace(char c) { + return c == SPACE || c == TAB; + } + + private boolean setField(int endIndex) { + if (builder.length() == 0) { + ingestDocument.setFieldValue(headers[currentHeader], line.substring(startIndex, endIndex)); + } else { + builder.append(line, startIndex, endIndex); + ingestDocument.setFieldValue(headers[currentHeader], builder.toString()); + } + return nextHeader(); + } + + private boolean nextHeader() { + currentHeader++; + return currentHeader == headers.length; + } +} diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvProcessor.java new file mode 100644 index 000000000000..66d10cc239e4 --- /dev/null +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/CsvProcessor.java @@ -0,0 +1,108 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.ingest.common; + +import org.elasticsearch.ingest.AbstractProcessor; +import org.elasticsearch.ingest.ConfigurationUtils; +import org.elasticsearch.ingest.IngestDocument; + +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException; + +/** + * A processor that breaks line from CSV file into separate fields. 
+ * If more fields are requested than exist in the CSV, the extra fields will not be present in the document after processing.
+ * In the same way, this processor skips any field that is empty in the CSV.
+ *
+ * By default it uses the rules of RFC 4180, with one exception: whitespace is
+ * allowed before or after a quoted field. The processor can be tweaked with the following parameters:
+ *
+ * quote: set a custom quote character (defaults to ")
+ * separator: set a custom separator (defaults to ,)
+ * trim: trim leading and trailing whitespace in unquoted fields
+ */
+public final class CsvProcessor extends AbstractProcessor {
+
+    public static final String TYPE = "csv";
+
+    private final String field;
+    private final String[] headers;
+    private final boolean trim;
+    private final char quote;
+    private final char separator;
+    private final boolean ignoreMissing;
+
+    CsvProcessor(String tag, String field, String[] headers, boolean trim, char separator, char quote, boolean ignoreMissing) {
+        super(tag);
+        this.field = field;
+        this.headers = headers;
+        this.trim = trim;
+        this.quote = quote;
+        this.separator = separator;
+        this.ignoreMissing = ignoreMissing;
+    }
+
+    @Override
+    public IngestDocument execute(IngestDocument ingestDocument) {
+        if (headers.length == 0) {
+            return ingestDocument;
+        }
+
+        String line = ingestDocument.getFieldValue(field, String.class, ignoreMissing);
+        // getFieldValue only returns null when the field is missing and ignore_missing
+        // is enabled, in which case the document should be left unchanged.
+        if (line == null && ignoreMissing) {
+            return ingestDocument;
+        } else if (line == null) {
+            throw new IllegalArgumentException("field [" + field + "] is null, cannot process it.");
+        }
+        new CsvParser(ingestDocument, quote, separator, trim, headers).process(line);
+        return ingestDocument;
+    }
+
+    @Override
+    public String getType() {
+        return TYPE;
+    }
+
+    public static final class Factory implements org.elasticsearch.ingest.Processor.Factory {
+        @Override
+        public CsvProcessor create(Map<String, org.elasticsearch.ingest.Processor.Factory> registry, String processorTag,
+                                   Map<String, Object> config) {
+            String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
+            String quote = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "quote", "\"");
+            if (quote.length() != 1) {
+                throw newConfigurationException(TYPE, processorTag, "quote", "quote has to be single character like \" or '");
+            }
+            String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator", ",");
+            if (separator.length() != 1) {
+                throw newConfigurationException(TYPE, processorTag, "separator", "separator has to be single character like , or ;");
+            }
+            boolean trim = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "trim", false);
+            boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
+            List<String> targetFields = ConfigurationUtils.readList(TYPE, processorTag, config, "target_fields");
+            if (targetFields.isEmpty()) {
+                throw newConfigurationException(TYPE, processorTag, "target_fields", "target fields list can't be empty");
+            }
+            return new CsvProcessor(processorTag, field, targetFields.toArray(String[]::new), trim, separator.charAt(0),
+                quote.charAt(0), ignoreMissing);
+        }
+    }
+}
diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java
index 4f99c850e5bd..b37e5d13e460 100644
--- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java
+++ 
b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/IngestCommonPlugin.java @@ -88,7 +88,8 @@ public Map getProcessors(Processor.Parameters paramet entry(PipelineProcessor.TYPE, new PipelineProcessor.Factory(parameters.ingestService)), entry(DissectProcessor.TYPE, new DissectProcessor.Factory()), entry(DropProcessor.TYPE, new DropProcessor.Factory()), - entry(HtmlStripProcessor.TYPE, new HtmlStripProcessor.Factory())); + entry(HtmlStripProcessor.TYPE, new HtmlStripProcessor.Factory()), + entry(CsvProcessor.TYPE, new CsvProcessor.Factory())); } @Override diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java new file mode 100644 index 000000000000..87da73cce129 --- /dev/null +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/CsvProcessorTests.java @@ -0,0 +1,221 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.ingest.common; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.elasticsearch.ingest.IngestDocument; +import org.elasticsearch.ingest.RandomDocumentPicks; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.stream.Collectors; + +public class CsvProcessorTests extends ESTestCase { + + private static final Character[] SEPARATORS = new Character[]{',', ';', '|', '.'}; + private final String quote; + private char separator; + + + public CsvProcessorTests(@Name("quote") String quote) { + this.quote = quote; + } + + @ParametersFactory + public static Iterable parameters() { + return Arrays.asList(new Object[]{"'"}, new Object[]{"\""}, new Object[]{""}); + } + + @Before + public void setup() { + separator = randomFrom(SEPARATORS); + } + + public void testExactNumberOfFields() throws Exception { + int numItems = randomIntBetween(2, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = items.values().stream().map(v -> quote + v + quote).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.forEach((key, value) -> assertEquals(value, ingestDocument.getFieldValue(key, String.class))); + } + + public void testLessFieldsThanHeaders() throws Exception { + int numItems = randomIntBetween(4, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; 
i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = items.values().stream().map(v -> quote + v + quote).limit(3).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.keySet().stream().skip(3).forEach(key -> assertFalse(ingestDocument.hasField(key))); + items.entrySet().stream().limit(3).forEach(e -> assertEquals(e.getValue(), ingestDocument.getFieldValue(e.getKey(), String.class))); + } + + public void testLessHeadersThanFields() throws Exception { + int numItems = randomIntBetween(5, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().stream().limit(3).toArray(String[]::new); + String csv = items.values().stream().map(v -> quote + v + quote).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.entrySet().stream().limit(3).forEach(e -> assertEquals(e.getValue(), ingestDocument.getFieldValue(e.getKey(), String.class))); + } + + public void testSingleField() throws Exception { + String[] headers = new String[]{randomAlphaOfLengthBetween(5, 10)}; + String value = randomAlphaOfLengthBetween(5, 10); + String csv = quote + value + quote; + + IngestDocument ingestDocument = processDocument(headers, csv); + + assertEquals(value, ingestDocument.getFieldValue(headers[0], String.class)); + } + + public void testEscapedQuote() throws Exception { + int numItems = randomIntBetween(2, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10) + quote + quote + randomAlphaOfLengthBetween(5 + , 10) + quote + quote); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = items.values().stream().map(v -> quote + v + quote).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.forEach((key, value) -> assertEquals(value.replace(quote + quote, quote), ingestDocument.getFieldValue(key, String.class))); + } + + public void testQuotedStrings() throws Exception { + assumeFalse("quote needed", quote.isEmpty()); + int numItems = randomIntBetween(2, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), + separator + randomAlphaOfLengthBetween(5, 10) + separator + "\n\r" + randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = items.values().stream().map(v -> quote + v + quote).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, csv); + + items.forEach((key, value) -> assertEquals(value.replace(quote + quote, quote), ingestDocument.getFieldValue(key, + String.class))); + } + + public void testEmptyFields() throws Exception { + int numItems = randomIntBetween(5, 10); + Map items = new LinkedHashMap<>(); + for (int i = 0; i < numItems; i++) { + items.put(randomAlphaOfLengthBetween(5, 10), randomAlphaOfLengthBetween(5, 10)); + } + String[] headers = items.keySet().toArray(new String[numItems]); + String csv = + items.values().stream().map(v -> quote + v + quote).limit(numItems - 
1).skip(3).collect(Collectors.joining(separator + "")); + + IngestDocument ingestDocument = processDocument(headers, + "" + separator + "" + separator + "" + separator + csv + separator + separator + + "abc"); + + items.keySet().stream().limit(3).forEach(key -> assertFalse(ingestDocument.hasField(key))); + items.entrySet().stream().limit(numItems - 1).skip(3).forEach(e -> assertEquals(e.getValue(), + ingestDocument.getFieldValue(e.getKey(), String.class))); + items.keySet().stream().skip(numItems - 1).forEach(key -> assertFalse(ingestDocument.hasField(key))); + } + + public void testWrongStings() throws Exception { + assumeTrue("single run only", quote.isEmpty()); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "abc\"abc")); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "\"abc\"asd")); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "\"abcasd")); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "abc\nabc")); + expectThrows(IllegalArgumentException.class, () -> processDocument(new String[]{"a"}, "abc\rabc")); + } + + public void testQuotedWhitespaces() throws Exception { + assumeFalse("quote needed", quote.isEmpty()); + IngestDocument document = processDocument(new String[]{"a", "b", "c", "d"}, + " abc " + separator + " def" + separator + "ghi " + separator + " " + quote + " ooo " + quote); + assertEquals("abc", document.getFieldValue("a", String.class)); + assertEquals("def", document.getFieldValue("b", String.class)); + assertEquals("ghi", document.getFieldValue("c", String.class)); + assertEquals(" ooo ", document.getFieldValue("d", String.class)); + } + + public void testUntrimmed() throws Exception { + assumeFalse("quote needed", quote.isEmpty()); + IngestDocument document = processDocument(new String[]{"a", "b", "c", "d", "e", "f"}, + " abc " + separator + " def" + separator + "ghi " + separator + " " + + quote + "ooo" + quote + " " + separator + " " + quote + "jjj" + quote + " ", false); + assertEquals(" abc ", document.getFieldValue("a", String.class)); + assertEquals(" def", document.getFieldValue("b", String.class)); + assertEquals("ghi ", document.getFieldValue("c", String.class)); + assertEquals("ooo", document.getFieldValue("d", String.class)); + assertEquals("jjj", document.getFieldValue("e", String.class)); + assertFalse(document.hasField("f")); + } + + public void testEmptyHeaders() throws Exception { + assumeTrue("single run only", quote.isEmpty()); + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "abc,abc"); + HashMap metadata = new HashMap<>(ingestDocument.getSourceAndMetadata()); + + CsvProcessor processor = new CsvProcessor(randomAlphaOfLength(5), fieldName, new String[0], false, ',', '"', false); + + processor.execute(ingestDocument); + + assertEquals(metadata, ingestDocument.getSourceAndMetadata()); + } + + private IngestDocument processDocument(String[] headers, String csv) throws Exception { + return processDocument(headers, csv, true); + } + + private IngestDocument processDocument(String[] headers, String csv, boolean trim) throws Exception { + IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); + + String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, csv); + char quoteChar = quote.isEmpty() ? 
'"' : quote.charAt(0); + CsvProcessor processor = new CsvProcessor(randomAlphaOfLength(5), fieldName, headers, trim, separator, quoteChar, false); + + processor.execute(ingestDocument); + + return ingestDocument; + } +} diff --git a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/250_csv.yml b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/250_csv.yml new file mode 100644 index 000000000000..a38805fb1fec --- /dev/null +++ b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/250_csv.yml @@ -0,0 +1,164 @@ +--- +teardown: + - do: + ingest.delete_pipeline: + id: "my_pipeline" + ignore: 404 + +--- +"Test CSV Processor defaults": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "csv": { + "field": "value", + "target_fields":["a","b","c"] + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: > + { + "value": "aa,bb,cc" + } + + - do: + get: + index: test + id: 1 + - match: { _source.a: "aa" } + - match: { _source.b: "bb" } + - match: { _source.c: "cc" } + +--- +"Test CSV Processor quote and separator": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "csv": { + "field": "value", + "target_fields":["a","b","c","d","e"], + "quote": "'", + "separator": ";" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: > + { + "value": "'aa';'b;b';'cc';d,d;'ee''ee'" + } + + - do: + get: + index: test + id: 1 + - match: { _source.a: "aa" } + - match: { _source.b: "b;b" } + - match: { _source.c: "cc" } + - match: { _source.d: "d,d" } + - match: { _source.e: "ee'ee" } + +--- +"Test CSV Processor trim": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "csv": { + "field": "value", + "target_fields":["a","b","c"], + "trim": true, + "quote": "'" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: > + { + "value": " aa, bb , 'cc'" + } + + - do: + get: + index: test + id: 1 + - match: { _source.a: "aa" } + - match: { _source.b: "bb" } + - match: { _source.c: "cc" } + +--- +"Test CSV Processor trim log": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "csv": { + "field": "value", + "target_fields":["date","level","server","id","msg"], + "trim": true, + "separator": "|" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: > + { + "value": "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |listening on 0.0.0.0:9987, :::9987" + } + + - do: + get: + index: test + id: 1 + - match: { _source.date: "2018-01-06 16:56:14.295748" } + - match: { _source.level: "INFO" } + - match: { _source.server: "VirtualServer" } + - match: { _source.id: "1" } + - match: { _source.msg: "listening on 0.0.0.0:9987, :::9987" } From 926d142c0dbea0c1f49007c6f2fa12a8f290f9b0 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 11 Dec 2019 16:45:44 +0100 Subject: [PATCH 20/22] Fix GCS Mock Batch Delete Behavior (#50034) Batch deletes get a response for every delete request, not just those that actually hit an existing blob. 
The fact that we only responded for existing blobs leads to a degenerate
response that throws a parse exception if a batch delete only contains
non-existent blobs.
---
 .../GoogleCloudStorageBlobStoreRepositoryTests.java | 13 +++++++++++++
 .../fixture/gcs/GoogleCloudStorageHttpHandler.java  |  7 +++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java
index 7a2c3d780123..d8926b25e2c4 100644
--- a/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java
+++ b/plugins/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java
@@ -26,6 +26,8 @@ import com.sun.net.httpserver.HttpHandler;
 import fixture.gcs.FakeOAuth2HttpHandler;
 import fixture.gcs.GoogleCloudStorageHttpHandler;
+import org.elasticsearch.action.ActionRunnable;
+import org.elasticsearch.action.support.PlainActionFuture;
 import org.elasticsearch.cluster.metadata.RepositoryMetaData;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.common.SuppressForbidden;
@@ -37,7 +39,9 @@
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.repositories.RepositoriesService;
 import org.elasticsearch.repositories.Repository;
+import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
 import org.elasticsearch.repositories.blobstore.ESMockAPIBasedRepositoryIntegTestCase;
 import org.threeten.bp.Duration;
@@ -101,6 +105,15 @@ protected Settings nodeSettings(int nodeOrdinal) {
         return settings.build();
     }

+    public void testDeleteSingleItem() {
+        final String repoName = createRepository(randomName());
+        final RepositoriesService repositoriesService = internalCluster().getMasterNodeInstance(RepositoriesService.class);
+        final BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(repoName);
+        PlainActionFuture.get(f -> repository.threadPool().generic().execute(ActionRunnable.run(f, () ->
+            repository.blobStore().blobContainer(repository.basePath()).deleteBlobsIgnoringIfNotExists(Collections.singletonList("foo"))
+        )));
+    }
+
     public void testChunkSize() {
         // default chunk size
         RepositoryMetaData repositoryMetaData = new RepositoryMetaData("repo", GoogleCloudStorageRepository.TYPE, Settings.EMPTY);
diff --git a/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java b/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java
index ba2a725fed29..a374a745909a 100644
--- a/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java
+++ b/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java
@@ -167,10 +167,9 @@ public void handle(final HttpExchange exchange) throws IOException {
                 } else if (line.startsWith("DELETE")) {
                     final String name = line.substring(line.indexOf(uri) + uri.length(), line.lastIndexOf(" HTTP"));
                     if (Strings.hasText(name)) {
-                        if (blobs.entrySet().removeIf(blob -> blob.getKey().equals(URLDecoder.decode(name, UTF_8)))) {
-                            batch.append("HTTP/1.1 204 NO_CONTENT").append('\n');
-                            batch.append('\n');
-                        }
+                        blobs.remove(URLDecoder.decode(name, UTF_8));
+                        batch.append("HTTP/1.1 
204 NO_CONTENT").append('\n'); + batch.append('\n'); } } } From a3dd2f38cbf19ce4235e8de7c87ee54bd8b4ebf6 Mon Sep 17 00:00:00 2001 From: Henning Andersen <33268011+henningandersen@users.noreply.github.com> Date: Wed, 11 Dec 2019 17:18:51 +0100 Subject: [PATCH 21/22] Log attachment generation failures (#50080) Watcher logs when actions fail in ActionWrapper, but failures to generate an email attachment are not logged and we thus only know the type of the exception and not where/how it occurred. --- .../xpack/watcher/actions/email/ExecutableEmailAction.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/actions/email/ExecutableEmailAction.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/actions/email/ExecutableEmailAction.java index fcc4eb0e9422..1f8e87cad1f9 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/actions/email/ExecutableEmailAction.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/actions/email/ExecutableEmailAction.java @@ -6,6 +6,8 @@ package org.elasticsearch.xpack.watcher.actions.email; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.logging.log4j.util.Supplier; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.xpack.core.watcher.actions.Action; import org.elasticsearch.xpack.core.watcher.actions.ExecutableAction; @@ -57,6 +59,8 @@ public Action.Result execute(String actionId, WatchExecutionContext ctx, Payload Attachment attachment = parser.toAttachment(ctx, payload, emailAttachment); attachments.put(attachment.id(), attachment); } catch (ElasticsearchException | IOException e) { + logger().error( + (Supplier) () -> new ParameterizedMessage("failed to execute action [{}/{}]", ctx.watch().id(), actionId), e); return new EmailAction.Result.FailureWithException(action.type(), e); } } From 3d96e6b68e3769d4cef393db934f63edc4ebf3a1 Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Wed, 11 Dec 2019 09:50:41 -0800 Subject: [PATCH 22/22] [DOCS] Move datafeed resource definitions into APIs (#50005) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: István Zoltán Szabó --- .../apis/datafeedresource.asciidoc | 161 ------------------ .../apis/delete-datafeed.asciidoc | 7 +- .../apis/get-datafeed-stats.asciidoc | 109 +++++++----- .../apis/get-datafeed.asciidoc | 117 +++++++++---- .../apis/preview-datafeed.asciidoc | 39 +++-- .../apis/put-datafeed.asciidoc | 66 +++---- .../apis/start-datafeed.asciidoc | 3 +- .../apis/stop-datafeed.asciidoc | 18 +- .../apis/update-datafeed.asciidoc | 83 ++++----- .../delayed-data-detection.asciidoc | 20 +-- docs/reference/ml/ml-shared.asciidoc | 153 +++++++++++++++++ docs/reference/redirects.asciidoc | 13 +- docs/reference/rest-api/defs.asciidoc | 3 - 13 files changed, 415 insertions(+), 377 deletions(-) delete mode 100644 docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc diff --git a/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc b/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc deleted file mode 100644 index 864e71e35bdb..000000000000 --- a/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc +++ /dev/null @@ -1,161 +0,0 @@ -[role="xpack"] -[testenv="platinum"] -[[ml-datafeed-resource]] -=== {dfeed-cap} resources - -A {dfeed} resource has the following properties: - 
-`aggregations`:: - (object) If set, the {dfeed} performs aggregation searches. - Support for aggregations is limited and should only be used with - low cardinality data. For more information, see - {stack-ov}/ml-configuring-aggregation.html[Aggregating Data for Faster Performance]. - -`chunking_config`:: - (object) Specifies how data searches are split into time chunks. - See <>. - For example: `{"mode": "manual", "time_span": "3h"}` - -`datafeed_id`:: - (string) A numerical character string that uniquely identifies the {dfeed}. - This property is informational; you cannot change the identifier for existing - {dfeeds}. - -`frequency`:: - (time units) The interval at which scheduled queries are made while the - {dfeed} runs in real time. The default value is either the bucket span for short - bucket spans, or, for longer bucket spans, a sensible fraction of the bucket - span. For example: `150s`. - -`indices`:: - (array) An array of index names. For example: `["it_ops_metrics"]` - -`job_id`:: - (string) The unique identifier for the job to which the {dfeed} sends data. - -`query`:: - (object) The {es} query domain-specific language (DSL). This value - corresponds to the query object in an {es} search POST body. All the - options that are supported by {es} can be used, as this object is - passed verbatim to {es}. By default, this property has the following - value: `{"match_all": {"boost": 1}}`. - -`query_delay`:: - (time units) The number of seconds behind real time that data is queried. For - example, if data from 10:04 a.m. might not be searchable in {es} until - 10:06 a.m., set this property to 120 seconds. The default value is randomly - selected between `60s` and `120s`. This randomness improves the query - performance when there are multiple jobs running on the same node. - -`script_fields`:: - (object) Specifies scripts that evaluate custom expressions and returns - script fields to the {dfeed}. - The detector configuration objects in a job can contain - functions that use these script fields. - For more information, see - {stack-ov}/ml-configuring-transform.html[Transforming Data With Script Fields]. - -`scroll_size`:: - (unsigned integer) The `size` parameter that is used in {es} searches. - The default value is `1000`. - -`delayed_data_check_config`:: - (object) Specifies whether the data feed checks for missing data and - the size of the window. For example: - `{"enabled": true, "check_window": "1h"}` See - <>. - -`max_empty_searches`:: - (integer) If a real-time {dfeed} has never seen any data (including during - any initial training period) then it will automatically stop itself and - close its associated job after this many real-time searches that return no - documents. In other words, it will stop after `frequency` times - `max_empty_searches` of real-time operation. If not set - then a {dfeed} with no end time that sees no data will remain started until - it is explicitly stopped. By default this setting is not set. - -[[ml-datafeed-chunking-config]] -==== Chunking configuration objects - -{dfeeds-cap} might be required to search over long time periods, for several months -or years. This search is split into time chunks in order to ensure the load -on {es} is managed. Chunking configuration controls how the size of these time -chunks are calculated and is an advanced configuration option. - -A chunking configuration object has the following properties: - -`mode`:: - There are three available modes: + - `auto`::: The chunk size will be dynamically calculated. 
This is the default - and recommended value. - `manual`::: Chunking will be applied according to the specified `time_span`. - `off`::: No chunking will be applied. - -`time_span`:: - (time units) The time span that each search will be querying. - This setting is only applicable when the mode is set to `manual`. - For example: `3h`. - -[[ml-datafeed-delayed-data-check-config]] -==== Delayed data check configuration objects - -The {dfeed} can optionally search over indices that have already been read in -an effort to determine whether any data has subsequently been added to the index. -If missing data is found, it is a good indication that the `query_delay` option -is set too low and the data is being indexed after the {dfeed} has passed that -moment in time. See -{stack-ov}/ml-delayed-data-detection.html[Working with delayed data]. - -This check runs only on real-time {dfeeds}. - -The configuration object has the following properties: - -`enabled`:: - (boolean) Specifies whether the {dfeed} periodically checks for delayed data. - Defaults to `true`. - -`check_window`:: - (time units) The window of time that is searched for late data. This window of - time ends with the latest finalized bucket. It defaults to `null`, which - causes an appropriate `check_window` to be calculated when the real-time - {dfeed} runs. In particular, the default `check_window` span calculation is - based on the maximum of `2h` or `8 * bucket_span`. - -[float] -[[ml-datafeed-counts]] -==== {dfeed-cap} counts - -The get {dfeed} statistics API provides information about the operational -progress of a {dfeed}. All of these properties are informational; you cannot -update their values: - -`assignment_explanation`:: - (string) For started {dfeeds} only, contains messages relating to the - selection of a node. - -`datafeed_id`:: - (string) A numerical character string that uniquely identifies the {dfeed}. - -`node`:: - (object) The node upon which the {dfeed} is started. The {dfeed} and job will - be on the same node. - `id`::: The unique identifier of the node. For example, - "0-o0tOoRTwKFZifatTWKNw". - `name`::: The node name. For example, `0-o0tOo`. - `ephemeral_id`::: The node ephemeral ID. - `transport_address`::: The host and port where transport HTTP connections are - accepted. For example, `127.0.0.1:9300`. - `attributes`::: For example, `{"ml.machine_memory": "17179869184"}`. - -`state`:: - (string) The status of the {dfeed}, which can be one of the following values: + - `started`::: The {dfeed} is actively receiving data. - `stopped`::: The {dfeed} is stopped and will not receive data until it is - re-started. - -`timing_stats`:: - (object) An object that provides statistical information about timing aspect of this datafeed. + - `job_id`::: A numerical character string that uniquely identifies the job. - `search_count`::: Number of searches performed by this datafeed. - `total_search_time_ms`::: Total time the datafeed spent searching in milliseconds. - diff --git a/docs/reference/ml/anomaly-detection/apis/delete-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/delete-datafeed.asciidoc index 21b4eb75bef0..d933afe4f9a4 100644 --- a/docs/reference/ml/anomaly-detection/apis/delete-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/delete-datafeed.asciidoc @@ -28,14 +28,15 @@ can delete it. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. 
+(Required, string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id]
 
 [[ml-delete-datafeed-query-parms]]
 ==== {api-query-parms-title}
 
 `force`::
-  (Optional, boolean) Use to forcefully delete a started {dfeed}; this method is
-  quicker than stopping and deleting the {dfeed}.
+(Optional, boolean) Use to forcefully delete a started {dfeed}; this method is
+quicker than stopping and deleting the {dfeed}.
 
 [[ml-delete-datafeed-example]]
 ==== {api-examples-title}
diff --git a/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc
index bd126a651e26..feccd52364f4 100644
--- a/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc
+++ b/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc
@@ -45,36 +45,61 @@ IMPORTANT: This API returns a maximum of 10,000 {dfeeds}.
 ==== {api-path-parms-title}
 
 `<datafeed_id>`::
-  (Optional, string) Identifier for the {dfeed}. It can be a {dfeed} identifier
-  or a wildcard expression. If you do not specify one of these options, the API
-  returns statistics for all {dfeeds}.
+(Optional, string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id-wildcard]
+If you do not specify one of these options, the API returns information about
+all {dfeeds}.
 
 [[ml-get-datafeed-stats-query-parms]]
 ==== {api-query-parms-title}
 
 `allow_no_datafeeds`::
-  (Optional, boolean) Specifies what to do when the request:
-+
---
-* Contains wildcard expressions and there are no {datafeeds} that match.
-* Contains the `_all` string or no identifiers and there are no matches.
-* Contains wildcard expressions and there are only partial matches.
-
-The default value is `true`, which returns an empty `datafeeds` array when
-there are no matches and the subset of results when there are partial matches.
-If this parameter is `false`, the request returns a `404` status code when there
-are no matches or only partial matches.
---
+(Optional, boolean)
+include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds]
 
 [[ml-get-datafeed-stats-results]]
 ==== {api-response-body-title}
 
-The API returns the following information:
+The API returns an array of {dfeed} count objects. All of these properties are
+informational; you cannot update their values.
+
+`assignment_explanation`::
+(string) For started {dfeeds} only, contains messages relating to the selection
+of a node.
+
+`datafeed_id`::
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id]
+
+`node`::
+(object) For started {dfeeds} only, the node upon which the {dfeed} is started.
+The {dfeed} and job will be on the same node.
+`id`::: The unique identifier of the node. For example, "0-o0tOoRTwKFZifatTWKNw".
+`name`::: The node name. For example, `0-o0tOo`.
+`ephemeral_id`::: The node ephemeral ID.
+`transport_address`::: The host and port where transport HTTP connections are
+accepted. For example, `127.0.0.1:9300`.
+`attributes`::: For example, `{"ml.machine_memory": "17179869184"}`.
+
+`state`::
+(string) The status of the {dfeed}, which can be one of the following values:
++
+--
+* `started`: The {dfeed} is actively receiving data.
+* `stopped`: The {dfeed} is stopped and will not receive data until it is
+re-started.
+--
+
+`timing_stats`::
+(object) An object that provides statistical information about the timing
+aspects of this {dfeed}.
+//average_search_time_per_bucket_ms +//bucket_count +//exponential_average_search_time_per_hour_ms +`job_id`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] +`search_count`::: Number of searches performed by this {dfeed}. +`total_search_time_ms`::: Total time the {dfeed} spent searching in milliseconds. -`datafeeds`:: - (array) An array of {dfeed} count objects. - For more information, see <>. [[ml-get-datafeed-stats-response-codes]] ==== {api-response-codes-title} @@ -86,46 +111,46 @@ The API returns the following information: [[ml-get-datafeed-stats-example]] ==== {api-examples-title} -The following example gets usage information for the -`datafeed-total-requests` {dfeed}: - [source,console] -------------------------------------------------- -GET _ml/datafeeds/datafeed-total-requests/_stats +GET _ml/datafeeds/datafeed-high_sum_total_sales/_stats -------------------------------------------------- -// TEST[skip:setup:server_metrics_startdf] +// TEST[skip:Kibana sample data started datafeed] The API returns the following results: [source,console-result] ---- { - "count": 1, - "datafeeds": [ + "count" : 1, + "datafeeds" : [ { - "datafeed_id": "datafeed-total-requests", - "state": "started", - "node": { - "id": "2spCyo1pRi2Ajo-j-_dnPX", - "name": "node-0", - "ephemeral_id": "hoXMLZB0RWKfR9UPPUCxXX", - "transport_address": "127.0.0.1:9300", - "attributes": { - "ml.machine_memory": "17179869184", - "ml.max_open_jobs": "20" + "datafeed_id" : "datafeed-high_sum_total_sales", + "state" : "started", + "node" : { + "id" : "7bmMXyWCRs-TuPfGJJ_yMw", + "name" : "node-0", + "ephemeral_id" : "hoXMLZB0RWKfR9UPPUCxXX", + "transport_address" : "127.0.0.1:9300", + "attributes" : { + "ml.machine_memory" : "17179869184", + "ml.max_open_jobs" : "20" } }, - "assignment_explanation": "", - "timing_stats": { - "job_id": "job-total-requests", - "search_count": 20, - "total_search_time_ms": 120.5 + "assignment_explanation" : "", + "timing_stats" : { + "job_id" : "high_sum_total_sales", + "search_count" : 7, + "bucket_count" : 743, + "total_search_time_ms" : 134.0, + "average_search_time_per_bucket_ms" : 0.180349932705249, + "exponential_average_search_time_per_hour_ms" : 11.514712961628677 } } ] } ---- -// TESTRESPONSE[s/"2spCyo1pRi2Ajo-j-_dnPX"/$body.$_path/] +// TESTRESPONSE[s/"7bmMXyWCRs-TuPfGJJ_yMw"/$body.$_path/] // TESTRESPONSE[s/"node-0"/$body.$_path/] // TESTRESPONSE[s/"hoXMLZB0RWKfR9UPPUCxXX"/$body.$_path/] // TESTRESPONSE[s/"127.0.0.1:9300"/$body.$_path/] diff --git a/docs/reference/ml/anomaly-detection/apis/get-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-datafeed.asciidoc index 3330ae7b821d..11aca1edd95e 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-datafeed.asciidoc @@ -42,35 +42,71 @@ IMPORTANT: This API returns a maximum of 10,000 {dfeeds}. ==== {api-path-parms-title} ``:: - (Optional, string) Identifier for the {dfeed}. It can be a {dfeed} identifier - or a wildcard expression. If you do not specify one of these options, the API - returns information about all {dfeeds}. +(Optional, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id-wildcard] +If you do not specify one of these options, the API returns information about +all {dfeeds}. [[ml-get-datafeed-query-parms]] ==== {api-query-parms-title} `allow_no_datafeeds`:: - (Optional, boolean) Specifies what to do when the request: -+ --- -* Contains wildcard expressions and there are no {datafeeds} that match. 
-* Contains the `_all` string or no identifiers and there are no matches.
-* Contains wildcard expressions and there are only partial matches.
-
-The default value is `true`, which returns an empty `datafeeds` array when
-there are no matches and the subset of results when there are partial matches.
-If this parameter is `false`, the request returns a `404` status code when there
-are no matches or only partial matches.
---
+(Optional, boolean)
+include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds]
 
 [[ml-get-datafeed-results]]
 ==== {api-response-body-title}
 
-The API returns the following information:
+The API returns an array of {dfeed} resources, which have the following
+properties:
 
-`datafeeds`::
-  (array) An array of {dfeed} objects.
-  For more information, see <<ml-datafeed-resource>>.
+`aggregations`::
+(object)
+include::{docdir}/ml/ml-shared.asciidoc[tag=aggregations]
+
+`chunking_config`::
+(object)
+include::{docdir}/ml/ml-shared.asciidoc[tag=chunking-config]
+
+`datafeed_id`::
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id]
+
+`delayed_data_check_config`::
+(object)
+include::{docdir}/ml/ml-shared.asciidoc[tag=delayed-data-check-config]
+
+`frequency`::
+(<<time-units,time units>>)
+include::{docdir}/ml/ml-shared.asciidoc[tag=frequency]
+
+`indices`::
+(array)
+include::{docdir}/ml/ml-shared.asciidoc[tag=indices]
+
+`job_id`::
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-datafeed]
+
+`max_empty_searches`::
+(integer)
+include::{docdir}/ml/ml-shared.asciidoc[tag=max-empty-searches]
+
+`query`::
+(object)
+include::{docdir}/ml/ml-shared.asciidoc[tag=query]
+
+`query_delay`::
+(<<time-units,time units>>)
+include::{docdir}/ml/ml-shared.asciidoc[tag=query-delay]
+
+`script_fields`::
+(object)
+include::{docdir}/ml/ml-shared.asciidoc[tag=script-fields]
+
+`scroll_size`::
+(unsigned integer)
+include::{docdir}/ml/ml-shared.asciidoc[tag=scroll-size]
 
 [[ml-get-datafeed-response-codes]]
 ==== {api-response-codes-title}
@@ -83,39 +119,48 @@ The API returns the following information:
 ==== {api-examples-title}
 
 The following example gets configuration information for the
-`datafeed-total-requests` {dfeed}:
+`datafeed-high_sum_total_sales` {dfeed}:
 
 [source,console]
 --------------------------------------------------
-GET _ml/datafeeds/datafeed-total-requests
+GET _ml/datafeeds/datafeed-high_sum_total_sales
 --------------------------------------------------
-// TEST[skip:setup:server_metrics_datafeed]
+// TEST[skip:kibana sample data]
 
 The API returns the following results:
 
 [source,console-result]
 ----
 {
-  "count": 1,
-  "datafeeds": [
+  "count" : 1,
+  "datafeeds" : [
     {
-      "datafeed_id": "datafeed-total-requests",
-      "job_id": "total-requests",
-      "query_delay": "83474ms",
-      "indices": [
-        "server-metrics"
+      "datafeed_id" : "datafeed-high_sum_total_sales",
+      "job_id" : "high_sum_total_sales",
+      "query_delay" : "93169ms",
+      "indices" : [
+        "kibana_sample_data_ecommerce"
       ],
-      "query": {
-        "match_all": {
-          "boost": 1.0
+      "query" : {
+        "bool" : {
+          "filter" : [
+            {
+              "term" : {
+                "_index" : "kibana_sample_data_ecommerce"
+              }
+            }
+          ]
         }
      },
-      "scroll_size": 1000,
-      "chunking_config": {
-        "mode": "auto"
+      "scroll_size" : 1000,
+      "chunking_config" : {
+        "mode" : "auto"
+      },
+      "delayed_data_check_config" : {
+        "enabled" : true
      }
    }
  ]
}
----
-// TESTRESPONSE[s/"query.boost": "1.0"/"query.boost": $body.query.boost/]
+// TESTRESPONSE[s/"query_delay" : "93169ms"/"query_delay" : $body.datafeeds.0.query_delay/]
diff --git a/docs/reference/ml/anomaly-detection/apis/preview-datafeed.asciidoc
b/docs/reference/ml/anomaly-detection/apis/preview-datafeed.asciidoc index c3afca8b03c6..6220d8a1de24 100644 --- a/docs/reference/ml/anomaly-detection/apis/preview-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/preview-datafeed.asciidoc @@ -41,18 +41,17 @@ it to ensure it is returning the expected data. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-preview-datafeed-example]] ==== {api-examples-title} -The following example obtains a preview of the `datafeed-farequote` {dfeed}: - [source,console] -------------------------------------------------- -GET _ml/datafeeds/datafeed-farequote/_preview +GET _ml/datafeeds/datafeed-high_sum_total_sales/_preview -------------------------------------------------- -// TEST[skip:setup:farequote_datafeed] +// TEST[skip:set up Kibana sample data] The data that is returned for this example is as follows: @@ -60,22 +59,28 @@ The data that is returned for this example is as follows: ---- [ { - "time": 1454803200000, - "airline": "JZA", - "doc_count": 5, - "responsetime": 990.4628295898438 + "order_date" : 1574294659000, + "category.keyword" : "Men's Clothing", + "customer_full_name.keyword" : "Sultan Al Benson", + "taxful_total_price" : 35.96875 }, { - "time": 1454803200000, - "airline": "JBU", - "doc_count": 23, - "responsetime": 877.5927124023438 + "order_date" : 1574294918000, + "category.keyword" : [ + "Women's Accessories", + "Women's Clothing" + ], + "customer_full_name.keyword" : "Pia Webb", + "taxful_total_price" : 83.0 }, { - "time": 1454803200000, - "airline": "KLM", - "doc_count": 42, - "responsetime": 1355.481201171875 + "order_date" : 1574295782000, + "category.keyword" : [ + "Women's Accessories", + "Women's Shoes" + ], + "customer_full_name.keyword" : "Brigitte Graham", + "taxful_total_price" : 72.0 } ] ---- diff --git a/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc index ca3b9d61ba7a..cb3765a86c97 100644 --- a/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc @@ -43,70 +43,52 @@ those same roles. ==== {api-path-parms-title} ``:: - (Required, string) A numerical character string that uniquely identifies the - {dfeed}. This identifier can contain lowercase alphanumeric characters (a-z - and 0-9), hyphens, and underscores. It must start and end with alphanumeric - characters. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-put-datafeed-request-body]] ==== {api-request-body-title} `aggregations`:: - (Optional, object) If set, the {dfeed} performs aggregation searches. For more - information, see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=aggregations] `chunking_config`:: - (Optional, object) Specifies how data searches are split into time chunks. See - <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=chunking-config] `delayed_data_check_config`:: - (Optional, object) Specifies whether the data feed checks for missing data and - the size of the window. See <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=delayed-data-check-config] `frequency`:: - (Optional, <>) The interval at which scheduled queries - are made while the {dfeed} runs in real time. 
The default value is either the - bucket span for short bucket spans, or, for longer bucket spans, a sensible - fraction of the bucket span. For example: `150s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=frequency] `indices`:: - (Required, array) An array of index names. Wildcards are supported. For - example: `["it_ops_metrics", "server*"]`. -+ --- -NOTE: If any indices are in remote clusters then `cluster.remote.connect` must -not be set to `false` on any ML node. --- +(Required, array) +include::{docdir}/ml/ml-shared.asciidoc[tag=indices] -`job_id`:: - (Required, string) A numerical character string that uniquely identifies the - {anomaly-job}. +`job_id`:: +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] + `query`:: - (Optional, object) The {es} query domain-specific language (DSL). This value - corresponds to the query object in an {es} search POST body. All the options - that are supported by {Es} can be used, as this object is passed verbatim to - {es}. By default, this property has the following value: - `{"match_all": {"boost": 1}}`. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=query] `query_delay`:: - (Optional, <>) The number of seconds behind real time - that data is queried. For example, if data from 10:04 a.m. might not be - searchable in {es} until 10:06 a.m., set this property to 120 seconds. The - default value is `60s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=query-delay] `script_fields`:: - (Optional, object) Specifies scripts that evaluate custom expressions and - returns script fields to the {dfeed}. The detector configuration objects in a - job can contain functions that use these script fields. For more information, - see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=script-fields] `scroll_size`:: - (Optional, unsigned integer) The `size` parameter that is used in {es} - searches. The default value is `1000`. - -For more information about these properties, -see <>. +(Optional, unsigned integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=scroll-size] [[ml-put-datafeed-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc index 7faba863774d..dd3e6bbdfff5 100644 --- a/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc @@ -74,7 +74,8 @@ creation/update and runs the query using those same roles. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-start-datafeed-request-body]] ==== {api-request-body-title} diff --git a/docs/reference/ml/anomaly-detection/apis/stop-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/stop-datafeed.asciidoc index cde9f16c384a..f115d8657f7e 100644 --- a/docs/reference/ml/anomaly-detection/apis/stop-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/stop-datafeed.asciidoc @@ -40,25 +40,15 @@ comma-separated list of {dfeeds} or a wildcard expression. You can close all ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. It can be a {dfeed} identifier - or a wildcard expression. 
+(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id-wildcard] [[ml-stop-datafeed-query-parms]] ==== {api-query-parms-title} `allow_no_datafeeds`:: - (Optional, boolean) Specifies what to do when the request: -+ --- -* Contains wildcard expressions and there are no {datafeeds} that match. -* Contains the `_all` string or no identifiers and there are no matches. -* Contains wildcard expressions and there are only partial matches. - -The default value is `true`, which returns an empty `datafeeds` array when -there are no matches and the subset of results when there are partial matches. -If this parameter is `false`, the request returns a `404` status code when there -are no matches or only partial matches. --- +(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds] [[ml-stop-datafeed-request-body]] ==== {api-request-body-title} diff --git a/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc index d201d6cd093b..1336f71fcff7 100644 --- a/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc @@ -39,7 +39,8 @@ using those same roles. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-update-datafeed-request-body]] ==== {api-request-body-title} @@ -47,70 +48,58 @@ using those same roles. The following properties can be updated after the {dfeed} is created: `aggregations`:: - (Optional, object) If set, the {dfeed} performs aggregation searches. For more - information, see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=aggregations] `chunking_config`:: - (Optional, object) Specifies how data searches are split into time chunks. See - <>. - +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=chunking-config] + `delayed_data_check_config`:: - (Optional, object) Specifies whether the data feed checks for missing data and - the size of the window. See <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=delayed-data-check-config] `frequency`:: - (Optional, <>) The interval at which scheduled queries - are made while the {dfeed} runs in real time. The default value is either the - bucket span for short bucket spans, or, for longer bucket spans, a sensible - fraction of the bucket span. For example: `150s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=frequency] `indices`:: - (Optional, array) An array of index names. Wildcards are supported. For - example: `["it_ops_metrics", "server*"]`. +(Optional, array) +include::{docdir}/ml/ml-shared.asciidoc[tag=indices] + +`max_empty_searches`:: +(Optional, integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=max-empty-searches] ++ +-- +The special value `-1` unsets this setting. +-- `query`:: - (Optional, object) The {es} query domain-specific language (DSL). This value - corresponds to the query object in an {es} search POST body. All the options - that are supported by {es} can be used, as this object is passed verbatim to - {es}. By default, this property has the following value: - `{"match_all": {"boost": 1}}`. 
+(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=query] + -- -WARNING: If you change the query, then the analyzed data will also be changed, -therefore the required time to learn might be long and the understandability of -the results is unpredictable. -If you want to make significant changes to the source data, we would recommend -you clone it and create a second job containing the amendments. Let both run in -parallel and close one when you are satisfied with the results of the other job. +WARNING: If you change the query, the analyzed data is also changed. Therefore, +the required time to learn might be long and the understandability of the +results is unpredictable. If you want to make significant changes to the source +data, we would recommend you clone it and create a second job containing the +amendments. Let both run in parallel and close one when you are satisfied with +the results of the other job. + -- `query_delay`:: - (Optional, <>) The number of seconds behind real-time - that data is queried. For example, if data from 10:04 a.m. might not be - searchable in {es} until 10:06 a.m., set this property to 120 seconds. The - default value is `60s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=query-delay] `script_fields`:: - (Optional, object) Specifies scripts that evaluate custom expressions and - returns script fields to the {dfeed}. The detector configuration objects in a - job can contain functions that use these script fields. For more information, - see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=script-fields] `scroll_size`:: - (Optional, unsigned integer) The `size` parameter that is used in {es} - searches. The default value is `1000`. - -`max_empty_searches`:: - (Optional, integer) If a real-time {dfeed} has never seen any data (including - during any initial training period) then it will automatically stop itself - and close its associated job after this many real-time searches that return - no documents. In other words, it will stop after `frequency` times - `max_empty_searches` of real-time operation. If not set - then a {dfeed} with no end time that sees no data will remain started until - it is explicitly stopped. The special value `-1` unsets this setting. - -For more information about these properties, see <>. - +(Optional, unsigned integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=scroll-size] [[ml-update-datafeed-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/delayed-data-detection.asciidoc b/docs/reference/ml/anomaly-detection/delayed-data-detection.asciidoc index 625f839a8683..53f1756a4ec9 100644 --- a/docs/reference/ml/anomaly-detection/delayed-data-detection.asciidoc +++ b/docs/reference/ml/anomaly-detection/delayed-data-detection.asciidoc @@ -5,14 +5,15 @@ Delayed data are documents that are indexed late. That is to say, it is data related to a time that the {dfeed} has already processed. -When you create a datafeed, you can specify a -{ref}/ml-datafeed-resource.html[`query_delay`] setting. This setting enables the -datafeed to wait for some time past real-time, which means any "late" data in -this period is fully indexed before the datafeed tries to gather it. However, if -the setting is set too low, the datafeed may query for data before it has been -indexed and consequently miss that document. Conversely, if it is set too high, -analysis drifts farther away from real-time. 
The balance that is struck depends
-upon each use case and the environmental factors of the cluster.
+When you create a {dfeed}, you can specify a
+{ref}/ml-put-datafeed.html#ml-put-datafeed-request-body[`query_delay`] setting.
+This setting enables the {dfeed} to wait for some time past real-time, which
+means any "late" data in this period is fully indexed before the {dfeed} tries
+to gather it. However, if the setting is set too low, the {dfeed} may query for
+data before it has been indexed and consequently miss that document. Conversely,
+if it is set too high, analysis drifts farther away from real-time. The balance
+that is struck depends upon each use case and the environmental factors of the
+cluster.
 
 ==== Why worry about delayed data?
 
@@ -28,8 +29,7 @@ recorded so that you can determine a next course of action.
 
 ==== How do we detect delayed data?
 
-In addition to the `query_delay` field, there is a
-{ref}/ml-datafeed-resource.html#ml-datafeed-delayed-data-check-config[delayed data check config],
+In addition to the `query_delay` field, there is a delayed data check config,
 which enables you to configure the datafeed to look in the past for delayed
 data. Every 15 minutes or every `check_window`, whichever is smaller, the datafeed
 triggers a document search over the configured indices. This search looks over a
diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc
index bea970078d06..f277e6ab2e4a 100644
--- a/docs/reference/ml/ml-shared.asciidoc
+++ b/docs/reference/ml/ml-shared.asciidoc
@@ -1,3 +1,10 @@
+tag::aggregations[]
+If set, the {dfeed} performs aggregation searches. Support for aggregations is
+limited and should only be used with low cardinality data. For more information,
+see
+{stack-ov}/ml-configuring-aggregation.html[Aggregating data for faster performance].
+end::aggregations[]
+
 tag::allow-lazy-open[]
 Advanced configuration option. Specifies whether this job can open when there is
 insufficient {ml} node capacity for it to be immediately assigned to a node. The
@@ -9,6 +16,21 @@ return an error and the job waits in the `opening` state until sufficient {ml}
 node capacity is available.
 end::allow-lazy-open[]
 
+tag::allow-no-datafeeds[]
+Specifies what to do when the request:
++
+--
+* Contains wildcard expressions and there are no {dfeeds} that match.
+* Contains the `_all` string or no identifiers and there are no matches.
+* Contains wildcard expressions and there are only partial matches.
+
+The default value is `true`, which returns an empty `datafeeds` array when
+there are no matches and the subset of results when there are partial matches.
+If this parameter is `false`, the request returns a `404` status code when there
+are no matches or only partial matches.
+--
+end::allow-no-datafeeds[]
+
 tag::allow-no-jobs[]
 Specifies what to do when the request:
 +
@@ -207,6 +229,22 @@ add them here as
 <>.
 end::char-filter[]
 
+tag::chunking-config[]
+{dfeeds-cap} might be required to search over long time periods, for several months
+or years. This search is split into time chunks in order to ensure the load
+on {es} is managed. Chunking configuration controls how the size of these time
+chunks is calculated and is an advanced configuration option.
+A chunking configuration object has the following properties: + +`mode`::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=mode] + +`time_span`::: +(<>) +include::{docdir}/ml/ml-shared.asciidoc[tag=time-span] +end::chunking-config[] + tag::custom-rules[] An array of custom rule objects, which enable you to customize the way detectors operate. For example, a rule may dictate to the detector conditions under which @@ -301,6 +339,47 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=time-format] -- end::data-description[] +tag::datafeed-id[] +A numerical character string that uniquely identifies the +{dfeed}. This identifier can contain lowercase alphanumeric characters (a-z +and 0-9), hyphens, and underscores. It must start and end with alphanumeric +characters. +end::datafeed-id[] + +tag::datafeed-id-wildcard[] +Identifier for the {dfeed}. It can be a {dfeed} identifier or a wildcard +expression. +end::datafeed-id-wildcard[] + +tag::delayed-data-check-config[] +Specifies whether the {dfeed} checks for missing data and the size of the +window. For example: `{"enabled": true, "check_window": "1h"}`. ++ +-- +The {dfeed} can optionally search over indices that have already been read in +an effort to determine whether any data has subsequently been added to the index. +If missing data is found, it is a good indication that the `query_delay` option +is set too low and the data is being indexed after the {dfeed} has passed that +moment in time. See +{stack-ov}/ml-delayed-data-detection.html[Working with delayed data]. + +This check runs only on real-time {dfeeds}. + +The configuration object has the following properties: + +`enabled`:: +(boolean) Specifies whether the {dfeed} periodically checks for delayed data. +Defaults to `true`. + +`check_window`:: +(<>) The window of time that is searched for late data. +This window of time ends with the latest finalized bucket. It defaults to +`null`, which causes an appropriate `check_window` to be calculated when the +real-time {dfeed} runs. In particular, the default `check_window` span +calculation is based on the maximum of `2h` or `8 * bucket_span`. +-- +end::delayed-data-check-config[] + tag::dependent_variable[] `dependent_variable`:: (Required, string) Defines which field of the document is to be predicted. @@ -405,6 +484,13 @@ optional. If it is not specified, no token filters are applied prior to categorization. end::filter[] +tag::frequency[] +The interval at which scheduled queries are made while the {dfeed} runs in real +time. The default value is either the bucket span for short bucket spans, or, +for longer bucket spans, a sensible fraction of the bucket span. For example: +`150s`. +end::frequency[] + tag::function[] The analysis function that is used. For example, `count`, `rare`, `mean`, `min`, `max`, and `sum`. For more information, see @@ -424,6 +510,17 @@ tag::groups[] A list of job groups. A job can belong to no groups or many. end::groups[] +tag::indices[] +An array of index names. Wildcards are supported. For example: +`["it_ops_metrics", "server*"]`. ++ +-- +NOTE: If any indices are in remote clusters then `cluster.remote.connect` must +not be set to `false` on any {ml} nodes. + +-- +end::indices[] + tag::influencers[] A comma separated list of influencer field names. Typically these can be the by, over, or partition fields that are used in the detector configuration. You might @@ -475,6 +572,10 @@ alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric characters. 
end::job-id-data-frame-analytics-define[] +tag::job-id-datafeed[] +The unique identifier for the job to which the {dfeed} sends data. +end::job-id-datafeed[] + tag::jobs-stats-anomaly-detection[] An array of {anomaly-job} statistics objects. For more information, see <>. @@ -502,12 +603,32 @@ the <> API. -- end::latency[] +tag::max-empty-searches[] +If a real-time {dfeed} has never seen any data (including during any initial +training period) then it will automatically stop itself and close its associated +job after this many real-time searches that return no documents. In other words, +it will stop after `frequency` times `max_empty_searches` of real-time operation. +If not set then a {dfeed} with no end time that sees no data will remain started +until it is explicitly stopped. By default this setting is not set. +end::max-empty-searches[] + tag::maximum_number_trees[] `maximum_number_trees`:: (Optional, integer) Defines the maximum number of trees the forest is allowed to contain. The maximum value is 2000. end::maximum_number_trees[] +tag::mode[] +There are three available modes: ++ +-- +* `auto`: The chunk size is dynamically calculated. This is the default and +recommended value. +* `manual`: Chunking is applied according to the specified `time_span`. +* `off`: No chunking is applied. +-- +end::mode[] + tag::model-memory-limit[] The approximate maximum amount of memory resources that are required for analytical processing. Once this limit is approached, data pruning becomes @@ -615,6 +736,21 @@ tag::prediction_field_name[] Defaults to `_prediction`. end::prediction_field_name[] +tag::query[] +The {es} query domain-specific language (DSL). This value corresponds to the +query object in an {es} search POST body. All the options that are supported by +{es} can be used, as this object is passed verbatim to {es}. By default, this +property has the following value: `{"match_all": {"boost": 1}}`. +end::query[] + +tag::query-delay[] +The number of seconds behind real time that data is queried. For example, if +data from 10:04 a.m. might not be searchable in {es} until 10:06 a.m., set this +property to 120 seconds. The default value is randomly selected between `60s` +and `120s`. This randomness improves the query performance when there are +multiple jobs running on the same node. +end::query-delay[] + tag::renormalization-window-days[] Advanced configuration option. The period over which adjustments to the score are applied, as new data is seen. The default value is the longer of 30 days or @@ -633,6 +769,18 @@ are deleted from {es}. The default value is null, which means results are retained. end::results-retention-days[] +tag::script-fields[] +Specifies scripts that evaluate custom expressions and returns script fields to +the {dfeed}. The detector configuration objects in a job can contain functions +that use these script fields. For more information, see +{stack-ov}/ml-configuring-transform.html[Transforming data with script fields] +and <>. +end::script-fields[] + +tag::scroll-size[] +The `size` parameter that is used in {es} searches. The default value is `1000`. +end::scroll-size[] + tag::summary-count-field-name[] If this property is specified, the data that is fed to the job is expected to be pre-summarized. This property value is the name of the field that contains the @@ -663,6 +811,11 @@ job creation fails. -- end::time-format[] +tag::time-span[] +The time span that each search will be querying. This setting is only applicable +when the mode is set to `manual`. For example: `3h`. 
+end::time-span[] + tag::tokenizer[] The name or definition of the <> to use after character filters are applied. This property is compulsory if diff --git a/docs/reference/redirects.asciidoc b/docs/reference/redirects.asciidoc index 26694e9aa05a..afc10a4493b1 100644 --- a/docs/reference/redirects.asciidoc +++ b/docs/reference/redirects.asciidoc @@ -1054,4 +1054,15 @@ This page was deleted. [[ml-analysisconfig]] See the details in [[ml-apimodelplotconfig]] -<>, <>, and <>. \ No newline at end of file +<>, <>, and <>. + +[role="exclude",id="ml-datafeed-resource"] +=== {dfeed-cap} resources + +This page was deleted. +[[ml-datafeed-chunking-config]] +See the details in <>, <>, +[[ml-datafeed-delayed-data-check-config]] +<>, +[[ml-datafeed-counts]] +<>. \ No newline at end of file diff --git a/docs/reference/rest-api/defs.asciidoc b/docs/reference/rest-api/defs.asciidoc index ec1a5a0e4154..8bdf35e62f11 100644 --- a/docs/reference/rest-api/defs.asciidoc +++ b/docs/reference/rest-api/defs.asciidoc @@ -5,8 +5,6 @@ These resource definitions are used in APIs related to {ml-features} and {security-features} and in {kib} advanced {ml} job configuration options. -* <> -* <> * <> * <> * <> @@ -15,7 +13,6 @@ These resource definitions are used in APIs related to {ml-features} and * <> * <> -include::{es-repo-dir}/ml/anomaly-detection/apis/datafeedresource.asciidoc[] include::{es-repo-dir}/ml/df-analytics/apis/dfanalyticsresources.asciidoc[] include::{es-repo-dir}/ml/df-analytics/apis/evaluateresources.asciidoc[] include::{es-repo-dir}/ml/anomaly-detection/apis/jobcounts.asciidoc[]
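
A note on the Watcher change in patch 21: the `(Supplier<?>)` cast there is
load-bearing and easy to misread. The following self-contained sketch is not
part of the patch; the class name, watch id, action id, and exception are
invented for illustration, but the log4j calls are the same ones the patch uses:

[source,java]
----
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;

// Illustrative sketch only; not Watcher source code.
public class LazyLoggingSketch {
    private static final Logger logger = LogManager.getLogger(LazyLoggingSketch.class);

    public static void main(String[] args) {
        Exception cause = new IllegalStateException("attachment generation failed");
        // The cast selects the error(Supplier<?>, Throwable) overload, so the
        // ParameterizedMessage is only built if ERROR logging is enabled.
        logger.error(
            (Supplier<?>) () -> new ParameterizedMessage(
                "failed to execute action [{}/{}]", "my_watch", "email_admin"),
            cause);
    }
}
----

Without the cast, the lambda could match more than one supplier overload; with
it, message construction is deferred until log4j has checked the level.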
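
Because the new `ml-shared.asciidoc` tags are consumed one at a time by the API
pages above, it can be hard to see how the documented {dfeed} properties fit
together in a single request. The sketch below is illustrative only and is not
part of this patch; the {dfeed} id, job id, and index are borrowed from the
Kibana sample data examples used elsewhere in these docs, and the values are
arbitrary:

[source,console]
--------------------------------------------------
PUT _ml/datafeeds/datafeed-high_sum_total_sales
{
  "job_id": "high_sum_total_sales",
  "indices": ["kibana_sample_data_ecommerce"],
  "query": { "match_all": { "boost": 1 } },
  "frequency": "150s",
  "query_delay": "90s",
  "scroll_size": 1000,
  "chunking_config": { "mode": "manual", "time_span": "3h" },
  "delayed_data_check_config": { "enabled": true, "check_window": "1h" }
}
--------------------------------------------------
// TEST[skip:illustrative sketch, not a tested snippet]

Note that `time_span` only takes effect because `mode` is `manual`, and
`check_window` bounds the late-data search described on the
delayed-data-detection page.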
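
One documented behavior worth showing concretely: `max_empty_searches` has no
default, so the update API uses `-1` as a sentinel to unset it. A minimal
sketch against the same hypothetical {dfeed}:

[source,console]
--------------------------------------------------
POST _ml/datafeeds/datafeed-high_sum_total_sales/_update
{
  "max_empty_searches": -1
}
--------------------------------------------------
// TEST[skip:illustrative sketch, not a tested snippet]

After this, a started {dfeed} with no end time that sees no data remains
started until it is explicitly stopped.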