diff --git a/.github/workflows/pull-request-checks.yml b/.github/workflows/pull-request-checks.yml new file mode 100644 index 0000000000000..11998e36c2dbb --- /dev/null +++ b/.github/workflows/pull-request-checks.yml @@ -0,0 +1,28 @@ +name: Pull Request Checks + +on: + pull_request: + types: + [ + opened, + edited, + review_requested, + synchronize, + reopened, + ready_for_review, + ] + +jobs: + verify-description-checklist: + name: Verify Description Checklist + runs-on: ubuntu-latest + steps: + - uses: peternied/check-pull-request-description-checklist@v1 + with: + checklist-items: | + New functionality includes testing. + All tests pass + New functionality has been documented. + New functionality has javadoc added + Commits are signed per the DCO using --signoff + Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog)) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68c37687a8dda..a05c08d196ddd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535)) - [Remote cluster state] Restore global metadata from remote store when local state is lost after quorum loss ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) - [AdmissionControl] Added changes for AdmissionControl Interceptor and AdmissionControlService for RateLimiting ([#9286](https://github.com/opensearch-project/OpenSearch/pull/9286)) +- GHA to verify checklist items completion in PR descriptions ([#10800](https://github.com/opensearch-project/OpenSearch/pull/10800)) +- [Remote cluster state] Restore cluster state version during remote state auto restore ([#10853](https://github.com/opensearch-project/OpenSearch/pull/10853)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 @@ -95,7 +97,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255)) - Introduce ConcurrentQueryProfiler to profile query using concurrent segment search path and support concurrency during rewrite and create weight ([10352](https://github.com/opensearch-project/OpenSearch/pull/10352)) - [Streaming Indexing] Introduce new experimental server HTTP transport based on Netty 4 and Project Reactor (Reactor Netty) ([#9672](https://github.com/opensearch-project/OpenSearch/pull/9672)) +- Update the indexRandom function to create more segments for concurrent search tests ([10247](https://github.com/opensearch-project/OpenSearch/pull/10247)) - [Remote cluster state] Make index and global metadata upload timeout dynamic cluster settings ([#10814](https://github.com/opensearch-project/OpenSearch/pull/10814)) +- Added cluster setting cluster.restrict.index.replication_type to restrict setting of index setting replication type ([#10866](https://github.com/opensearch-project/OpenSearch/pull/10866)) +- Add cluster state stats ([#10670](https://github.com/opensearch-project/OpenSearch/pull/10670)) ### Dependencies - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) @@ -118,10 +123,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642)) - Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) - Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) +- [BUG] Disable sort optimization for HALF_FLOAT ([#10999](https://github.com/opensearch-project/OpenSearch/pull/10999)) ### Deprecated ### Removed +- Remove deprecated classes for Rounding ([#10956](https://github.com/opensearch-project/OpenSearch/issues/10956)) ### Fixed - Fix failure in dissect ingest processor parsing empty brackets ([#9225](https://github.com/opensearch-project/OpenSearch/pull/9255)) @@ -132,4 +139,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/time/RoundingBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/time/RoundingBenchmark.java deleted file mode 100644 index cdbcbfc163191..0000000000000 --- a/benchmarks/src/main/java/org/opensearch/benchmark/time/RoundingBenchmark.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.benchmark.time; - -import org.opensearch.common.Rounding; -import org.opensearch.common.rounding.DateTimeUnit; -import org.opensearch.common.time.DateUtils; -import org.opensearch.common.unit.TimeValue; -import org.joda.time.DateTimeZone; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Warmup; - -import java.time.ZoneId; -import java.time.ZoneOffset; -import java.util.concurrent.TimeUnit; - -import static org.opensearch.common.Rounding.DateTimeUnit.DAY_OF_MONTH; -import static org.opensearch.common.Rounding.DateTimeUnit.MONTH_OF_YEAR; -import static org.opensearch.common.Rounding.DateTimeUnit.QUARTER_OF_YEAR; -import static org.opensearch.common.Rounding.DateTimeUnit.YEAR_OF_CENTURY; - -@Fork(3) -@Warmup(iterations = 10) -@Measurement(iterations = 10) -@BenchmarkMode(Mode.AverageTime) -@OutputTimeUnit(TimeUnit.NANOSECONDS) -@State(Scope.Benchmark) -@SuppressWarnings("unused") // invoked by benchmarking framework -public class RoundingBenchmark { - - private final ZoneId zoneId = ZoneId.of("Europe/Amsterdam"); - private final DateTimeZone timeZone = DateUtils.zoneIdToDateTimeZone(zoneId); - - private long timestamp = 1548879021354L; - - private final org.opensearch.common.rounding.Rounding jodaRounding = org.opensearch.common.rounding.Rounding.builder( - DateTimeUnit.HOUR_OF_DAY - ).timeZone(timeZone).build(); - private final Rounding javaRounding = Rounding.builder(Rounding.DateTimeUnit.HOUR_OF_DAY).timeZone(zoneId).build(); - - @Benchmark - public long timeRoundingDateTimeUnitJoda() { - return jodaRounding.round(timestamp); - } - - @Benchmark - public long timeRoundingDateTimeUnitJava() { - return javaRounding.round(timestamp); - } - - private final org.opensearch.common.rounding.Rounding jodaDayOfMonthRounding = org.opensearch.common.rounding.Rounding.builder( - DateTimeUnit.DAY_OF_MONTH - ).timeZone(timeZone).build(); - private final Rounding javaDayOfMonthRounding = Rounding.builder(DAY_OF_MONTH).timeZone(zoneId).build(); - - @Benchmark - public long timeRoundingDateTimeUnitDayOfMonthJoda() { - return jodaDayOfMonthRounding.round(timestamp); - } - - @Benchmark - public long timeRoundingDateTimeUnitDayOfMonthJava() { - return javaDayOfMonthRounding.round(timestamp); - } - - private final org.opensearch.common.rounding.Rounding timeIntervalRoundingJoda = org.opensearch.common.rounding.Rounding.builder( - TimeValue.timeValueMinutes(60) - ).timeZone(timeZone).build(); - private final Rounding timeIntervalRoundingJava = Rounding.builder(TimeValue.timeValueMinutes(60)).timeZone(zoneId).build(); - - @Benchmark - public long timeIntervalRoundingJava() { - return timeIntervalRoundingJava.round(timestamp); - } - - @Benchmark - public long timeIntervalRoundingJoda() { - return timeIntervalRoundingJoda.round(timestamp); - } - - private final org.opensearch.common.rounding.Rounding timeUnitRoundingUtcDayOfMonthJoda = org.opensearch.common.rounding.Rounding - .builder(DateTimeUnit.DAY_OF_MONTH) - .timeZone(DateTimeZone.UTC) - .build(); - private final Rounding timeUnitRoundingUtcDayOfMonthJava = Rounding.builder(DAY_OF_MONTH).timeZone(ZoneOffset.UTC).build(); - - @Benchmark - public long timeUnitRoundingUtcDayOfMonthJava() { - return timeUnitRoundingUtcDayOfMonthJava.round(timestamp); - } - - @Benchmark - public long timeUnitRoundingUtcDayOfMonthJoda() { - return timeUnitRoundingUtcDayOfMonthJoda.round(timestamp); - } - - private final org.opensearch.common.rounding.Rounding timeUnitRoundingUtcQuarterOfYearJoda = org.opensearch.common.rounding.Rounding - .builder(DateTimeUnit.QUARTER) - .timeZone(DateTimeZone.UTC) - .build(); - private final Rounding timeUnitRoundingUtcQuarterOfYearJava = Rounding.builder(QUARTER_OF_YEAR).timeZone(ZoneOffset.UTC).build(); - - @Benchmark - public long timeUnitRoundingUtcQuarterOfYearJava() { - return timeUnitRoundingUtcQuarterOfYearJava.round(timestamp); - } - - @Benchmark - public long timeUnitRoundingUtcQuarterOfYearJoda() { - return timeUnitRoundingUtcQuarterOfYearJoda.round(timestamp); - } - - private final org.opensearch.common.rounding.Rounding timeUnitRoundingUtcMonthOfYearJoda = org.opensearch.common.rounding.Rounding - .builder(DateTimeUnit.MONTH_OF_YEAR) - .timeZone(DateTimeZone.UTC) - .build(); - private final Rounding timeUnitRoundingUtcMonthOfYearJava = Rounding.builder(MONTH_OF_YEAR).timeZone(ZoneOffset.UTC).build(); - - @Benchmark - public long timeUnitRoundingUtcMonthOfYearJava() { - return timeUnitRoundingUtcMonthOfYearJava.round(timestamp); - } - - @Benchmark - public long timeUnitRoundingUtcMonthOfYearJoda() { - return timeUnitRoundingUtcMonthOfYearJoda.round(timestamp); - } - - private final org.opensearch.common.rounding.Rounding timeUnitRoundingUtcYearOfCenturyJoda = org.opensearch.common.rounding.Rounding - .builder(DateTimeUnit.YEAR_OF_CENTURY) - .timeZone(DateTimeZone.UTC) - .build(); - private final Rounding timeUnitRoundingUtcYearOfCenturyJava = Rounding.builder(YEAR_OF_CENTURY).timeZone(ZoneOffset.UTC).build(); - - @Benchmark - public long timeUnitRoundingUtcYearOfCenturyJava() { - return timeUnitRoundingUtcYearOfCenturyJava.round(timestamp); - } - - @Benchmark - public long timeUnitRoundingUtcYearOfCenturyJoda() { - return timeUnitRoundingUtcYearOfCenturyJoda.round(timestamp); - } -} diff --git a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java index a420c8b63b02c..1ad7e056b6ae6 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java @@ -77,9 +77,9 @@ import java.util.stream.Stream; public class DistroTestPlugin implements Plugin { - private static final String SYSTEM_JDK_VERSION = "11.0.20+8"; + private static final String SYSTEM_JDK_VERSION = "17.0.9+9"; private static final String SYSTEM_JDK_VENDOR = "adoptium"; - private static final String GRADLE_JDK_VERSION = "17.0.8+7"; + private static final String GRADLE_JDK_VERSION = "17.0.9+9"; private static final String GRADLE_JDK_VENDOR = "adoptium"; // all distributions used by distro tests. this is temporary until tests are per distribution diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 8f0d6436eaac5..f19437979c852 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -2,9 +2,7 @@ opensearch = 3.0.0 lucene = 9.8.0 bundled_jdk_vendor = adoptium -bundled_jdk = 20.0.2+9 -# See please https://github.com/adoptium/temurin-build/issues/3371 -bundled_jdk_linux_ppc64le = 20+36 +bundled_jdk = 21.0.1+12 # optional dependencies spatial4j = 0.7 diff --git a/distribution/src/config/jvm.options b/distribution/src/config/jvm.options index 952110c6c0289..1a0abcbaf9c88 100644 --- a/distribution/src/config/jvm.options +++ b/distribution/src/config/jvm.options @@ -81,7 +81,7 @@ ${error.file} # JDK 20+ Incubating Vector Module for SIMD optimizations; # disabling may reduce performance on vector optimized lucene -20:--add-modules=jdk.incubator.vector +20-:--add-modules=jdk.incubator.vector # HDFS ForkJoinPool.common() support by SecurityManager -Djava.util.concurrent.ForkJoinPool.common.threadFactory=org.opensearch.secure_sm.SecuredForkJoinWorkerThreadFactory diff --git a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java index 4df30bfd2169e..da2c6e8c1b0ee 100644 --- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java +++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java @@ -249,7 +249,7 @@ protected S3Repository createRepository( ClusterService clusterService, RecoverySettings recoverySettings ) { - return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, false) { + return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, null, false) { @Override public BlobStore blobStore() { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java index 0b5fcb6df280e..45170ea1ad209 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java @@ -29,6 +29,7 @@ public class AmazonAsyncS3Reference extends RefCountedReleasable { client.client().close(); client.priorityClient().close(); + client.urgentClient().close(); AwsCredentialsProvider credentials = client.credentials(); if (credentials instanceof Closeable) { try { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java index fa2db83729d25..f8a313b55d945 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java @@ -19,16 +19,19 @@ final class AmazonAsyncS3WithCredentials { private final S3AsyncClient client; private final S3AsyncClient priorityClient; + private final S3AsyncClient urgentClient; private final AwsCredentialsProvider credentials; private AmazonAsyncS3WithCredentials( final S3AsyncClient client, final S3AsyncClient priorityClient, + final S3AsyncClient urgentClient, @Nullable final AwsCredentialsProvider credentials ) { this.client = client; this.credentials = credentials; this.priorityClient = priorityClient; + this.urgentClient = urgentClient; } S3AsyncClient client() { @@ -39,6 +42,10 @@ S3AsyncClient priorityClient() { return priorityClient; } + S3AsyncClient urgentClient() { + return urgentClient; + } + AwsCredentialsProvider credentials() { return credentials; } @@ -46,8 +53,9 @@ AwsCredentialsProvider credentials() { static AmazonAsyncS3WithCredentials create( final S3AsyncClient client, final S3AsyncClient priorityClient, + final S3AsyncClient urgentClient, @Nullable final AwsCredentialsProvider credentials ) { - return new AmazonAsyncS3WithCredentials(client, priorityClient, credentials); + return new AmazonAsyncS3WithCredentials(client, priorityClient, urgentClient, credentials); } } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java index 08215ebdd45e0..262304029a0d3 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java @@ -103,6 +103,7 @@ public synchronized void refreshAndClearCache(Map clie */ public AmazonAsyncS3Reference client( RepositoryMetadata repositoryMetadata, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -119,7 +120,7 @@ public AmazonAsyncS3Reference client( return existing; } final AmazonAsyncS3Reference clientReference = new AmazonAsyncS3Reference( - buildClient(clientSettings, priorityExecutorBuilder, normalExecutorBuilder) + buildClient(clientSettings, urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder) ); clientReference.incRef(); clientsCache = MapBuilder.newMapBuilder(clientsCache).put(clientSettings, clientReference).immutableMap(); @@ -165,6 +166,7 @@ S3ClientSettings settings(RepositoryMetadata repositoryMetadata) { // proxy for testing synchronized AmazonAsyncS3WithCredentials buildClient( final S3ClientSettings clientSettings, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -195,6 +197,17 @@ synchronized AmazonAsyncS3WithCredentials buildClient( builder.forcePathStyle(true); } + builder.httpClient(buildHttpClient(clientSettings, urgentExecutorBuilder.getAsyncTransferEventLoopGroup())); + builder.asyncConfiguration( + ClientAsyncConfiguration.builder() + .advancedOption( + SdkAdvancedAsyncClientOption.FUTURE_COMPLETION_EXECUTOR, + urgentExecutorBuilder.getFutureCompletionExecutor() + ) + .build() + ); + final S3AsyncClient urgentClient = SocketAccess.doPrivileged(builder::build); + builder.httpClient(buildHttpClient(clientSettings, priorityExecutorBuilder.getAsyncTransferEventLoopGroup())); builder.asyncConfiguration( ClientAsyncConfiguration.builder() @@ -217,7 +230,7 @@ synchronized AmazonAsyncS3WithCredentials buildClient( ); final S3AsyncClient client = SocketAccess.doPrivileged(builder::build); - return AmazonAsyncS3WithCredentials.create(client, priorityClient, credentials); + return AmazonAsyncS3WithCredentials.create(client, priorityClient, urgentClient, credentials); } static ClientOverrideConfiguration buildOverrideConfiguration(final S3ClientSettings clientSettings) { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java index 24aee99242957..c1180aab0e0c7 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java @@ -195,9 +195,14 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp StreamContext streamContext = SocketAccess.doPrivileged(() -> writeContext.getStreamProvider(partSize)); try (AmazonAsyncS3Reference amazonS3Reference = SocketAccess.doPrivileged(blobStore::asyncClientReference)) { - S3AsyncClient s3AsyncClient = writeContext.getWritePriority() == WritePriority.HIGH - ? amazonS3Reference.get().priorityClient() - : amazonS3Reference.get().client(); + S3AsyncClient s3AsyncClient; + if (writeContext.getWritePriority() == WritePriority.URGENT) { + s3AsyncClient = amazonS3Reference.get().urgentClient(); + } else if (writeContext.getWritePriority() == WritePriority.HIGH) { + s3AsyncClient = amazonS3Reference.get().priorityClient(); + } else { + s3AsyncClient = amazonS3Reference.get().client(); + } CompletableFuture completableFuture = blobStore.getAsyncTransferManager() .uploadObject(s3AsyncClient, uploadRequest, streamContext, blobStore.getStatsMetricPublisher()); completableFuture.whenComplete((response, throwable) -> { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java index f568d871dd31a..e8e043357e126 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java @@ -84,6 +84,7 @@ class S3BlobStore implements BlobStore { private final StatsMetricPublisher statsMetricPublisher = new StatsMetricPublisher(); private final AsyncTransferManager asyncTransferManager; + private final AsyncExecutorContainer urgentExecutorBuilder; private final AsyncExecutorContainer priorityExecutorBuilder; private final AsyncExecutorContainer normalExecutorBuilder; private final boolean multipartUploadEnabled; @@ -100,6 +101,7 @@ class S3BlobStore implements BlobStore { int bulkDeletesSize, RepositoryMetadata repositoryMetadata, AsyncTransferManager asyncTransferManager, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -116,6 +118,7 @@ class S3BlobStore implements BlobStore { this.asyncTransferManager = asyncTransferManager; this.normalExecutorBuilder = normalExecutorBuilder; this.priorityExecutorBuilder = priorityExecutorBuilder; + this.urgentExecutorBuilder = urgentExecutorBuilder; } @Override @@ -139,7 +142,7 @@ public AmazonS3Reference clientReference() { } public AmazonAsyncS3Reference asyncClientReference() { - return s3AsyncService.client(repositoryMetadata, priorityExecutorBuilder, normalExecutorBuilder); + return s3AsyncService.client(repositoryMetadata, urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder); } int getMaxRetries() { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java index aaf5b79891cdc..728a99b1220a6 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java @@ -234,6 +234,7 @@ class S3Repository extends MeteredBlobStoreRepository { private final AsyncTransferManager asyncUploadUtils; private final S3AsyncService s3AsyncService; private final boolean multipartUploadEnabled; + private final AsyncExecutorContainer urgentExecutorBuilder; private final AsyncExecutorContainer priorityExecutorBuilder; private final AsyncExecutorContainer normalExecutorBuilder; private final Path pluginConfigPath; @@ -248,6 +249,7 @@ class S3Repository extends MeteredBlobStoreRepository { final ClusterService clusterService, final RecoverySettings recoverySettings, final AsyncTransferManager asyncUploadUtils, + final AsyncExecutorContainer urgentExecutorBuilder, final AsyncExecutorContainer priorityExecutorBuilder, final AsyncExecutorContainer normalExecutorBuilder, final S3AsyncService s3AsyncService, @@ -260,6 +262,7 @@ class S3Repository extends MeteredBlobStoreRepository { clusterService, recoverySettings, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder, s3AsyncService, @@ -278,6 +281,7 @@ class S3Repository extends MeteredBlobStoreRepository { final ClusterService clusterService, final RecoverySettings recoverySettings, final AsyncTransferManager asyncUploadUtils, + final AsyncExecutorContainer urgentExecutorBuilder, final AsyncExecutorContainer priorityExecutorBuilder, final AsyncExecutorContainer normalExecutorBuilder, final S3AsyncService s3AsyncService, @@ -290,6 +294,7 @@ class S3Repository extends MeteredBlobStoreRepository { this.multipartUploadEnabled = multipartUploadEnabled; this.pluginConfigPath = pluginConfigPath; this.asyncUploadUtils = asyncUploadUtils; + this.urgentExecutorBuilder = urgentExecutorBuilder; this.priorityExecutorBuilder = priorityExecutorBuilder; this.normalExecutorBuilder = normalExecutorBuilder; @@ -352,6 +357,7 @@ protected S3BlobStore createBlobStore() { bulkDeletesSize, metadata, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder ); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index c6450e49d08e2..9ed232464d080 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -75,6 +75,9 @@ * A plugin to add a repository type that writes to and from the AWS S3. */ public class S3RepositoryPlugin extends Plugin implements RepositoryPlugin, ReloadablePlugin { + + private static final String URGENT_FUTURE_COMPLETION = "urgent_future_completion"; + private static final String URGENT_STREAM_READER = "urgent_stream_reader"; private static final String PRIORITY_FUTURE_COMPLETION = "priority_future_completion"; private static final String PRIORITY_STREAM_READER = "priority_stream_reader"; private static final String FUTURE_COMPLETION = "future_completion"; @@ -85,6 +88,7 @@ public class S3RepositoryPlugin extends Plugin implements RepositoryPlugin, Relo private final Path configPath; + private AsyncExecutorContainer urgentExecutorBuilder; private AsyncExecutorContainer priorityExecutorBuilder; private AsyncExecutorContainer normalExecutorBuilder; @@ -96,6 +100,10 @@ public S3RepositoryPlugin(final Settings settings, final Path configPath) { public List> getExecutorBuilders(Settings settings) { List> executorBuilders = new ArrayList<>(); int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors(settings)); + executorBuilders.add( + new FixedExecutorBuilder(settings, URGENT_FUTURE_COMPLETION, urgentPoolCount(settings), 10_000, URGENT_FUTURE_COMPLETION) + ); + executorBuilders.add(new ScalingExecutorBuilder(URGENT_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); executorBuilders.add( new FixedExecutorBuilder(settings, PRIORITY_FUTURE_COMPLETION, priorityPoolCount(settings), 10_000, PRIORITY_FUTURE_COMPLETION) ); @@ -128,6 +136,10 @@ private static int allocatedProcessors(Settings settings) { return OpenSearchExecutors.allocatedProcessors(settings); } + private static int urgentPoolCount(Settings settings) { + return boundedBy((allocatedProcessors(settings) + 7) / 8, 1, 2); + } + private static int priorityPoolCount(Settings settings) { return boundedBy((allocatedProcessors(settings) + 1) / 2, 2, 4); } @@ -150,8 +162,14 @@ public Collection createComponents( final IndexNameExpressionResolver expressionResolver, final Supplier repositoriesServiceSupplier ) { + int urgentEventLoopThreads = urgentPoolCount(clusterService.getSettings()); int priorityEventLoopThreads = priorityPoolCount(clusterService.getSettings()); int normalEventLoopThreads = normalPoolCount(clusterService.getSettings()); + this.urgentExecutorBuilder = new AsyncExecutorContainer( + threadPool.executor(URGENT_FUTURE_COMPLETION), + threadPool.executor(URGENT_STREAM_READER), + new AsyncTransferEventLoopGroup(urgentEventLoopThreads) + ); this.priorityExecutorBuilder = new AsyncExecutorContainer( threadPool.executor(PRIORITY_FUTURE_COMPLETION), threadPool.executor(PRIORITY_STREAM_READER), @@ -176,7 +194,8 @@ protected S3Repository createRepository( AsyncTransferManager asyncUploadUtils = new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.get(clusterService.getSettings()).getBytes(), normalExecutorBuilder.getStreamReader(), - priorityExecutorBuilder.getStreamReader() + priorityExecutorBuilder.getStreamReader(), + urgentExecutorBuilder.getStreamReader() ); return new S3Repository( metadata, @@ -185,6 +204,7 @@ protected S3Repository createRepository( clusterService, recoverySettings, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder, s3AsyncService, diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java index 6007d9f9c8a1c..933ee6dc29513 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java @@ -48,6 +48,7 @@ public class AsyncPartsHandler { * @param s3AsyncClient S3 client to use for upload * @param executorService Thread pool for regular upload * @param priorityExecutorService Thread pool for priority uploads + * @param urgentExecutorService Thread pool for urgent uploads * @param uploadRequest request for upload * @param streamContext Stream context used in supplying individual file parts * @param uploadId Upload Id against which multi-part is being performed @@ -60,6 +61,7 @@ public static List> uploadParts( S3AsyncClient s3AsyncClient, ExecutorService executorService, ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService, UploadRequest uploadRequest, StreamContext streamContext, String uploadId, @@ -83,6 +85,7 @@ public static List> uploadParts( s3AsyncClient, executorService, priorityExecutorService, + urgentExecutorService, completedParts, inputStreamContainers, futures, @@ -129,6 +132,7 @@ private static void uploadPart( S3AsyncClient s3AsyncClient, ExecutorService executorService, ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService, AtomicReferenceArray completedParts, AtomicReferenceArray inputStreamContainers, List> futures, @@ -138,9 +142,14 @@ private static void uploadPart( ) { Integer partNumber = uploadPartRequest.partNumber(); - ExecutorService streamReadExecutor = uploadRequest.getWritePriority() == WritePriority.HIGH - ? priorityExecutorService - : executorService; + ExecutorService streamReadExecutor; + if (uploadRequest.getWritePriority() == WritePriority.URGENT) { + streamReadExecutor = urgentExecutorService; + } else if (uploadRequest.getWritePriority() == WritePriority.HIGH) { + streamReadExecutor = priorityExecutorService; + } else { + streamReadExecutor = executorService; + } // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered // data can be retried instead of retrying whole file by the application. InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java index a52745e33073e..4f1ab9764702e 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java @@ -61,6 +61,7 @@ public final class AsyncTransferManager { private static final Logger log = LogManager.getLogger(AsyncTransferManager.class); private final ExecutorService executorService; private final ExecutorService priorityExecutorService; + private final ExecutorService urgentExecutorService; private final long minimumPartSize; /** @@ -75,10 +76,16 @@ public final class AsyncTransferManager { * @param executorService The stream reader {@link ExecutorService} for normal priority uploads * @param priorityExecutorService The stream read {@link ExecutorService} for high priority uploads */ - public AsyncTransferManager(long minimumPartSize, ExecutorService executorService, ExecutorService priorityExecutorService) { + public AsyncTransferManager( + long minimumPartSize, + ExecutorService executorService, + ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService + ) { this.executorService = executorService; this.priorityExecutorService = priorityExecutorService; this.minimumPartSize = minimumPartSize; + this.urgentExecutorService = urgentExecutorService; } /** @@ -162,6 +169,7 @@ private void doUploadInParts( s3AsyncClient, executorService, priorityExecutorService, + urgentExecutorService, uploadRequest, streamContext, uploadId, @@ -308,9 +316,14 @@ private void uploadInOneChunk( putObjectRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); putObjectRequestBuilder.checksumCRC32(base64StringFromLong(uploadRequest.getExpectedChecksum())); } - ExecutorService streamReadExecutor = uploadRequest.getWritePriority() == WritePriority.HIGH - ? priorityExecutorService - : executorService; + ExecutorService streamReadExecutor; + if (uploadRequest.getWritePriority() == WritePriority.URGENT) { + streamReadExecutor = urgentExecutorService; + } else if (uploadRequest.getWritePriority() == WritePriority.HIGH) { + streamReadExecutor = priorityExecutorService; + } else { + streamReadExecutor = executorService; + } // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered // data can be retried instead of retrying whole file by the application. InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java index a4bfe11383b4f..8e1926d40302f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java @@ -302,7 +302,7 @@ protected S3Repository createRepository( ClusterService clusterService, RecoverySettings recoverySettings ) { - return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, false) { + return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, null, false) { @Override protected void assertSnapshotOrGenericThread() { // eliminate thread name check as we create repo manually on test/main threads diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java index e9fe557ab751a..de9ad46bb222d 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java @@ -44,12 +44,12 @@ public void testCachedClientsAreReleased() { final S3ClientSettings otherClientSettings = s3AsyncService.settings(metadata2); assertSame(clientSettings, otherClientSettings); final AmazonAsyncS3Reference reference = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); reference.close(); s3AsyncService.close(); final AmazonAsyncS3Reference referenceReloaded = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); assertNotSame(referenceReloaded, reference); referenceReloaded.close(); @@ -79,12 +79,12 @@ public void testCachedClientsWithCredentialsAreReleased() { final S3ClientSettings otherClientSettings = s3AsyncService.settings(metadata2); assertSame(clientSettings, otherClientSettings); final AmazonAsyncS3Reference reference = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); reference.close(); s3AsyncService.close(); final AmazonAsyncS3Reference referenceReloaded = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); assertNotSame(referenceReloaded, reference); referenceReloaded.close(); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java index 6eb8faa746d34..7c67519f2f3b0 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java @@ -266,10 +266,11 @@ public void verifySingleChunkUploadCallCount(boolean finalizeUploadFailure) { @Override public AmazonAsyncS3Reference client( RepositoryMetadata repositoryMetadata, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { - return new AmazonAsyncS3Reference(AmazonAsyncS3WithCredentials.create(asyncClient, asyncClient, null)); + return new AmazonAsyncS3Reference(AmazonAsyncS3WithCredentials.create(asyncClient, asyncClient, asyncClient, null)); } } @@ -393,9 +394,11 @@ private S3BlobStore createBlobStore() { new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.getDefault(Settings.EMPTY).getBytes(), asyncExecutorContainer.getStreamReader(), + asyncExecutorContainer.getStreamReader(), asyncExecutorContainer.getStreamReader() ), asyncExecutorContainer, + asyncExecutorContainer, asyncExecutorContainer ); } diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java index a2214f5218991..ceab06bd051e9 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java @@ -221,9 +221,11 @@ protected AsyncMultiStreamBlobContainer createBlobContainer( new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.getDefault(Settings.EMPTY).getBytes(), asyncExecutorContainer.getStreamReader(), + asyncExecutorContainer.getStreamReader(), asyncExecutorContainer.getStreamReader() ), asyncExecutorContainer, + asyncExecutorContainer, asyncExecutorContainer ) ) { diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java index 2701cae6a733b..58ad290a31e85 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java @@ -935,7 +935,7 @@ public void testReadBlobAsyncMultiPart() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); @@ -993,7 +993,7 @@ public void testReadBlobAsyncSinglePart() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); final BlobPath blobPath = new BlobPath(); @@ -1048,7 +1048,7 @@ public void testReadBlobAsyncFailure() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); @@ -1091,7 +1091,7 @@ public void testReadBlobAsyncOnCompleteFailureMissingData() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java index e65ca69a5047b..6fec535ae6301 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java @@ -168,6 +168,7 @@ private S3Repository createS3Repo(RepositoryMetadata metadata) { null, null, null, + null, false ) { @Override diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java index 97a746cdeed93..2437547a80a6f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java @@ -64,6 +64,7 @@ public void setUp() throws Exception { asyncTransferManager = new AsyncTransferManager( ByteSizeUnit.MB.toBytes(5), Executors.newSingleThreadExecutor(), + Executors.newSingleThreadExecutor(), Executors.newSingleThreadExecutor() ); super.setUp(); diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java new file mode 100644 index 0000000000000..a081110e6c5a1 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteCloneIndexIT.java @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.indices.create; + +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +import org.opensearch.Version; +import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; +import org.opensearch.action.admin.indices.shrink.ResizeType; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; +import org.opensearch.test.VersionUtils; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; +import static org.hamcrest.Matchers.equalTo; + +public class RemoteCloneIndexIT extends RemoteStoreBaseIntegTestCase { + + @Override + protected boolean forbidPrivateIndexSettings() { + return false; + } + + public void testCreateCloneIndex() { + Version version = VersionUtils.randomIndexCompatibleVersion(random()); + int numPrimaryShards = randomIntBetween(1, 5); + prepareCreate("source").setSettings( + Settings.builder().put(indexSettings()).put("number_of_shards", numPrimaryShards).put("index.version.created", version) + ).get(); + final int docs = randomIntBetween(0, 128); + for (int i = 0; i < docs; i++) { + client().prepareIndex("source").setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON).get(); + } + internalCluster().ensureAtLeastNumDataNodes(2); + // ensure all shards are allocated otherwise the ensure green below might not succeed since we require the merge node + // if we change the setting too quickly we will end up with one replica unassigned which can't be assigned anymore due + // to the require._name below. + ensureGreen(); + // relocate all shards to one node such that we can merge it. + client().admin().indices().prepareUpdateSettings("source").setSettings(Settings.builder().put("index.blocks.write", true)).get(); + ensureGreen(); + + final IndicesStatsResponse sourceStats = client().admin().indices().prepareStats("source").setSegments(true).get(); + + // disable rebalancing to be able to capture the right stats. balancing can move the target primary + // making it hard to pin point the source shards. + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none")) + .get(); + try { + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("source", "target") + .setResizeType(ResizeType.CLONE) + .setSettings(Settings.builder().put("index.number_of_replicas", 0).putNull("index.blocks.write").build()) + .get() + ); + ensureGreen(); + + final IndicesStatsResponse targetStats = client().admin().indices().prepareStats("target").get(); + assertThat(targetStats.getIndex("target").getIndexShards().keySet().size(), equalTo(numPrimaryShards)); + + final int size = docs > 0 ? 2 * docs : 1; + assertHitCount(client().prepareSearch("target").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs); + + for (int i = docs; i < 2 * docs; i++) { + client().prepareIndex("target").setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON).get(); + } + flushAndRefresh(); + assertHitCount( + client().prepareSearch("target").setSize(2 * size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), + 2 * docs + ); + assertHitCount(client().prepareSearch("source").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs); + GetSettingsResponse target = client().admin().indices().prepareGetSettings("target").get(); + assertEquals(version, target.getIndexToSettings().get("target").getAsVersion("index.version.created", null)); + } finally { + // clean up + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + ) + .get(); + } + + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteShrinkIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteShrinkIndexIT.java new file mode 100644 index 0000000000000..282eb9c6ad95e --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteShrinkIndexIT.java @@ -0,0 +1,545 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.admin.indices.create; + +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.util.Constants; +import org.opensearch.Version; +import org.opensearch.action.admin.cluster.reroute.ClusterRerouteResponse; +import org.opensearch.action.admin.cluster.state.ClusterStateRequest; +import org.opensearch.action.admin.cluster.state.ClusterStateResponse; +import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; +import org.opensearch.action.admin.indices.stats.CommonStats; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.action.admin.indices.stats.ShardStats; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.support.ActiveShardCount; +import org.opensearch.client.Client; +import org.opensearch.cluster.ClusterInfoService; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.InternalClusterInfoService; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.Murmur3HashFunction; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.opensearch.common.Priority; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.index.Index; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.index.IndexModule; +import org.opensearch.index.IndexService; +import org.opensearch.index.engine.SegmentsStats; +import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.index.seqno.SeqNoStats; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; +import org.opensearch.test.VersionUtils; + +import java.util.Arrays; +import java.util.Map; +import java.util.stream.IntStream; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; + +public class RemoteShrinkIndexIT extends RemoteStoreBaseIntegTestCase { + @Override + protected boolean forbidPrivateIndexSettings() { + return false; + } + + public Settings indexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), false) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + } + + public void testCreateShrinkIndexToN() { + + assumeFalse("https://github.com/elastic/elasticsearch/issues/34080", Constants.WINDOWS); + + int[][] possibleShardSplits = new int[][] { { 8, 4, 2 }, { 9, 3, 1 }, { 4, 2, 1 }, { 15, 5, 1 } }; + int[] shardSplits = randomFrom(possibleShardSplits); + assertEquals(shardSplits[0], (shardSplits[0] / shardSplits[1]) * shardSplits[1]); + assertEquals(shardSplits[1], (shardSplits[1] / shardSplits[2]) * shardSplits[2]); + internalCluster().ensureAtLeastNumDataNodes(2); + prepareCreate("source").setSettings(Settings.builder().put(indexSettings()).put("number_of_shards", shardSplits[0])).get(); + for (int i = 0; i < 20; i++) { + client().prepareIndex("source") + .setId(Integer.toString(i)) + .setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON) + .get(); + } + final Map dataNodes = client().admin().cluster().prepareState().get().getState().nodes().getDataNodes(); + assertTrue("at least 2 nodes but was: " + dataNodes.size(), dataNodes.size() >= 2); + DiscoveryNode[] discoveryNodes = dataNodes.values().toArray(new DiscoveryNode[0]); + String mergeNode = discoveryNodes[0].getName(); + // ensure all shards are allocated otherwise the ensure green below might not succeed since we require the merge node + // if we change the setting too quickly we will end up with one replica unassigned which can't be assigned anymore due + // to the require._name below. + ensureGreen(); + // relocate all shards to one node such that we can merge it. + client().admin() + .indices() + .prepareUpdateSettings("source") + .setSettings(Settings.builder().put("index.routing.allocation.require._name", mergeNode).put("index.blocks.write", true)) + .get(); + ensureGreen(); + // now merge source into a 4 shard index + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("source", "first_shrink") + .setSettings( + Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", shardSplits[1]) + .putNull("index.blocks.write") + .build() + ) + .get() + ); + ensureGreen(); + assertHitCount(client().prepareSearch("first_shrink").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + + for (int i = 0; i < 20; i++) { // now update + client().prepareIndex("first_shrink") + .setId(Integer.toString(i)) + .setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON) + .get(); + } + flushAndRefresh(); + assertHitCount(client().prepareSearch("first_shrink").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + assertHitCount(client().prepareSearch("source").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + + // relocate all shards to one node such that we can merge it. + client().admin() + .indices() + .prepareUpdateSettings("first_shrink") + .setSettings(Settings.builder().put("index.routing.allocation.require._name", mergeNode).put("index.blocks.write", true)) + .get(); + ensureGreen(); + // now merge source into a 2 shard index + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("first_shrink", "second_shrink") + .setSettings( + Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", shardSplits[2]) + .putNull("index.blocks.write") + .putNull("index.routing.allocation.require._name") + .build() + ) + .get() + ); + ensureGreen(); + assertHitCount(client().prepareSearch("second_shrink").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + // let it be allocated anywhere and bump replicas + client().admin() + .indices() + .prepareUpdateSettings("second_shrink") + .setSettings(Settings.builder().putNull("index.routing.allocation.include._id").put("index.number_of_replicas", 0)) + .get(); + ensureGreen(); + assertHitCount(client().prepareSearch("second_shrink").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + + for (int i = 0; i < 20; i++) { // now update + client().prepareIndex("second_shrink") + .setId(Integer.toString(i)) + .setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON) + .get(); + } + flushAndRefresh(); + assertHitCount(client().prepareSearch("second_shrink").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + assertHitCount(client().prepareSearch("first_shrink").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + assertHitCount(client().prepareSearch("source").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + } + + public void testShrinkIndexPrimaryTerm() throws Exception { + int numberOfShards = randomIntBetween(2, 20); + int numberOfTargetShards = randomValueOtherThanMany(n -> numberOfShards % n != 0, () -> randomIntBetween(1, numberOfShards - 1)); + internalCluster().ensureAtLeastNumDataNodes(2); + prepareCreate("source").setSettings(Settings.builder().put(indexSettings()).put("number_of_shards", numberOfShards)).get(); + + final Map dataNodes = client().admin().cluster().prepareState().get().getState().nodes().getDataNodes(); + assertThat(dataNodes.size(), greaterThanOrEqualTo(2)); + final DiscoveryNode[] discoveryNodes = dataNodes.values().toArray(new DiscoveryNode[0]); + final String mergeNode = discoveryNodes[0].getName(); + // This needs more than the default timeout if a large number of shards were created. + ensureGreen(TimeValue.timeValueSeconds(120)); + + // fail random primary shards to force primary terms to increase + final Index source = resolveIndex("source"); + final int iterations = scaledRandomIntBetween(0, 16); + for (int i = 0; i < iterations; i++) { + final String node = randomSubsetOf(1, internalCluster().nodesInclude("source")).get(0); + final IndicesService indexServices = internalCluster().getInstance(IndicesService.class, node); + final IndexService indexShards = indexServices.indexServiceSafe(source); + for (final Integer shardId : indexShards.shardIds()) { + final IndexShard shard = indexShards.getShard(shardId); + if (shard.routingEntry().primary() && randomBoolean()) { + disableAllocation("source"); + shard.failShard("test", new Exception("test")); + // this can not succeed until the shard is failed and a replica is promoted + int id = 0; + while (true) { + // find an ID that routes to the right shard, we will only index to the shard that saw a primary failure + final String s = Integer.toString(id); + final int hash = Math.floorMod(Murmur3HashFunction.hash(s), numberOfShards); + if (hash == shardId) { + final IndexRequest request = new IndexRequest("source").id(s) + .source("{ \"f\": \"" + s + "\"}", MediaTypeRegistry.JSON); + client().index(request).get(); + break; + } else { + id++; + } + } + enableAllocation("source"); + ensureGreen(); + } + } + } + + // relocate all shards to one node such that we can merge it. + final Settings.Builder prepareShrinkSettings = Settings.builder() + .put("index.routing.allocation.require._name", mergeNode) + .put("index.blocks.write", true); + client().admin().indices().prepareUpdateSettings("source").setSettings(prepareShrinkSettings).get(); + ensureGreen(TimeValue.timeValueSeconds(120)); // needs more than the default to relocate many shards + + final IndexMetadata indexMetadata = indexMetadata(client(), "source"); + final long beforeShrinkPrimaryTerm = IntStream.range(0, numberOfShards).mapToLong(indexMetadata::primaryTerm).max().getAsLong(); + + // now merge source into target + final Settings shrinkSettings = Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", numberOfTargetShards) + .build(); + assertAcked(client().admin().indices().prepareResizeIndex("source", "target").setSettings(shrinkSettings).get()); + + ensureGreen(TimeValue.timeValueSeconds(120)); + + final IndexMetadata afterShrinkIndexMetadata = indexMetadata(client(), "target"); + for (int shardId = 0; shardId < numberOfTargetShards; shardId++) { + assertThat(afterShrinkIndexMetadata.primaryTerm(shardId), equalTo(beforeShrinkPrimaryTerm + 1)); + } + } + + private static IndexMetadata indexMetadata(final Client client, final String index) { + final ClusterStateResponse clusterStateResponse = client.admin().cluster().state(new ClusterStateRequest()).actionGet(); + return clusterStateResponse.getState().metadata().index(index); + } + + public void testCreateShrinkIndex() { + internalCluster().ensureAtLeastNumDataNodes(2); + Version version = VersionUtils.randomVersion(random()); + prepareCreate("source").setSettings( + Settings.builder().put(indexSettings()).put("number_of_shards", randomIntBetween(2, 7)).put("index.version.created", version) + ).get(); + final int docs = randomIntBetween(0, 128); + for (int i = 0; i < docs; i++) { + client().prepareIndex("source").setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON).get(); + } + final Map dataNodes = client().admin().cluster().prepareState().get().getState().nodes().getDataNodes(); + assertTrue("at least 2 nodes but was: " + dataNodes.size(), dataNodes.size() >= 2); + DiscoveryNode[] discoveryNodes = dataNodes.values().toArray(new DiscoveryNode[0]); + // ensure all shards are allocated otherwise the ensure green below might not succeed since we require the merge node + // if we change the setting too quickly we will end up with one replica unassigned which can't be assigned anymore due + // to the require._name below. + ensureGreen(); + // relocate all shards to one node such that we can merge it. + client().admin() + .indices() + .prepareUpdateSettings("source") + .setSettings( + Settings.builder() + .put("index.routing.allocation.require._name", discoveryNodes[0].getName()) + .put("index.blocks.write", true) + ) + .get(); + ensureGreen(); + + final IndicesStatsResponse sourceStats = client().admin().indices().prepareStats("source").setSegments(true).get(); + + // disable rebalancing to be able to capture the right stats. balancing can move the target primary + // making it hard to pin point the source shards. + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none")) + .get(); + + // now merge source into a single shard index + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("source", "target") + .setSettings( + Settings.builder() + .put("index.number_of_replicas", 0) + .putNull("index.blocks.write") + .putNull("index.routing.allocation.require._name") + .build() + ) + .get() + ); + ensureGreen(); + + // resolve true merge node - this is not always the node we required as all shards may be on another node + final ClusterState state = client().admin().cluster().prepareState().get().getState(); + DiscoveryNode mergeNode = state.nodes().get(state.getRoutingTable().index("target").shard(0).primaryShard().currentNodeId()); + logger.info("merge node {}", mergeNode); + + final long maxSeqNo = Arrays.stream(sourceStats.getShards()) + .filter(shard -> shard.getShardRouting().currentNodeId().equals(mergeNode.getId())) + .map(ShardStats::getSeqNoStats) + .mapToLong(SeqNoStats::getMaxSeqNo) + .max() + .getAsLong(); + final long maxUnsafeAutoIdTimestamp = Arrays.stream(sourceStats.getShards()) + .filter(shard -> shard.getShardRouting().currentNodeId().equals(mergeNode.getId())) + .map(ShardStats::getStats) + .map(CommonStats::getSegments) + .mapToLong(SegmentsStats::getMaxUnsafeAutoIdTimestamp) + .max() + .getAsLong(); + + final IndicesStatsResponse targetStats = client().admin().indices().prepareStats("target").get(); + for (final ShardStats shardStats : targetStats.getShards()) { + final SeqNoStats seqNoStats = shardStats.getSeqNoStats(); + final ShardRouting shardRouting = shardStats.getShardRouting(); + assertThat("failed on " + shardRouting, seqNoStats.getMaxSeqNo(), equalTo(maxSeqNo)); + assertThat("failed on " + shardRouting, seqNoStats.getLocalCheckpoint(), equalTo(maxSeqNo)); + assertThat( + "failed on " + shardRouting, + shardStats.getStats().getSegments().getMaxUnsafeAutoIdTimestamp(), + equalTo(maxUnsafeAutoIdTimestamp) + ); + } + + final int size = docs > 0 ? 2 * docs : 1; + assertHitCount(client().prepareSearch("target").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs); + + for (int i = docs; i < 2 * docs; i++) { + client().prepareIndex("target").setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON).get(); + } + flushAndRefresh(); + assertHitCount(client().prepareSearch("target").setSize(2 * size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 2 * docs); + assertHitCount(client().prepareSearch("source").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs); + GetSettingsResponse target = client().admin().indices().prepareGetSettings("target").get(); + assertEquals(version, target.getIndexToSettings().get("target").getAsVersion("index.version.created", null)); + + // clean up + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + ) + .get(); + } + + /** + * Tests that we can manually recover from a failed allocation due to shards being moved away etc. + */ + public void testCreateShrinkIndexFails() throws Exception { + internalCluster().ensureAtLeastNumDataNodes(2); + prepareCreate("source").setSettings( + Settings.builder().put(indexSettings()).put("number_of_shards", randomIntBetween(2, 7)).put("number_of_replicas", 0) + ).get(); + for (int i = 0; i < 20; i++) { + client().prepareIndex("source").setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON).get(); + } + final Map dataNodes = client().admin().cluster().prepareState().get().getState().nodes().getDataNodes(); + assertTrue("at least 2 nodes but was: " + dataNodes.size(), dataNodes.size() >= 2); + DiscoveryNode[] discoveryNodes = dataNodes.values().toArray(new DiscoveryNode[0]); + String spareNode = discoveryNodes[0].getName(); + String mergeNode = discoveryNodes[1].getName(); + // ensure all shards are allocated otherwise the ensure green below might not succeed since we require the merge node + // if we change the setting too quickly we will end up with one replica unassigned which can't be assigned anymore due + // to the require._name below. + ensureGreen(); + // relocate all shards to one node such that we can merge it. + client().admin() + .indices() + .prepareUpdateSettings("source") + .setSettings(Settings.builder().put("index.routing.allocation.require._name", mergeNode).put("index.blocks.write", true)) + .get(); + ensureGreen(); + + // now merge source into a single shard index + client().admin() + .indices() + .prepareResizeIndex("source", "target") + .setWaitForActiveShards(ActiveShardCount.NONE) + .setSettings( + Settings.builder() + .put("index.routing.allocation.exclude._name", mergeNode) // we manually exclude the merge node to forcefully fuck it up + .put("index.number_of_replicas", 0) + .put("index.allocation.max_retries", 1) + .build() + ) + .get(); + client().admin().cluster().prepareHealth("target").setWaitForEvents(Priority.LANGUID).get(); + + // now we move all shards away from the merge node + client().admin() + .indices() + .prepareUpdateSettings("source") + .setSettings(Settings.builder().put("index.routing.allocation.require._name", spareNode).put("index.blocks.write", true)) + .get(); + ensureGreen("source"); + + client().admin() + .indices() + .prepareUpdateSettings("target") // erase the forcefully fuckup! + .setSettings(Settings.builder().putNull("index.routing.allocation.exclude._name")) + .get(); + // wait until it fails + assertBusy(() -> { + ClusterStateResponse clusterStateResponse = client().admin().cluster().prepareState().get(); + RoutingTable routingTables = clusterStateResponse.getState().routingTable(); + assertTrue(routingTables.index("target").shard(0).getShards().get(0).unassigned()); + assertEquals( + UnassignedInfo.Reason.ALLOCATION_FAILED, + routingTables.index("target").shard(0).getShards().get(0).unassignedInfo().getReason() + ); + assertEquals(1, routingTables.index("target").shard(0).getShards().get(0).unassignedInfo().getNumFailedAllocations()); + }); + client().admin() + .indices() + .prepareUpdateSettings("source") // now relocate them all to the right node + .setSettings(Settings.builder().put("index.routing.allocation.require._name", mergeNode)) + .get(); + ensureGreen("source"); + + final InternalClusterInfoService infoService = (InternalClusterInfoService) internalCluster().getInstance( + ClusterInfoService.class, + internalCluster().getClusterManagerName() + ); + infoService.refresh(); + // kick off a retry and wait until it's done! + ClusterRerouteResponse clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + long expectedShardSize = clusterRerouteResponse.getState() + .routingTable() + .index("target") + .shard(0) + .getShards() + .get(0) + .getExpectedShardSize(); + // we support the expected shard size in the allocator to sum up over the source index shards + assertTrue("expected shard size must be set but wasn't: " + expectedShardSize, expectedShardSize > 0); + ensureGreen(); + assertHitCount(client().prepareSearch("target").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), 20); + } + + public void testCreateShrinkWithIndexSort() throws Exception { + SortField expectedSortField = new SortedSetSortField("id", true, SortedSetSelector.Type.MAX); + expectedSortField.setMissingValue(SortedSetSortField.STRING_FIRST); + Sort expectedIndexSort = new Sort(expectedSortField); + internalCluster().ensureAtLeastNumDataNodes(2); + prepareCreate("source").setSettings( + Settings.builder() + .put(indexSettings()) + .put("sort.field", "id") + .put("sort.order", "desc") + .put("number_of_shards", 8) + .put("number_of_replicas", 0) + ).setMapping("id", "type=keyword,doc_values=true").get(); + for (int i = 0; i < 20; i++) { + client().prepareIndex("source") + .setId(Integer.toString(i)) + .setSource("{\"foo\" : \"bar\", \"id\" : " + i + "}", MediaTypeRegistry.JSON) + .get(); + } + final Map dataNodes = client().admin().cluster().prepareState().get().getState().nodes().getDataNodes(); + assertTrue("at least 2 nodes but was: " + dataNodes.size(), dataNodes.size() >= 2); + DiscoveryNode[] discoveryNodes = dataNodes.values().toArray(new DiscoveryNode[0]); + String mergeNode = discoveryNodes[0].getName(); + // ensure all shards are allocated otherwise the ensure green below might not succeed since we require the merge node + // if we change the setting too quickly we will end up with one replica unassigned which can't be assigned anymore due + // to the require._name below. + ensureGreen(); + + flushAndRefresh(); + assertSortedSegments("source", expectedIndexSort); + + // relocate all shards to one node such that we can merge it. + client().admin() + .indices() + .prepareUpdateSettings("source") + .setSettings(Settings.builder().put("index.routing.allocation.require._name", mergeNode).put("index.blocks.write", true)) + .get(); + ensureGreen(); + + // check that index sort cannot be set on the target index + IllegalArgumentException exc = expectThrows( + IllegalArgumentException.class, + () -> client().admin() + .indices() + .prepareResizeIndex("source", "target") + .setSettings( + Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", "2") + .put("index.sort.field", "foo") + .build() + ) + .get() + ); + assertThat(exc.getMessage(), containsString("can't override index sort when resizing an index")); + + // check that the index sort order of `source` is correctly applied to the `target` + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("source", "target") + .setSettings( + Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", "2") + .putNull("index.blocks.write") + .build() + ) + .get() + ); + ensureGreen(); + flushAndRefresh(); + GetSettingsResponse settingsResponse = client().admin().indices().prepareGetSettings("target").execute().actionGet(); + assertEquals(settingsResponse.getSetting("target", "index.sort.field"), "id"); + assertEquals(settingsResponse.getSetting("target", "index.sort.order"), "desc"); + assertSortedSegments("target", expectedIndexSort); + + // ... and that the index sort is also applied to updates + for (int i = 20; i < 40; i++) { + client().prepareIndex("target").setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON).get(); + } + flushAndRefresh(); + assertSortedSegments("target", expectedIndexSort); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteSplitIndexIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteSplitIndexIT.java new file mode 100644 index 0000000000000..dd4252d24f314 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/create/RemoteSplitIndexIT.java @@ -0,0 +1,506 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.action.admin.indices.create; + +import org.apache.lucene.search.join.ScoreMode; +import org.apache.lucene.util.Constants; +import org.opensearch.Version; +import org.opensearch.action.admin.cluster.state.ClusterStateRequest; +import org.opensearch.action.admin.cluster.state.ClusterStateResponse; +import org.opensearch.action.admin.indices.settings.get.GetSettingsResponse; +import org.opensearch.action.admin.indices.shrink.ResizeType; +import org.opensearch.action.admin.indices.stats.CommonStats; +import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.action.admin.indices.stats.ShardStats; +import org.opensearch.action.get.GetResponse; +import org.opensearch.action.index.IndexRequest; +import org.opensearch.action.index.IndexRequestBuilder; +import org.opensearch.action.search.SearchResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.MetadataCreateIndexService; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.Murmur3HashFunction; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.index.Index; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.index.IndexModule; +import org.opensearch.index.IndexService; +import org.opensearch.index.engine.SegmentsStats; +import org.opensearch.index.query.TermsQueryBuilder; +import org.opensearch.index.seqno.SeqNoStats; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; +import org.opensearch.test.VersionUtils; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.function.BiFunction; +import java.util.stream.IntStream; + +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.opensearch.index.query.QueryBuilders.nestedQuery; +import static org.opensearch.index.query.QueryBuilders.termQuery; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertNoFailures; +import static org.hamcrest.Matchers.equalTo; + +public class RemoteSplitIndexIT extends RemoteStoreBaseIntegTestCase { + + @Override + protected boolean forbidPrivateIndexSettings() { + return false; + } + + public Settings indexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), false) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + } + + public void testCreateSplitIndexToN() throws IOException { + int[][] possibleShardSplits = new int[][] { { 2, 4, 8 }, { 3, 6, 12 }, { 1, 2, 4 } }; + int[] shardSplits = randomFrom(possibleShardSplits); + splitToN(shardSplits[0], shardSplits[1], shardSplits[2]); + } + + public void testSplitFromOneToN() { + + assumeFalse("https://github.com/elastic/elasticsearch/issues/34080", Constants.WINDOWS); + + splitToN(1, 5, 10); + client().admin().indices().prepareDelete("*").get(); + int randomSplit = randomIntBetween(2, 6); + splitToN(1, randomSplit, randomSplit * 2); + } + + private void splitToN(int sourceShards, int firstSplitShards, int secondSplitShards) { + + assertEquals(sourceShards, (sourceShards * firstSplitShards) / firstSplitShards); + assertEquals(firstSplitShards, (firstSplitShards * secondSplitShards) / secondSplitShards); + internalCluster().ensureAtLeastNumDataNodes(2); + final boolean useRouting = randomBoolean(); + final boolean useNested = randomBoolean(); + final boolean useMixedRouting = useRouting ? randomBoolean() : false; + CreateIndexRequestBuilder createInitialIndex = prepareCreate("source"); + Settings.Builder settings = Settings.builder().put(indexSettings()).put("number_of_shards", sourceShards); + final boolean useRoutingPartition; + if (randomBoolean()) { + // randomly set the value manually + int routingShards = secondSplitShards * randomIntBetween(1, 10); + settings.put("index.number_of_routing_shards", routingShards); + useRoutingPartition = false; + } else { + useRoutingPartition = randomBoolean(); + } + if (useRouting && useMixedRouting == false && useRoutingPartition) { + int numRoutingShards = MetadataCreateIndexService.calculateNumRoutingShards(secondSplitShards, Version.CURRENT) - 1; + settings.put("index.routing_partition_size", randomIntBetween(1, numRoutingShards)); + if (useNested) { + createInitialIndex.setMapping("_routing", "required=true", "nested1", "type=nested"); + } else { + createInitialIndex.setMapping("_routing", "required=true"); + } + } else if (useNested) { + createInitialIndex.setMapping("nested1", "type=nested"); + } + logger.info("use routing {} use mixed routing {} use nested {}", useRouting, useMixedRouting, useNested); + createInitialIndex.setSettings(settings).get(); + + int numDocs = randomIntBetween(10, 50); + String[] routingValue = new String[numDocs]; + + BiFunction indexFunc = (index, id) -> { + try { + return client().prepareIndex(index) + .setId(Integer.toString(id)) + .setSource( + jsonBuilder().startObject() + .field("foo", "bar") + .field("i", id) + .startArray("nested1") + .startObject() + .field("n_field1", "n_value1_1") + .field("n_field2", "n_value2_1") + .endObject() + .startObject() + .field("n_field1", "n_value1_2") + .field("n_field2", "n_value2_2") + .endObject() + .endArray() + .endObject() + ); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + for (int i = 0; i < numDocs; i++) { + IndexRequestBuilder builder = indexFunc.apply("source", i); + if (useRouting) { + String routing = randomRealisticUnicodeOfCodepointLengthBetween(1, 10); + if (useMixedRouting && randomBoolean()) { + routingValue[i] = null; + } else { + routingValue[i] = routing; + } + builder.setRouting(routingValue[i]); + } + builder.get(); + } + + if (randomBoolean()) { + for (int i = 0; i < numDocs; i++) { // let's introduce some updates / deletes on the index + if (randomBoolean()) { + IndexRequestBuilder builder = indexFunc.apply("source", i); + if (useRouting) { + builder.setRouting(routingValue[i]); + } + builder.get(); + } + } + } + + ensureYellow(); + client().admin().indices().prepareUpdateSettings("source").setSettings(Settings.builder().put("index.blocks.write", true)).get(); + ensureGreen(); + Settings.Builder firstSplitSettingsBuilder = Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", firstSplitShards) + .putNull("index.blocks.write"); + if (sourceShards == 1 && useRoutingPartition == false && randomBoolean()) { // try to set it if we have a source index with 1 shard + firstSplitSettingsBuilder.put("index.number_of_routing_shards", secondSplitShards); + } + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("source", "first_split") + .setResizeType(ResizeType.SPLIT) + .setSettings(firstSplitSettingsBuilder.build()) + .get() + ); + ensureGreen(); + assertHitCount(client().prepareSearch("first_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + + for (int i = 0; i < numDocs; i++) { // now update + IndexRequestBuilder builder = indexFunc.apply("first_split", i); + if (useRouting) { + builder.setRouting(routingValue[i]); + } + builder.get(); + } + flushAndRefresh(); + assertHitCount(client().prepareSearch("first_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + assertHitCount(client().prepareSearch("source").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + for (int i = 0; i < numDocs; i++) { + GetResponse getResponse = client().prepareGet("first_split", Integer.toString(i)).setRouting(routingValue[i]).get(); + assertTrue(getResponse.isExists()); + } + + client().admin() + .indices() + .prepareUpdateSettings("first_split") + .setSettings(Settings.builder().put("index.blocks.write", true)) + .get(); + ensureGreen(); + // now split source into a new index + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("first_split", "second_split") + .setResizeType(ResizeType.SPLIT) + .setSettings( + Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", secondSplitShards) + .putNull("index.blocks.write") + .build() + ) + .get() + ); + ensureGreen(); + assertHitCount(client().prepareSearch("second_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + // let it be allocated anywhere and bump replicas + client().admin() + .indices() + .prepareUpdateSettings("second_split") + .setSettings(Settings.builder().put("index.number_of_replicas", 0)) + .get(); + ensureGreen(); + assertHitCount(client().prepareSearch("second_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + + for (int i = 0; i < numDocs; i++) { // now update + IndexRequestBuilder builder = indexFunc.apply("second_split", i); + if (useRouting) { + builder.setRouting(routingValue[i]); + } + builder.get(); + } + flushAndRefresh(); + for (int i = 0; i < numDocs; i++) { + GetResponse getResponse = client().prepareGet("second_split", Integer.toString(i)).setRouting(routingValue[i]).get(); + assertTrue(getResponse.isExists()); + } + assertHitCount(client().prepareSearch("second_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + assertHitCount(client().prepareSearch("first_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + assertHitCount(client().prepareSearch("source").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + if (useNested) { + assertNested("source", numDocs); + assertNested("first_split", numDocs); + assertNested("second_split", numDocs); + } + assertAllUniqueDocs( + client().prepareSearch("second_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), + numDocs + ); + assertAllUniqueDocs( + client().prepareSearch("first_split").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), + numDocs + ); + assertAllUniqueDocs(client().prepareSearch("source").setSize(100).setQuery(new TermsQueryBuilder("foo", "bar")).get(), numDocs); + } + + public void assertNested(String index, int numDocs) { + // now, do a nested query + SearchResponse searchResponse = client().prepareSearch(index) + .setQuery(nestedQuery("nested1", termQuery("nested1.n_field1", "n_value1_1"), ScoreMode.Avg)) + .get(); + assertNoFailures(searchResponse); + assertThat(searchResponse.getHits().getTotalHits().value, equalTo((long) numDocs)); + } + + public void assertAllUniqueDocs(SearchResponse response, int numDocs) { + Set ids = new HashSet<>(); + for (int i = 0; i < response.getHits().getHits().length; i++) { + String id = response.getHits().getHits()[i].getId(); + assertTrue("found ID " + id + " more than once", ids.add(id)); + } + assertEquals(numDocs, ids.size()); + } + + public void testSplitIndexPrimaryTerm() throws Exception { + int numberOfTargetShards = randomIntBetween(2, 20); + int numberOfShards = randomValueOtherThanMany(n -> numberOfTargetShards % n != 0, () -> between(1, numberOfTargetShards - 1)); + internalCluster().ensureAtLeastNumDataNodes(2); + prepareCreate("source").setSettings( + Settings.builder() + .put(indexSettings()) + .put("number_of_shards", numberOfShards) + .put("index.number_of_routing_shards", numberOfTargetShards) + ).get(); + ensureGreen(TimeValue.timeValueSeconds(120)); // needs more than the default to allocate many shards + + // fail random primary shards to force primary terms to increase + final Index source = resolveIndex("source"); + final int iterations = scaledRandomIntBetween(0, 16); + for (int i = 0; i < iterations; i++) { + final String node = randomSubsetOf(1, internalCluster().nodesInclude("source")).get(0); + final IndicesService indexServices = internalCluster().getInstance(IndicesService.class, node); + final IndexService indexShards = indexServices.indexServiceSafe(source); + for (final Integer shardId : indexShards.shardIds()) { + final IndexShard shard = indexShards.getShard(shardId); + if (shard.routingEntry().primary() && randomBoolean()) { + disableAllocation("source"); + shard.failShard("test", new Exception("test")); + // this can not succeed until the shard is failed and a replica is promoted + int id = 0; + while (true) { + // find an ID that routes to the right shard, we will only index to the shard that saw a primary failure + final String s = Integer.toString(id); + final int hash = Math.floorMod(Murmur3HashFunction.hash(s), numberOfShards); + if (hash == shardId) { + final IndexRequest request = new IndexRequest("source").id(s) + .source("{ \"f\": \"" + s + "\"}", MediaTypeRegistry.JSON); + client().index(request).get(); + break; + } else { + id++; + } + } + enableAllocation("source"); + ensureGreen(); + } + } + } + + final Settings.Builder prepareSplitSettings = Settings.builder().put("index.blocks.write", true); + client().admin().indices().prepareUpdateSettings("source").setSettings(prepareSplitSettings).get(); + ensureYellow(); + + final IndexMetadata indexMetadata = indexMetadata(client(), "source"); + final long beforeSplitPrimaryTerm = IntStream.range(0, numberOfShards).mapToLong(indexMetadata::primaryTerm).max().getAsLong(); + + // now split source into target + final Settings splitSettings = Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", numberOfTargetShards) + .putNull("index.blocks.write") + .build(); + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("source", "target") + .setResizeType(ResizeType.SPLIT) + .setSettings(splitSettings) + .get() + ); + + ensureGreen(TimeValue.timeValueSeconds(120)); // needs more than the default to relocate many shards + + final IndexMetadata aftersplitIndexMetadata = indexMetadata(client(), "target"); + for (int shardId = 0; shardId < numberOfTargetShards; shardId++) { + assertThat(aftersplitIndexMetadata.primaryTerm(shardId), equalTo(beforeSplitPrimaryTerm + 1)); + } + } + + private static IndexMetadata indexMetadata(final Client client, final String index) { + final ClusterStateResponse clusterStateResponse = client.admin().cluster().state(new ClusterStateRequest()).actionGet(); + return clusterStateResponse.getState().metadata().index(index); + } + + public void testCreateSplitIndex() throws Exception { + internalCluster().ensureAtLeastNumDataNodes(2); + Version version = VersionUtils.randomIndexCompatibleVersion(random()); + prepareCreate("source").setSettings( + Settings.builder().put(indexSettings()).put("number_of_shards", 1).put("index.version.created", version) + ).get(); + final int docs = randomIntBetween(0, 128); + for (int i = 0; i < docs; i++) { + client().prepareIndex("source").setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON).get(); + } + // ensure all shards are allocated otherwise the ensure green below might not succeed since we require the merge node + // if we change the setting too quickly we will end up with one replica unassigned which can't be assigned anymore due + // to the require._name below. + ensureGreen(); + // relocate all shards to one node such that we can merge it. + client().admin().indices().prepareUpdateSettings("source").setSettings(Settings.builder().put("index.blocks.write", true)).get(); + ensureGreen(); + + final IndicesStatsResponse sourceStats = client().admin().indices().prepareStats("source").setSegments(true).get(); + + // disable rebalancing to be able to capture the right stats. balancing can move the target primary + // making it hard to pin point the source shards. + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none")) + .get(); + try { + assertAcked( + client().admin() + .indices() + .prepareResizeIndex("source", "target") + .setResizeType(ResizeType.SPLIT) + .setSettings( + Settings.builder() + .put("index.number_of_replicas", 0) + .put("index.number_of_shards", 2) + .putNull("index.blocks.write") + .build() + ) + .get() + ); + ensureGreen(); + + final ClusterState state = client().admin().cluster().prepareState().get().getState(); + DiscoveryNode mergeNode = state.nodes().get(state.getRoutingTable().index("target").shard(0).primaryShard().currentNodeId()); + logger.info("split node {}", mergeNode); + + final long maxSeqNo = Arrays.stream(sourceStats.getShards()) + .filter(shard -> shard.getShardRouting().currentNodeId().equals(mergeNode.getId())) + .map(ShardStats::getSeqNoStats) + .mapToLong(SeqNoStats::getMaxSeqNo) + .max() + .getAsLong(); + final long maxUnsafeAutoIdTimestamp = Arrays.stream(sourceStats.getShards()) + .filter(shard -> shard.getShardRouting().currentNodeId().equals(mergeNode.getId())) + .map(ShardStats::getStats) + .map(CommonStats::getSegments) + .mapToLong(SegmentsStats::getMaxUnsafeAutoIdTimestamp) + .max() + .getAsLong(); + + final IndicesStatsResponse targetStats = client().admin().indices().prepareStats("target").get(); + for (final ShardStats shardStats : targetStats.getShards()) { + final SeqNoStats seqNoStats = shardStats.getSeqNoStats(); + final ShardRouting shardRouting = shardStats.getShardRouting(); + assertThat("failed on " + shardRouting, seqNoStats.getMaxSeqNo(), equalTo(maxSeqNo)); + assertThat("failed on " + shardRouting, seqNoStats.getLocalCheckpoint(), equalTo(maxSeqNo)); + assertThat( + "failed on " + shardRouting, + shardStats.getStats().getSegments().getMaxUnsafeAutoIdTimestamp(), + equalTo(maxUnsafeAutoIdTimestamp) + ); + } + + final int size = docs > 0 ? 2 * docs : 1; + assertHitCount(client().prepareSearch("target").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs); + + for (int i = docs; i < 2 * docs; i++) { + client().prepareIndex("target").setSource("{\"foo\" : \"bar\", \"i\" : " + i + "}", MediaTypeRegistry.JSON).get(); + } + flushAndRefresh(); + assertHitCount( + client().prepareSearch("target").setSize(2 * size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), + 2 * docs + ); + assertHitCount(client().prepareSearch("source").setSize(size).setQuery(new TermsQueryBuilder("foo", "bar")).get(), docs); + GetSettingsResponse target = client().admin().indices().prepareGetSettings("target").get(); + assertEquals(version, target.getIndexToSettings().get("target").getAsVersion("index.version.created", null)); + } finally { + // clean up + client().admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings( + Settings.builder().put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), (String) null) + ) + .get(); + } + + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java index 1463c45aa9b2f..79f6ba6dfa642 100644 --- a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java @@ -39,6 +39,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.NoClusterManagerBlockService; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.xcontent.MediaTypeRegistry; @@ -199,6 +200,8 @@ public void testIsolateClusterManagerAndVerifyClusterStateConsensus() throws Exc } } + ClusterStateStats clusterStateStats = internalCluster().clusterService().getClusterManagerService().getClusterStateStats(); + assertTrue(clusterStateStats.getUpdateFailed() > 0); }); } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java index 7304304e522f8..dfde1b958882c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java @@ -8,9 +8,12 @@ package org.opensearch.gateway.remote; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.settings.Settings; +import org.opensearch.discovery.DiscoveryStats; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -19,6 +22,7 @@ import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.Map; +import java.util.stream.Collectors; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; @@ -86,14 +90,105 @@ public void testFullClusterRestoreStaleDelete() throws Exception { assertEquals(10, repository.blobStore().blobContainer(baseMetadataPath.add("manifest")).listBlobsByPrefix("manifest").size()); - Map indexMetadataMap = remoteClusterStateService.getLatestMetadata( + Map indexMetadataMap = remoteClusterStateService.getLatestClusterState( cluster().getClusterName(), getClusterState().metadata().clusterUUID() - ).getIndices(); + ).getMetadata().getIndices(); assertEquals(0, indexMetadataMap.values().stream().findFirst().get().getNumberOfReplicas()); assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards()); } + public void testRemoteStateStats() { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount); + String clusterManagerNode = internalCluster().getClusterManagerName(); + String dataNode = internalCluster().getDataNodeNames().stream().collect(Collectors.toList()).get(0); + + // Fetch _nodes/stats + NodesStatsResponse nodesStatsResponse = client().admin() + .cluster() + .prepareNodesStats(clusterManagerNode) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .get(); + + // assert cluster state stats + assertClusterManagerClusterStateStats(nodesStatsResponse); + + NodesStatsResponse nodesStatsResponseDataNode = client().admin() + .cluster() + .prepareNodesStats(dataNode) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .get(); + // assert cluster state stats for data node + DiscoveryStats dataNodeDiscoveryStats = nodesStatsResponseDataNode.getNodes().get(0).getDiscoveryStats(); + assertNotNull(dataNodeDiscoveryStats.getClusterStateStats()); + assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getUpdateSuccess()); + + // call nodes/stats with nodeId filter + NodesStatsResponse nodesStatsNodeIdFilterResponse = client().admin() + .cluster() + .prepareNodesStats(dataNode) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .setNodesIds(clusterManagerNode) + .get(); + + assertClusterManagerClusterStateStats(nodesStatsNodeIdFilterResponse); + } + + private void assertClusterManagerClusterStateStats(NodesStatsResponse nodesStatsResponse) { + // assert cluster state stats + DiscoveryStats discoveryStats = nodesStatsResponse.getNodes().get(0).getDiscoveryStats(); + + assertNotNull(discoveryStats.getClusterStateStats()); + assertTrue(discoveryStats.getClusterStateStats().getUpdateSuccess() > 1); + assertEquals(0, discoveryStats.getClusterStateStats().getUpdateFailed()); + assertTrue(discoveryStats.getClusterStateStats().getUpdateTotalTimeInMillis() > 0); + // assert remote state stats + assertTrue(discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getSuccessCount() > 1); + assertEquals(0, discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getFailedCount()); + assertTrue(discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getTotalTimeInMillis() > 0); + } + + public void testRemoteStateStatsFromAllNodes() { + int shardCount = randomIntBetween(1, 5); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 3; + prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount); + String[] allNodes = internalCluster().getNodeNames(); + // call _nodes/stats/discovery from all the nodes + for (String node : allNodes) { + NodesStatsResponse nodesStatsResponse = client().admin() + .cluster() + .prepareNodesStats(node) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .get(); + validateNodesStatsResponse(nodesStatsResponse); + } + + // call _nodes/stats/discovery from all the nodes with random nodeId filter + for (String node : allNodes) { + NodesStatsResponse nodesStatsResponse = client().admin() + .cluster() + .prepareNodesStats(node) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .setNodesIds(allNodes[randomIntBetween(0, allNodes.length - 1)]) + .get(); + validateNodesStatsResponse(nodesStatsResponse); + } + } + + private void validateNodesStatsResponse(NodesStatsResponse nodesStatsResponse) { + // _nodes/stats/discovery must never fail due to any exception + assertFalse(nodesStatsResponse.toString().contains("exception")); + assertNotNull(nodesStatsResponse.getNodes()); + assertNotNull(nodesStatsResponse.getNodes().get(0)); + assertNotNull(nodesStatsResponse.getNodes().get(0).getDiscoveryStats()); + } + private void setReplicaCount(int replicaCount) { client().admin() .indices() diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java index 98a22717019cf..848f6eddbb0df 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/IndicesRequestCacheIT.java @@ -539,7 +539,7 @@ public void testCanCache() throws Exception { assertCacheState(client, "index", 0, 4); } - public void testCacheWithFilteredAlias() { + public void testCacheWithFilteredAlias() throws InterruptedException { Client client = client(); Settings settings = Settings.builder() .put(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING.getKey(), true) @@ -562,6 +562,8 @@ public void testCacheWithFilteredAlias() { OpenSearchAssertions.assertAllSuccessful(forceMergeResponse); refresh(); + indexRandomForConcurrentSearch("index"); + assertCacheState(client, "index", 0, 0); SearchResponse r1 = client.prepareSearch("index") diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java index a82fd8d845709..186a5ce39f131 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java @@ -19,6 +19,7 @@ import org.opensearch.test.OpenSearchIntegTestCase; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.indices.IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_SETTING_REPLICATION_TYPE; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) @@ -123,4 +124,30 @@ public void testIndexReplicationSettingOverridesDocRepClusterSetting() throws Ex assertEquals(indicesService.indexService(anotherIndex).getIndexSettings().isSegRepEnabled(), false); } + public void testIndexReplicationTypeWhenRestrictSettingTrue() { + testRestrictIndexReplicationTypeSetting(true, randomFrom(ReplicationType.values())); + } + + public void testIndexReplicationTypeWhenRestrictSettingFalse() { + testRestrictIndexReplicationTypeSetting(false, randomFrom(ReplicationType.values())); + } + + private void testRestrictIndexReplicationTypeSetting(boolean setRestrict, ReplicationType replicationType) { + String expectedExceptionMsg = + "Validation Failed: 1: index setting [index.replication.type] is not allowed to be set as [cluster.restrict.index.replication_type=true];"; + String clusterManagerName = internalCluster().startNode( + Settings.builder().put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), setRestrict).build() + ); + internalCluster().startDataOnlyNodes(1); + + // Test create index fails + Settings indexSettings = Settings.builder().put(indexSettings()).put(SETTING_REPLICATION_TYPE, replicationType).build(); + if (setRestrict) { + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> createIndex(INDEX_NAME, indexSettings)); + assertEquals(expectedExceptionMsg, exception.getMessage()); + } else { + createIndex(INDEX_NAME, indexSettings); + } + } + } diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationDisruptionIT.java new file mode 100644 index 0000000000000..66b26b5d25cfe --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationDisruptionIT.java @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.indices.replication; + +import org.apache.lucene.tests.util.LuceneTestCase; +import org.opensearch.action.admin.indices.recovery.RecoveryResponse; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.bytes.BytesArray; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.recovery.FileChunkRequest; +import org.opensearch.indices.recovery.RecoveryState; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.transport.MockTransportService; +import org.opensearch.transport.TransportRequest; +import org.opensearch.transport.TransportService; +import org.junit.Before; + +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; + +/** + * These tests simulate corruption cases during replication. They are skipped on WindowsFS simulation where file renaming + * can fail with an access denied IOException because deletion is not permitted. + */ +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +@LuceneTestCase.SuppressFileSystems("WindowsFS") +public class SegmentReplicationDisruptionIT extends SegmentReplicationBaseIT { + @Before + private void setup() { + internalCluster().startClusterManagerOnlyNode(); + } + + public void testSendCorruptBytesToReplica() throws Exception { + final String primaryNode = internalCluster().startDataOnlyNode(); + createIndex( + INDEX_NAME, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put("index.refresh_interval", -1) + .build() + ); + ensureYellow(INDEX_NAME); + final String replicaNode = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + MockTransportService primaryTransportService = ((MockTransportService) internalCluster().getInstance( + TransportService.class, + primaryNode + )); + CountDownLatch latch = new CountDownLatch(1); + AtomicBoolean failed = new AtomicBoolean(false); + primaryTransportService.addSendBehavior( + internalCluster().getInstance(TransportService.class, replicaNode), + (connection, requestId, action, request, options) -> { + if (action.equals(SegmentReplicationTargetService.Actions.FILE_CHUNK) && failed.getAndSet(true) == false) { + FileChunkRequest req = (FileChunkRequest) request; + TransportRequest corrupt = new FileChunkRequest( + req.recoveryId(), + ((FileChunkRequest) request).requestSeqNo(), + ((FileChunkRequest) request).shardId(), + ((FileChunkRequest) request).metadata(), + ((FileChunkRequest) request).position(), + new BytesArray("test"), + false, + 0, + 0L + ); + connection.sendRequest(requestId, action, corrupt, options); + latch.countDown(); + } else { + connection.sendRequest(requestId, action, request, options); + } + } + ); + for (int i = 0; i < 100; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + .setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + final long originalRecoveryTime = getRecoveryStopTime(replicaNode); + assertNotEquals(originalRecoveryTime, 0); + refresh(INDEX_NAME); + latch.await(); + assertTrue(failed.get()); + waitForNewPeerRecovery(replicaNode, originalRecoveryTime); + // reset checkIndex to ensure our original shard doesn't throw + resetCheckIndexStatus(); + waitForSearchableDocs(100, primaryNode, replicaNode); + } + + public void testWipeSegmentBetweenSyncs() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primaryNode = internalCluster().startDataOnlyNode(); + createIndex( + INDEX_NAME, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put("index.refresh_interval", -1) + .build() + ); + ensureYellow(INDEX_NAME); + final String replicaNode = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + for (int i = 0; i < 10; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + .setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + refresh(INDEX_NAME); + ensureGreen(INDEX_NAME); + final long originalRecoveryTime = getRecoveryStopTime(replicaNode); + + final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); + waitForSearchableDocs(INDEX_NAME, 10, List.of(replicaNode)); + indexShard.store().directory().deleteFile("_0.si"); + + for (int i = 11; i < 21; i++) { + client().prepareIndex(INDEX_NAME) + .setId(String.valueOf(i)) + .setSource(jsonBuilder().startObject().field("field", i).endObject()) + .get(); + } + refresh(INDEX_NAME); + waitForNewPeerRecovery(replicaNode, originalRecoveryTime); + resetCheckIndexStatus(); + waitForSearchableDocs(20, primaryNode, replicaNode); + } + + private void waitForNewPeerRecovery(String replicaNode, long originalRecoveryTime) throws Exception { + assertBusy(() -> { + // assert we have a peer recovery after the original + final long time = getRecoveryStopTime(replicaNode); + assertNotEquals(time, 0); + assertNotEquals(originalRecoveryTime, time); + + }, 1, TimeUnit.MINUTES); + } + + private long getRecoveryStopTime(String nodeName) { + final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(INDEX_NAME).get(); + final List recoveryStates = recoveryResponse.shardRecoveryStates().get(INDEX_NAME); + for (RecoveryState recoveryState : recoveryStates) { + if (recoveryState.getTargetNode().getName().equals(nodeName)) { + return recoveryState.getTimer().stopTime(); + } + } + return 0L; + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java index 81556cc270151..f48df082a25dc 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java @@ -24,7 +24,6 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.action.admin.indices.alias.Alias; import org.opensearch.action.admin.indices.flush.FlushRequest; -import org.opensearch.action.admin.indices.recovery.RecoveryResponse; import org.opensearch.action.admin.indices.stats.IndicesStatsRequest; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; import org.opensearch.action.get.GetResponse; @@ -59,7 +58,6 @@ import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; -import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.xcontent.XContentBuilder; @@ -73,7 +71,6 @@ import org.opensearch.index.engine.NRTReplicationReaderManager; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.recovery.FileChunkRequest; -import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.node.NodeClosedException; @@ -85,7 +82,6 @@ import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.transport.MockTransportService; -import org.opensearch.transport.TransportRequest; import org.opensearch.transport.TransportService; import org.junit.Before; @@ -98,7 +94,6 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import static java.util.Arrays.asList; @@ -1781,135 +1776,4 @@ public void testRealtimeTermVectorRequestsUnSuccessful() throws IOException { assertThat(response.getIndex(), equalTo(INDEX_NAME)); } - - public void testSendCorruptBytesToReplica() throws Exception { - // this test stubs transport calls specific to node-node replication. - assumeFalse( - "Skipping the test as its not compatible with segment replication with remote store.", - segmentReplicationWithRemoteEnabled() - ); - final String primaryNode = internalCluster().startDataOnlyNode(); - createIndex( - INDEX_NAME, - Settings.builder() - .put(indexSettings()) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) - .put("index.refresh_interval", -1) - .build() - ); - ensureYellow(INDEX_NAME); - final String replicaNode = internalCluster().startDataOnlyNode(); - ensureGreen(INDEX_NAME); - - MockTransportService primaryTransportService = ((MockTransportService) internalCluster().getInstance( - TransportService.class, - primaryNode - )); - CountDownLatch latch = new CountDownLatch(1); - AtomicBoolean failed = new AtomicBoolean(false); - primaryTransportService.addSendBehavior( - internalCluster().getInstance(TransportService.class, replicaNode), - (connection, requestId, action, request, options) -> { - if (action.equals(SegmentReplicationTargetService.Actions.FILE_CHUNK) && failed.getAndSet(true) == false) { - FileChunkRequest req = (FileChunkRequest) request; - logger.info("SENDING CORRUPT file chunk [{}] lastChunk: {}", req, req.lastChunk()); - TransportRequest corrupt = new FileChunkRequest( - req.recoveryId(), - ((FileChunkRequest) request).requestSeqNo(), - ((FileChunkRequest) request).shardId(), - ((FileChunkRequest) request).metadata(), - ((FileChunkRequest) request).position(), - new BytesArray("test"), - false, - 0, - 0L - ); - connection.sendRequest(requestId, action, corrupt, options); - latch.countDown(); - } else { - connection.sendRequest(requestId, action, request, options); - } - } - ); - for (int i = 0; i < 100; i++) { - client().prepareIndex(INDEX_NAME) - .setId(String.valueOf(i)) - .setSource(jsonBuilder().startObject().field("field", i).endObject()) - .get(); - } - final long originalRecoveryTime = getRecoveryStopTime(replicaNode); - assertNotEquals(originalRecoveryTime, 0); - refresh(INDEX_NAME); - latch.await(); - assertTrue(failed.get()); - waitForNewPeerRecovery(replicaNode, originalRecoveryTime); - // reset checkIndex to ensure our original shard doesn't throw - resetCheckIndexStatus(); - waitForSearchableDocs(100, primaryNode, replicaNode); - } - - public void testWipeSegmentBetweenSyncs() throws Exception { - internalCluster().startClusterManagerOnlyNode(); - final String primaryNode = internalCluster().startDataOnlyNode(); - createIndex( - INDEX_NAME, - Settings.builder() - .put(indexSettings()) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) - .put("index.refresh_interval", -1) - .build() - ); - ensureYellow(INDEX_NAME); - final String replicaNode = internalCluster().startDataOnlyNode(); - ensureGreen(INDEX_NAME); - - for (int i = 0; i < 10; i++) { - client().prepareIndex(INDEX_NAME) - .setId(String.valueOf(i)) - .setSource(jsonBuilder().startObject().field("field", i).endObject()) - .get(); - } - refresh(INDEX_NAME); - ensureGreen(INDEX_NAME); - final long originalRecoveryTime = getRecoveryStopTime(replicaNode); - - final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); - waitForSearchableDocs(INDEX_NAME, 10, List.of(replicaNode)); - indexShard.store().directory().deleteFile("_0.si"); - - for (int i = 11; i < 21; i++) { - client().prepareIndex(INDEX_NAME) - .setId(String.valueOf(i)) - .setSource(jsonBuilder().startObject().field("field", i).endObject()) - .get(); - } - refresh(INDEX_NAME); - waitForNewPeerRecovery(replicaNode, originalRecoveryTime); - resetCheckIndexStatus(); - waitForSearchableDocs(20, primaryNode, replicaNode); - } - - private void waitForNewPeerRecovery(String replicaNode, long originalRecoveryTime) throws Exception { - assertBusy(() -> { - // assert we have a peer recovery after the original - final long time = getRecoveryStopTime(replicaNode); - assertNotEquals(time, 0); - assertNotEquals(originalRecoveryTime, time); - - }, 1, TimeUnit.MINUTES); - } - - private long getRecoveryStopTime(String nodeName) { - final RecoveryResponse recoveryResponse = client().admin().indices().prepareRecoveries(INDEX_NAME).get(); - final List recoveryStates = recoveryResponse.shardRecoveryStates().get(INDEX_NAME); - logger.info("Recovery states {}", recoveryResponse); - for (RecoveryState recoveryState : recoveryStates) { - if (recoveryState.getTargetNode().getName().equals(nodeName)) { - return recoveryState.getTimer().stopTime(); - } - } - return 0L; - } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index 865b2d13f189e..21ce4be9981fb 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ -11,6 +11,7 @@ import org.opensearch.action.DocWriteResponse; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; import org.opensearch.action.admin.indices.get.GetIndexRequest; import org.opensearch.action.admin.indices.get.GetIndexResponse; import org.opensearch.action.delete.DeleteResponse; @@ -20,8 +21,13 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.io.PathUtils; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.index.Index; import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; import org.opensearch.snapshots.SnapshotInfo; @@ -32,11 +38,15 @@ import org.junit.Before; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; @@ -320,6 +330,8 @@ public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); assertEquals(restoreSnapshotResponse2.status(), RestStatus.ACCEPTED); ensureGreen(indexName1, restoredIndexName2); + + assertRemoteSegmentsAndTranslogUploaded(restoredIndexName2); assertDocsPresentInIndex(client, indexName1, numDocsInIndex1); assertDocsPresentInIndex(client, restoredIndexName2, numDocsInIndex2); // indexing some new docs and validating @@ -345,6 +357,97 @@ public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 4); } + void assertRemoteSegmentsAndTranslogUploaded(String idx) throws IOException { + String indexUUID = client().admin().indices().prepareGetSettings(idx).get().getSetting(idx, IndexMetadata.SETTING_INDEX_UUID); + + Path remoteTranslogMetadataPath = Path.of(String.valueOf(remoteRepoPath), indexUUID, "/0/translog/metadata"); + Path remoteTranslogDataPath = Path.of(String.valueOf(remoteRepoPath), indexUUID, "/0/translog/data"); + Path segmentMetadataPath = Path.of(String.valueOf(remoteRepoPath), indexUUID, "/0/segments/metadata"); + Path segmentDataPath = Path.of(String.valueOf(remoteRepoPath), indexUUID, "/0/segments/data"); + + try ( + Stream translogMetadata = Files.list(remoteTranslogMetadataPath); + Stream translogData = Files.list(remoteTranslogDataPath); + Stream segmentMetadata = Files.list(segmentMetadataPath); + Stream segmentData = Files.list(segmentDataPath); + + ) { + assertTrue(translogData.count() > 0); + assertTrue(translogMetadata.count() > 0); + assertTrue(segmentMetadata.count() > 0); + assertTrue(segmentData.count() > 0); + } + + } + + public void testRemoteRestoreIndexRestoredFromSnapshot() throws IOException, ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + final int numDocsInIndex1 = randomIntBetween(20, 30); + indexDocuments(client(), indexName1, numDocsInIndex1); + flushAndRefresh(indexName1); + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(Arrays.asList(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(indexName1)).get()); + assertFalse(indexExists(indexName1)); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .get(); + + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + + assertRemoteSegmentsAndTranslogUploaded(indexName1); + + // Clear the local data before stopping the node. This will make sure that remote translog is empty. + IndexShard indexShard = getIndexShard(primaryNodeName(indexName1), indexName1); + try (Stream files = Files.list(indexShard.shardPath().resolveTranslog())) { + IOUtils.deleteFilesIgnoringExceptions(files.collect(Collectors.toList())); + } + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName(indexName1))); + + ensureRed(indexName1); + + client().admin() + .cluster() + .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(indexName1).restoreAllShards(false), PlainActionFuture.newFuture()); + + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + } + + protected IndexShard getIndexShard(String node, String indexName) { + final Index index = resolveIndex(indexName); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + IndexService indexService = indicesService.indexService(index); + assertNotNull(indexService); + final Optional shardId = indexService.shardIds().stream().findFirst(); + return shardId.map(indexService::getShard).orElse(null); + } + public void testRestoreShallowCopySnapshotWithDifferentRepo() throws IOException { String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); String primary = internalCluster().startDataOnlyNode(); @@ -486,4 +589,71 @@ public void testRestoreShallowSnapshotRepository() throws ExecutionException, In assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1 + 2); } + public void testRestoreShallowSnapshotIndexAfterSnapshot() throws ExecutionException, InterruptedException { + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String remoteStoreRepoNameUpdated = "test-rs-repo-updated" + TEST_REMOTE_STORE_REPO_SUFFIX; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + Path absolutePath2 = randomRepoPath().toAbsolutePath(); + String[] pathTokens = absolutePath1.toString().split("/"); + String basePath = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location = PathUtils.get(String.join("/", pathTokens)); + pathTokens = absolutePath2.toString().split("/"); + String basePath2 = pathTokens[pathTokens.length - 1]; + Arrays.copyOf(pathTokens, pathTokens.length - 1); + Path location2 = PathUtils.get(String.join("/", pathTokens)); + logger.info("Path 1 [{}]", absolutePath1); + logger.info("Path 2 [{}]", absolutePath2); + String restoredIndexName1 = indexName1 + "-restored"; + + createRepository(snapshotRepoName, "fs", getRepositorySettings(location, basePath, true)); + + Client client = client(); + Settings indexSettings = Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build(); + createIndex(indexName1, indexSettings); + + int numDocsInIndex1 = randomIntBetween(2, 5); + indexDocuments(client, indexName1, numDocsInIndex1); + + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(List.of(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + int extraNumDocsInIndex1 = randomIntBetween(20, 50); + indexDocuments(client, indexName1, extraNumDocsInIndex1); + refresh(indexName1); + + client().admin().indices().close(Requests.closeIndexRequest(indexName1)).get(); + createRepository(remoteStoreRepoNameUpdated, "fs", remoteRepoPath); + RestoreSnapshotResponse restoreSnapshotResponse2 = client.admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(true) + .setIndices(indexName1) + .setRenamePattern(indexName1) + .setRenameReplacement(restoredIndexName1) + .setSourceRemoteStoreRepository(remoteStoreRepoNameUpdated) + .get(); + + assertTrue(restoreSnapshotResponse2.getRestoreInfo().failedShards() == 0); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1); + + // indexing some new docs and validating + indexDocuments(client, restoredIndexName1, numDocsInIndex1, numDocsInIndex1 + 2); + ensureGreen(restoredIndexName1); + assertDocsPresentInIndex(client, restoredIndexName1, numDocsInIndex1 + 2); + } + } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index bccca283ba772..8b4981a15433a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -56,7 +56,7 @@ public class RemoteStoreBaseIntegTestCase extends OpenSearchIntegTestCase { protected static final String REPOSITORY_NAME = "test-remote-store-repo"; protected static final String REPOSITORY_2_NAME = "test-remote-store-repo-2"; protected static final int SHARD_COUNT = 1; - protected static final int REPLICA_COUNT = 1; + protected static int REPLICA_COUNT = 1; protected static final String TOTAL_OPERATIONS = "total-operations"; protected static final String REFRESHED_OR_FLUSHED_OPERATIONS = "refreshed-or-flushed-operations"; protected static final String MAX_SEQ_NO_TOTAL = "max-seq-no-total"; diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index e9afd6d36bb87..c61e2ec6e4f6c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -30,6 +30,7 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.concurrent.ExecutionException; @@ -85,6 +86,7 @@ public void testFullClusterRestore() throws Exception { // Step - 1 index some data to generate files in remote directory Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, 1); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + long prevClusterStateVersion = clusterService().state().version(); // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata resetCluster(dataNodeCount, clusterManagerNodeCount); @@ -92,9 +94,17 @@ public void testFullClusterRestore() throws Exception { String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - // Step - 3 Trigger full cluster restore and validate + // Step - 3 validate cluster state restored + long newClusterStateVersion = clusterService().state().version(); + assert prevClusterStateVersion < newClusterStateVersion : String.format( + Locale.ROOT, + "ClusterState version is not restored. previousClusterVersion: [%s] is greater than current [%s]", + prevClusterStateVersion, + newClusterStateVersion + ); validateMetadata(List.of(INDEX_NAME)); verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); + } /** @@ -121,6 +131,7 @@ public void testFullClusterRestoreDoesntFailWithConflictingLocalState() throws E // index some data to generate files in remote directory Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, 1); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + long prevClusterStateVersion = clusterService().state().version(); // stop all nodes internalCluster().stopAllNodes(); @@ -156,6 +167,14 @@ public Settings onNodeStopped(String nodeName) { newClusterUUID = clusterService().state().metadata().clusterUUID(); assert !Objects.equals(newClusterUUID, ClusterState.UNKNOWN_UUID) : "cluster restart not successful. cluster uuid is still unknown"; assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; + + long newClusterStateVersion = clusterService().state().version(); + assert prevClusterStateVersion < newClusterStateVersion : String.format( + Locale.ROOT, + "ClusterState version is not restored. previousClusterVersion: [%s] is greater than current [%s]", + prevClusterStateVersion, + newClusterStateVersion + ); validateMetadata(List.of(INDEX_NAME)); // start data nodes to trigger index data recovery @@ -180,6 +199,7 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { updateIndexBlock(true, secondIndexName); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + long prevClusterStateVersion = clusterService().state().version(); // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata resetCluster(dataNodeCount, clusterManagerNodeCount); @@ -187,7 +207,14 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - // Step - 3 Trigger full cluster restore + // Step - 3 validate cluster state restored + long newClusterStateVersion = clusterService().state().version(); + assert prevClusterStateVersion < newClusterStateVersion : String.format( + Locale.ROOT, + "ClusterState version is not restored. previousClusterVersion: [%s] is greater than current [%s]", + prevClusterStateVersion, + newClusterStateVersion + ); validateMetadata(List.of(INDEX_NAME, secondIndexName)); verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); verifyRedIndicesAndTriggerRestore(indexStats2, secondIndexName, false); @@ -239,6 +266,7 @@ public void testRemoteStateFullRestart() throws Exception { Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + long prevClusterStateVersion = clusterService().state().version(); // Delete index metadata file in remote try { Files.move( @@ -257,6 +285,14 @@ public void testRemoteStateFullRestart() throws Exception { ensureGreen(INDEX_NAME); String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. cluster uuid has changed"; + + long newClusterStateVersion = clusterService().state().version(); + assert prevClusterStateVersion < newClusterStateVersion : String.format( + Locale.ROOT, + "ClusterState version is not restored. previousClusterVersion: [%s] is greater than current [%s]", + prevClusterStateVersion, + newClusterStateVersion + ); validateCurrentMetadata(); verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } @@ -309,6 +345,7 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { // Step - 1 index some data to generate files in remote directory Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, 1); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + long prevClusterStateVersion = clusterService().state().version(); // Create global metadata - register a custom repo Path repoPath = registerCustomRepository(); @@ -328,8 +365,16 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - // Step - 3 Trigger full cluster restore and validate - // validateCurrentMetadata(); + // Step - 3 validate cluster state restored + long newClusterStateVersion = clusterService().state().version(); + assert prevClusterStateVersion < newClusterStateVersion : String.format( + Locale.ROOT, + "ClusterState version is not restored. previousClusterVersion: [%s] is greater than current [%s]", + prevClusterStateVersion, + newClusterStateVersion + ); + + validateCurrentMetadata(); assertEquals(Integer.valueOf(34), SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterService().state().metadata().settings())); assertEquals(true, SETTING_READ_ONLY_SETTING.get(clusterService().state().metadata().settings())); assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java index 5e91176ed0473..2d3ab135d0377 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java @@ -581,21 +581,23 @@ public void testNonZeroPrimaryStatsOnNewlyCreatedIndexWithZeroDocs() throws Exce .getRemoteStoreStats(); Arrays.stream(remoteStoreStats).forEach(statObject -> { RemoteSegmentTransferTracker.Stats segmentStats = statObject.getSegmentStats(); + RemoteTranslogTransferTracker.Stats translogStats = statObject.getTranslogStats(); if (statObject.getShardRouting().primary()) { assertTrue( segmentStats.totalUploadsSucceeded == 1 && segmentStats.totalUploadsStarted == segmentStats.totalUploadsSucceeded && segmentStats.totalUploadsFailed == 0 ); + // On primary shard creation, we upload to remote translog post primary mode activation. + // This changes upload stats to non-zero for primary shard. + assertNonZeroTranslogUploadStatsNoFailures(translogStats); } else { assertTrue( segmentStats.directoryFileTransferTrackerStats.transferredBytesStarted == 0 && segmentStats.directoryFileTransferTrackerStats.transferredBytesSucceeded == 0 ); + assertZeroTranslogUploadStats(translogStats); } - - RemoteTranslogTransferTracker.Stats translogStats = statObject.getTranslogStats(); - assertZeroTranslogUploadStats(translogStats); assertZeroTranslogDownloadStats(translogStats); }); }, 5, TimeUnit.SECONDS); @@ -653,6 +655,29 @@ public void testStatsCorrectnessOnFailover() { logger.info("Test completed"); } + public void testZeroLagOnCreateIndex() throws InterruptedException { + setup(); + String clusterManagerNode = internalCluster().getClusterManagerName(); + + int numOfShards = randomIntBetween(1, 3); + createIndex(INDEX_NAME, remoteStoreIndexSettings(1, numOfShards)); + ensureGreen(INDEX_NAME); + long currentTimeNs = System.nanoTime(); + while (currentTimeNs == System.nanoTime()) { + Thread.sleep(10); + } + + for (int i = 0; i < numOfShards; i++) { + RemoteStoreStatsResponse response = client(clusterManagerNode).admin() + .cluster() + .prepareRemoteStoreStats(INDEX_NAME, String.valueOf(i)) + .get(); + for (RemoteStoreStats remoteStoreStats : response.getRemoteStoreStats()) { + assertEquals(0, remoteStoreStats.getSegmentStats().refreshTimeLagMs); + } + } + } + private void indexDocs() { for (int i = 0; i < randomIntBetween(5, 10); i++) { if (randomBoolean()) { diff --git a/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 42d91ac945662..f7bc5eb75ad0f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -155,7 +155,7 @@ protected Collection> nodePlugins() { return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockAnalysisPlugin.class); } - public void testHighlightingWithKeywordIgnoreBoundaryScanner() throws IOException { + public void testHighlightingWithKeywordIgnoreBoundaryScanner() throws IOException, InterruptedException { XContentBuilder mappings = jsonBuilder(); mappings.startObject(); mappings.startObject("properties") @@ -177,6 +177,7 @@ public void testHighlightingWithKeywordIgnoreBoundaryScanner() throws IOExceptio .setSource(jsonBuilder().startObject().array("tags", "foo baz", "foo baz", "foo baz", "foo bar").field("sort", 2).endObject()) .get(); refresh(); + indexRandomForConcurrentSearch("test"); for (BoundaryScannerType scanner : BoundaryScannerType.values()) { SearchResponse search = client().prepareSearch() @@ -190,12 +191,13 @@ public void testHighlightingWithKeywordIgnoreBoundaryScanner() throws IOExceptio } } - public void testHighlightingWithStoredKeyword() throws IOException { + public void testHighlightingWithStoredKeyword() throws IOException, InterruptedException { XContentBuilder mappings = jsonBuilder(); mappings.startObject(); mappings.startObject("properties").startObject("text").field("type", "keyword").field("store", true).endObject().endObject(); mappings.endObject(); assertAcked(prepareCreate("test").setMapping(mappings)); + indexRandomForConcurrentSearch("test"); client().prepareIndex("test").setId("1").setSource(jsonBuilder().startObject().field("text", "foo").endObject()).get(); refresh(); SearchResponse search = client().prepareSearch() @@ -205,7 +207,7 @@ public void testHighlightingWithStoredKeyword() throws IOException { assertHighlight(search, 0, "text", 0, equalTo("foo")); } - public void testHighlightingWithWildcardName() throws IOException { + public void testHighlightingWithWildcardName() throws IOException, InterruptedException { // test the kibana case with * as fieldname that will try highlight all fields including meta fields XContentBuilder mappings = jsonBuilder(); mappings.startObject(); @@ -221,6 +223,7 @@ public void testHighlightingWithWildcardName() throws IOException { assertAcked(prepareCreate("test").setMapping(mappings)); client().prepareIndex("test").setId("1").setSource(jsonBuilder().startObject().field("text", "text").endObject()).get(); refresh(); + indexRandomForConcurrentSearch("test"); for (String type : ALL_TYPES) { SearchResponse search = client().prepareSearch() .setQuery(constantScoreQuery(matchQuery("text", "text"))) @@ -230,7 +233,7 @@ public void testHighlightingWithWildcardName() throws IOException { } } - public void testFieldAlias() throws IOException { + public void testFieldAlias() throws IOException, InterruptedException { XContentBuilder mappings = jsonBuilder().startObject() .startObject("properties") .startObject("text") @@ -248,7 +251,7 @@ public void testFieldAlias() throws IOException { client().prepareIndex("test").setId("1").setSource("text", "foo").get(); refresh(); - + indexRandomForConcurrentSearch("test"); for (String type : ALL_TYPES) { HighlightBuilder builder = new HighlightBuilder().field(new Field("alias").highlighterType(type)) .requireFieldMatch(randomBoolean()); @@ -257,7 +260,7 @@ public void testFieldAlias() throws IOException { } } - public void testFieldAliasWithSourceLookup() throws IOException { + public void testFieldAliasWithSourceLookup() throws IOException, InterruptedException { XContentBuilder mappings = jsonBuilder().startObject() .startObject("properties") .startObject("text") @@ -276,7 +279,7 @@ public void testFieldAliasWithSourceLookup() throws IOException { client().prepareIndex("test").setId("1").setSource("text", "foo bar").get(); refresh(); - + indexRandomForConcurrentSearch("test"); for (String type : ALL_TYPES) { HighlightBuilder builder = new HighlightBuilder().field(new Field("alias").highlighterType(type)) .requireFieldMatch(randomBoolean()); @@ -285,7 +288,7 @@ public void testFieldAliasWithSourceLookup() throws IOException { } } - public void testFieldAliasWithWildcardField() throws IOException { + public void testFieldAliasWithWildcardField() throws IOException, InterruptedException { XContentBuilder mappings = jsonBuilder().startObject() .startObject("properties") .startObject("keyword") @@ -301,13 +304,14 @@ public void testFieldAliasWithWildcardField() throws IOException { client().prepareIndex("test").setId("1").setSource("keyword", "foo").get(); refresh(); + indexRandomForConcurrentSearch("test"); HighlightBuilder builder = new HighlightBuilder().field(new Field("al*")).requireFieldMatch(false); SearchResponse search = client().prepareSearch().setQuery(matchQuery("alias", "foo")).highlighter(builder).get(); assertHighlight(search, 0, "alias", 0, equalTo("foo")); } - public void testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOException { + public void testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOException, InterruptedException { XContentBuilder mappings = jsonBuilder(); mappings.startObject(); mappings.startObject("_source") @@ -334,6 +338,7 @@ public void testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOExc .setSource(jsonBuilder().startObject().field("unstored_text", "text").field("text", "text").endObject()) .get(); refresh(); + indexRandomForConcurrentSearch("test"); for (String type : ALL_TYPES) { SearchResponse search = client().prepareSearch() .setQuery(constantScoreQuery(matchQuery("text", "text"))) @@ -350,7 +355,7 @@ public void testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOExc } // see #3486 - public void testHighTermFrequencyDoc() throws IOException { + public void testHighTermFrequencyDoc() throws IOException, InterruptedException { assertAcked(prepareCreate("test").setMapping("name", "type=text,term_vector=with_positions_offsets,store=" + randomBoolean())); StringBuilder builder = new StringBuilder(); for (int i = 0; i < 6000; i++) { @@ -358,6 +363,7 @@ public void testHighTermFrequencyDoc() throws IOException { } client().prepareIndex("test").setId("1").setSource("name", builder.toString()).get(); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse search = client().prepareSearch() .setQuery(constantScoreQuery(matchQuery("name", "abc"))) .highlighter(new HighlightBuilder().field("name")) @@ -385,6 +391,7 @@ public void testEnsureNoNegativeOffsets() throws Exception { ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse search = client().prepareSearch() .setQuery(matchQuery("long_term", "thisisaverylongwordandmakessurethisfails foo highlighed")) .highlighter(new HighlightBuilder().field("long_term", 18, 1).highlighterType("fvh")) @@ -671,7 +678,7 @@ public void testHighlightIssue1994() throws Exception { assertHighlight(search, 0, "titleTV", 1, 2, equalTo("highlight other text")); } - public void testGlobalHighlightingSettingsOverriddenAtFieldLevel() { + public void testGlobalHighlightingSettingsOverriddenAtFieldLevel() throws InterruptedException { createIndex("test"); ensureGreen(); @@ -684,6 +691,7 @@ public void testGlobalHighlightingSettingsOverriddenAtFieldLevel() { ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1 and field2 produces different tags"); SearchSourceBuilder source = searchSource().query(termQuery("field1", "test")) @@ -734,6 +742,7 @@ public void testHighlightingOnWildcardFields() throws Exception { ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field*"); SearchSourceBuilder source = searchSource() @@ -783,6 +792,7 @@ public void testForceSourceWithSourceDisabled() throws Exception { .setSource("field1", "The quick brown fox jumps over the lazy dog", "field2", "second field content") .get(); refresh(); + indexRandomForConcurrentSearch("test"); // works using stored field SearchResponse searchResponse = client().prepareSearch("test") @@ -823,6 +833,7 @@ public void testPlainHighlighter() throws Exception { client().prepareIndex("test").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query(termQuery("field1", "test")) @@ -1025,6 +1036,7 @@ public void testFVHManyMatches() throws Exception { String value = new String(new char[1024 * 256 / pattern.length()]).replace("\0", pattern); client().prepareIndex("test").setSource("field1", value).get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1 with default phrase limit"); SearchSourceBuilder source = searchSource().query(termQuery("field1", "t")) @@ -1116,6 +1128,7 @@ private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception ); index("test", "type1", "3", "foo", "weird", "bar", "result"); refresh(); + indexRandomForConcurrentSearch("test"); Field fooField = new Field("foo").numOfFragments(1) .order("score") @@ -1408,6 +1421,7 @@ public void testMultiMapperVectorWithStore() throws Exception { ensureGreen(); client().prepareIndex("test").setId("1").setSource("title", "this is a test").get(); refresh(); + indexRandomForConcurrentSearch("test"); // simple search on body with standard analyzer with a simple field query SearchResponse search = client().prepareSearch() @@ -1453,6 +1467,7 @@ public void testMultiMapperVectorFromSource() throws Exception { client().prepareIndex("test").setId("1").setSource("title", "this is a test").get(); refresh(); + indexRandomForConcurrentSearch("test"); // simple search on body with standard analyzer with a simple field query SearchResponse search = client().prepareSearch() @@ -1498,6 +1513,7 @@ public void testMultiMapperNoVectorWithStore() throws Exception { ensureGreen(); client().prepareIndex("test").setId("1").setSource("title", "this is a test").get(); refresh(); + indexRandomForConcurrentSearch("test"); // simple search on body with standard analyzer with a simple field query SearchResponse search = client().prepareSearch() @@ -1542,6 +1558,7 @@ public void testMultiMapperNoVectorFromSource() throws Exception { ensureGreen(); client().prepareIndex("test").setId("1").setSource("title", "this is a test").get(); refresh(); + indexRandomForConcurrentSearch("test"); // simple search on body with standard analyzer with a simple field query SearchResponse search = client().prepareSearch() @@ -1571,6 +1588,7 @@ public void testFastVectorHighlighterShouldFailIfNoTermVectors() throws Exceptio .setSource("title", "This is a test for the enabling fast vector highlighter"); } indexRandom(true, indexRequestBuilders); + indexRandomForConcurrentSearch("test"); SearchResponse search = client().prepareSearch() .setQuery(matchPhraseQuery("title", "this is a test")) @@ -1608,6 +1626,7 @@ public void testDisableFastVectorHighlighter() throws Exception { .setSource("title", "This is a test for the workaround for the fast vector highlighting SOLR-3724"); } indexRandom(true, indexRequestBuilders); + indexRandomForConcurrentSearch("test"); SearchResponse search = client().prepareSearch() .setQuery(matchPhraseQuery("title", "test for the workaround")) @@ -1669,6 +1688,7 @@ public void testFSHHighlightAllMvFragments() throws Exception { ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("tags", "tag")) @@ -1686,11 +1706,12 @@ public void testFSHHighlightAllMvFragments() throws Exception { ); } - public void testBoostingQuery() { + public void testBoostingQuery() throws InterruptedException { createIndex("test"); ensureGreen(); client().prepareIndex("test").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query( @@ -1702,11 +1723,12 @@ public void testBoostingQuery() { assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); } - public void testBoostingQueryTermVector() throws IOException { + public void testBoostingQueryTermVector() throws IOException, InterruptedException { assertAcked(prepareCreate("test").setMapping(type1TermVectorMapping())); ensureGreen(); client().prepareIndex("test").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query( @@ -1718,12 +1740,13 @@ public void testBoostingQueryTermVector() throws IOException { assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); } - public void testCommonTermsQuery() { + public void testCommonTermsQuery() throws InterruptedException { createIndex("test"); ensureGreen(); client().prepareIndex("test").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) @@ -1733,12 +1756,13 @@ public void testCommonTermsQuery() { assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); } - public void testCommonTermsTermVector() throws IOException { + public void testCommonTermsTermVector() throws IOException, InterruptedException { assertAcked(prepareCreate("test").setMapping(type1TermVectorMapping())); ensureGreen(); client().prepareIndex("test").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) .highlighter(highlight().field("field2").order("score").preTags("").postTags("")); @@ -1764,6 +1788,7 @@ public void testPlainHighlightDifferentFragmenter() throws Exception { ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchPhraseQuery("tags", "long tag")) @@ -1816,12 +1841,13 @@ public void testPlainHighlightDifferentFragmenter() throws Exception { ); } - public void testPlainHighlighterMultipleFields() { + public void testPlainHighlighterMultipleFields() throws InterruptedException { createIndex("test"); ensureGreen(); index("test", "type1", "1", "field1", "The quick brown fox", "field2", "The slow brown fox"); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("field1", "fox")) @@ -1834,7 +1860,7 @@ public void testPlainHighlighterMultipleFields() { assertHighlight(response, 0, "field2", 0, 1, equalTo("The slow brown <2>fox")); } - public void testFastVectorHighlighterMultipleFields() { + public void testFastVectorHighlighterMultipleFields() throws InterruptedException { assertAcked( prepareCreate("test").setMapping( "field1", @@ -1847,6 +1873,7 @@ public void testFastVectorHighlighterMultipleFields() { index("test", "type1", "1", "field1", "The quick brown fox", "field2", "The slow brown fox"); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("field1", "fox")) @@ -1864,6 +1891,7 @@ public void testMissingStoredField() throws Exception { ensureGreen(); client().prepareIndex("test").setId("1").setSource(jsonBuilder().startObject().field("field", "highlight").endObject()).get(); refresh(); + indexRandomForConcurrentSearch("test"); // This query used to fail when the field to highlight was absent SearchResponse response = client().prepareSearch("test") @@ -1904,6 +1932,7 @@ public void testNumericHighlighting() throws Exception { .setSource("text", "opensearch test", "byte", 25, "short", 42, "int", 100, "long", -1, "float", 3.2f, "double", 42.42) .get(); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("text", "test")) @@ -1926,6 +1955,7 @@ public void testResetTwice() throws Exception { ensureGreen(); client().prepareIndex("test").setId("1").setSource("text", "opensearch test").get(); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("text", "test")) @@ -1935,7 +1965,7 @@ public void testResetTwice() throws Exception { assertHitCount(response, 1L); } - public void testHighlightUsesHighlightQuery() throws IOException { + public void testHighlightUsesHighlightQuery() throws IOException, InterruptedException { assertAcked( prepareCreate("test").setMapping( "text", @@ -1946,6 +1976,7 @@ public void testHighlightUsesHighlightQuery() throws IOException { index("test", "type1", "1", "text", "Testing the highlight query feature"); refresh(); + indexRandomForConcurrentSearch("test"); for (String type : ALL_TYPES) { HighlightBuilder.Field field = new HighlightBuilder.Field("text"); @@ -1981,7 +2012,11 @@ private static String randomStoreField() { return ""; } - public void testHighlightNoMatchSize() throws IOException { + public void testHighlightNoMatchSize() throws IOException, InterruptedException { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/10900", + internalCluster().clusterService().getClusterSettings().get(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING) + ); assertAcked( prepareCreate("test").setMapping( "text", @@ -1993,6 +2028,7 @@ public void testHighlightNoMatchSize() throws IOException { String text = "I am pretty long so some of me should get cut off. Second sentence"; index("test", "type1", "1", "text", text); refresh(); + indexRandomForConcurrentSearch("test"); // When you don't set noMatchSize you don't get any results if there isn't anything to highlight. HighlightBuilder.Field field = new HighlightBuilder.Field("text").fragmentSize(21).numOfFragments(1).highlighterType("plain"); @@ -2091,7 +2127,11 @@ public void testHighlightNoMatchSize() throws IOException { assertNotHighlighted(response, 0, "text"); } - public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { + public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException, InterruptedException { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/10900", + internalCluster().clusterService().getClusterSettings().get(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING) + ); assertAcked( prepareCreate("test").setMapping( "text", @@ -2104,6 +2144,7 @@ public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { String text2 = "I am short"; index("test", "type1", "1", "text", new String[] { text1, text2 }); refresh(); + indexRandomForConcurrentSearch("test"); // The no match fragment should come from the first value of a multi-valued field HighlightBuilder.Field field = new HighlightBuilder.Field("text").fragmentSize(21) @@ -2186,7 +2227,11 @@ public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { assertNotHighlighted(response, 0, "text"); } - public void testHighlightNoMatchSizeNumberOfFragments() throws IOException { + public void testHighlightNoMatchSizeNumberOfFragments() throws IOException, InterruptedException { + assumeFalse( + "Concurrent search case muted pending fix: https://github.com/opensearch-project/OpenSearch/issues/10900", + internalCluster().clusterService().getClusterSettings().get(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING) + ); assertAcked( prepareCreate("test").setMapping( "text", @@ -2200,6 +2245,7 @@ public void testHighlightNoMatchSizeNumberOfFragments() throws IOException { String text3 = "This is the fifth sentence"; index("test", "type1", "1", "text", new String[] { text1, text2, text3 }); refresh(); + indexRandomForConcurrentSearch("test"); // The no match fragment should come from the first value of a multi-valued field HighlightBuilder.Field field = new HighlightBuilder.Field("text").fragmentSize(1) @@ -2243,6 +2289,7 @@ public void testPostingsHighlighter() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy quick dog") .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query(termQuery("field1", "test")) @@ -2320,6 +2367,7 @@ public void testPostingsHighlighterMultipleFields() throws Exception { "The slow brown fox. Second sentence." ); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("field1", "fox")) @@ -2344,6 +2392,7 @@ public void testPostingsHighlighterNumberOfFragments() throws Exception { ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query(termQuery("field1", "fox")) @@ -2376,6 +2425,7 @@ public void testPostingsHighlighterNumberOfFragments() throws Exception { ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); source = searchSource().query(termQuery("field1", "fox")) .highlighter(highlight().field(new Field("field1").numOfFragments(0).preTags("").postTags(""))); @@ -2412,7 +2462,7 @@ public void testPostingsHighlighterNumberOfFragments() throws Exception { } } - public void testMultiMatchQueryHighlight() throws IOException { + public void testMultiMatchQueryHighlight() throws IOException, InterruptedException { XContentBuilder mapping = XContentFactory.jsonBuilder() .startObject() .startObject("properties") @@ -2434,6 +2484,7 @@ public void testMultiMatchQueryHighlight() throws IOException { .setSource("field1", "The quick brown fox jumps over", "field2", "The quick brown fox jumps over") .get(); refresh(); + indexRandomForConcurrentSearch("test"); final int iters = scaledRandomIntBetween(20, 30); for (int i = 0; i < iters; i++) { String highlighterType = rarely() ? null : RandomPicks.randomFrom(random(), ALL_TYPES); @@ -2479,6 +2530,7 @@ public void testPostingsHighlighterOrderByScore() throws Exception { ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query(termQuery("field1", "sentence")) @@ -2565,6 +2617,7 @@ public void testPostingsHighlighterMultiMapperWithStore() throws Exception { ensureGreen(); client().prepareIndex("test").setId("1").setSource("title", "this is a test . Second sentence.").get(); refresh(); + indexRandomForConcurrentSearch("test"); // simple search on body with standard analyzer with a simple field query SearchResponse searchResponse = client().prepareSearch() @@ -2623,6 +2676,7 @@ public void testPostingsHighlighterMultiMapperFromSource() throws Exception { client().prepareIndex("test").setId("1").setSource("title", "this is a test").get(); refresh(); + indexRandomForConcurrentSearch("test"); // simple search on body with standard analyzer with a simple field query SearchResponse searchResponse = client().prepareSearch() @@ -2672,13 +2726,14 @@ public void testPostingsHighlighterShouldFailIfNoOffsets() throws Exception { assertNoFailures(search); } - public void testPostingsHighlighterBoostingQuery() throws IOException { + public void testPostingsHighlighterBoostingQuery() throws IOException, InterruptedException { assertAcked(prepareCreate("test").setMapping(type1PostingsffsetsMapping())); ensureGreen(); client().prepareIndex("test") .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.") .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query( @@ -2689,7 +2744,7 @@ public void testPostingsHighlighterBoostingQuery() throws IOException { assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog! Second sentence.")); } - public void testPostingsHighlighterCommonTermsQuery() throws IOException { + public void testPostingsHighlighterCommonTermsQuery() throws IOException, InterruptedException { assertAcked(prepareCreate("test").setMapping(type1PostingsffsetsMapping())); ensureGreen(); @@ -2697,6 +2752,7 @@ public void testPostingsHighlighterCommonTermsQuery() throws IOException { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.") .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) @@ -2738,6 +2794,7 @@ public void testPostingsHighlighterPrefixQuery() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.") .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field2"); SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui")).highlighter(highlight().field("field2")); @@ -2760,6 +2817,7 @@ public void testPostingsHighlighterFuzzyQuery() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.") .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field2"); SearchSourceBuilder source = searchSource().query(fuzzyQuery("field2", "quck")).highlighter(highlight().field("field2")); @@ -2783,6 +2841,7 @@ public void testPostingsHighlighterRegexpQuery() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.") .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field2"); SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k")).highlighter(highlight().field("field2")); @@ -2806,6 +2865,7 @@ public void testPostingsHighlighterWildcardQuery() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.") .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field2"); SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*")).highlighter(highlight().field("field2")); @@ -2840,6 +2900,7 @@ public void testPostingsHighlighterTermRangeQuery() throws Exception { client().prepareIndex("test").setSource("field1", "this is a test", "field2", "aaab").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field2"); SearchSourceBuilder source = searchSource().query(rangeQuery("field2").gte("aaaa").lt("zzzz")) @@ -2857,6 +2918,7 @@ public void testPostingsHighlighterQueryString() throws Exception { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.") .get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field2"); SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2")) @@ -2878,6 +2940,7 @@ public void testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws client().prepareIndex("test").setSource("field1", "The photography word will get highlighted").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+"))) @@ -2892,6 +2955,7 @@ public void testPostingsHighlighterMultiTermQueryMultipleLevels() throws Excepti client().prepareIndex("test").setSource("field1", "The photography word will get highlighted").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query( @@ -2909,6 +2973,7 @@ public void testPostingsHighlighterPrefixQueryWithinBooleanQuery() throws Except client().prepareIndex("test").setSource("field1", "The photography word will get highlighted").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query( @@ -2924,6 +2989,7 @@ public void testPostingsHighlighterQueryStringWithinFilteredQuery() throws Excep client().prepareIndex("test").setSource("field1", "The photography word will get highlighted").get(); refresh(); + indexRandomForConcurrentSearch("test"); logger.info("--> highlighting and searching on field1"); SearchSourceBuilder source = searchSource().query( @@ -3028,7 +3094,7 @@ public void testFastVectorHighlighterPhraseBoost() throws Exception { * because it doesn't support the concept of terms having a different weight based on position. * @param highlighterType highlighter to test */ - private void phraseBoostTestCase(String highlighterType) { + private void phraseBoostTestCase(String highlighterType) throws InterruptedException { ensureGreen(); StringBuilder text = new StringBuilder(); text.append("words words junk junk junk junk junk junk junk junk highlight junk junk junk junk together junk\n"); @@ -3041,6 +3107,7 @@ private void phraseBoostTestCase(String highlighterType) { } index("test", "type1", "1", "field1", text.toString()); refresh(); + indexRandomForConcurrentSearch("test"); // Match queries phraseBoostTestCaseForClauses( @@ -3109,7 +3176,7 @@ private

> void phraseBoostTestCaseForClauses( assertHighlight(response, 0, "field1", 0, 1, highlightedMatcher); } - public void testGeoFieldHighlightingWithDifferentHighlighters() throws IOException { + public void testGeoFieldHighlightingWithDifferentHighlighters() throws IOException, InterruptedException { // check that we do not get an exception for geo_point fields in case someone tries to highlight // it accidentially with a wildcard // see https://github.com/elastic/elasticsearch/issues/17537 @@ -3133,6 +3200,7 @@ public void testGeoFieldHighlightingWithDifferentHighlighters() throws IOExcepti .setSource(jsonBuilder().startObject().field("text", "Arbitrary text field which will should not cause a failure").endObject()) .get(); refresh(); + indexRandomForConcurrentSearch("test"); String highlighterType = randomFrom(ALL_TYPES); QueryBuilder query = QueryBuilders.boolQuery() .should( @@ -3150,7 +3218,7 @@ public void testGeoFieldHighlightingWithDifferentHighlighters() throws IOExcepti assertThat(search.getHits().getAt(0).getHighlightFields().get("text").fragments().length, equalTo(1)); } - public void testGeoFieldHighlightingWhenQueryGetsRewritten() throws IOException { + public void testGeoFieldHighlightingWhenQueryGetsRewritten() throws IOException, InterruptedException { // same as above but in this example the query gets rewritten during highlighting // see https://github.com/elastic/elasticsearch/issues/17537#issuecomment-244939633 XContentBuilder mappings = jsonBuilder(); @@ -3177,6 +3245,7 @@ public void testGeoFieldHighlightingWhenQueryGetsRewritten() throws IOException ) .get(); refresh(); + indexRandomForConcurrentSearch("test"); QueryBuilder query = QueryBuilders.functionScoreQuery( QueryBuilders.boolQuery() @@ -3192,7 +3261,7 @@ public void testGeoFieldHighlightingWhenQueryGetsRewritten() throws IOException assertThat(search.getHits().getTotalHits().value, equalTo(1L)); } - public void testKeywordFieldHighlighting() throws IOException { + public void testKeywordFieldHighlighting() throws IOException, InterruptedException { // check that keyword highlighting works XContentBuilder mappings = jsonBuilder(); mappings.startObject(); @@ -3205,6 +3274,7 @@ public void testKeywordFieldHighlighting() throws IOException { .setSource(jsonBuilder().startObject().field("keyword_field", "some text").endObject()) .get(); refresh(); + indexRandomForConcurrentSearch("test"); SearchResponse search = client().prepareSearch() .setSource( new SearchSourceBuilder().query(QueryBuilders.matchQuery("keyword_field", "some text")) @@ -3238,6 +3308,7 @@ public void testCopyToFields() throws Exception { .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); + indexRandomForConcurrentSearch("test"); SearchResponse response = client().prepareSearch() .setQuery(matchQuery("foo_copy", "brown")) .highlighter(new HighlightBuilder().field(new Field("foo_copy"))) @@ -3287,7 +3358,7 @@ public void testACopyFieldWithNestedQuery() throws Exception { ) .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); - + indexRandomForConcurrentSearch("test"); SearchResponse searchResponse = client().prepareSearch() .setQuery(nestedQuery("foo", matchQuery("foo.text", "brown cow"), ScoreMode.None)) .highlighter(new HighlightBuilder().field(new Field("foo_text").highlighterType("fvh")).requireFieldMatch(false)) @@ -3305,6 +3376,7 @@ public void testFunctionScoreQueryHighlight() throws Exception { .setSource(jsonBuilder().startObject().field("text", "brown").endObject()) .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); + indexRandomForConcurrentSearch("test"); SearchResponse searchResponse = client().prepareSearch() .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"))) @@ -3322,6 +3394,7 @@ public void testFiltersFunctionScoreQueryHighlight() throws Exception { .setSource(jsonBuilder().startObject().field("text", "brown").field("enable", "yes").endObject()) .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); + indexRandomForConcurrentSearch("test"); FunctionScoreQueryBuilder.FilterFunctionBuilder filterBuilder = new FunctionScoreQueryBuilder.FilterFunctionBuilder( QueryBuilders.termQuery("enable", "yes"), new RandomScoreFunctionBuilder() @@ -3420,6 +3493,7 @@ public void testWithNestedQuery() throws Exception { ) .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); + indexRandomForConcurrentSearch("test"); for (String type : new String[] { "unified", "plain" }) { SearchResponse searchResponse = client().prepareSearch() @@ -3477,6 +3551,7 @@ public void testWithNormalizer() throws Exception { .setSource("keyword", "Hello World") .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); + indexRandomForConcurrentSearch("test"); for (String highlighterType : new String[] { "unified", "plain" }) { SearchResponse searchResponse = client().prepareSearch() @@ -3499,6 +3574,7 @@ public void testDisableHighlightIdField() throws Exception { .setSource("keyword", "Hello World") .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); + indexRandomForConcurrentSearch("test"); for (String highlighterType : new String[] { "plain", "unified" }) { SearchResponse searchResponse = client().prepareSearch() diff --git a/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java index 9cbe2d2ffcb7d..8fe1be610f9af 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java +++ b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java @@ -75,7 +75,7 @@ private void logQueryShape(QueryBuilder topLevelQueryBuilder) { } QueryShapeVisitor shapeVisitor = new QueryShapeVisitor(); topLevelQueryBuilder.visit(shapeVisitor); - log.debug("Query shape : {}", shapeVisitor.prettyPrintTree(" ")); + log.trace("Query shape : {}", shapeVisitor.prettyPrintTree(" ")); } } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 9e50213eab5f9..fb026dae630b7 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -256,7 +256,7 @@ public SearchRequest(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_7_0)) { pipeline = in.readOptionalString(); } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { phaseTook = in.readOptionalBoolean(); } } @@ -290,7 +290,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_7_0)) { out.writeOptionalString(pipeline); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalBoolean(phaseTook); } } diff --git a/server/src/main/java/org/opensearch/action/search/SearchResponse.java b/server/src/main/java/org/opensearch/action/search/SearchResponse.java index 91f0dc0737637..96d07982d03db 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchResponse.java +++ b/server/src/main/java/org/opensearch/action/search/SearchResponse.java @@ -116,7 +116,7 @@ public SearchResponse(StreamInput in) throws IOException { clusters = new Clusters(in); scrollId = in.readOptionalString(); tookInMillis = in.readVLong(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { phaseTook = in.readOptionalWriteable(PhaseTook::new); } else { phaseTook = null; @@ -557,7 +557,7 @@ public void writeTo(StreamOutput out) throws IOException { clusters.writeTo(out); out.writeOptionalString(scrollId); out.writeVLong(tookInMillis); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(phaseTook); } out.writeVInt(skippedShards); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java index a339852e6ed8d..987a3e3ffa7d3 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java @@ -638,6 +638,12 @@ public interface PersistedState extends Closeable { */ void setLastAcceptedState(ClusterState clusterState); + /** + * Returns the stats for the persistence layer for {@link CoordinationState}. + * @return PersistedStateStats + */ + PersistedStateStats getStats(); + /** * Marks the last accepted cluster state as committed. * After a successful call to this method, {@link #getLastAcceptedState()} should return the last cluster state that was set, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index eb30460ca1b7f..a4ffab7fb70c9 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -56,6 +56,7 @@ import org.opensearch.cluster.service.ClusterApplier; import org.opensearch.cluster.service.ClusterApplier.ClusterApplyListener; import org.opensearch.cluster.service.ClusterManagerService; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.Booleans; import org.opensearch.common.Nullable; import org.opensearch.common.Priority; @@ -865,7 +866,16 @@ protected void doStart() { @Override public DiscoveryStats stats() { - return new DiscoveryStats(new PendingClusterStateStats(0, 0, 0), publicationHandler.stats()); + ClusterStateStats clusterStateStats = clusterManagerService.getClusterStateStats(); + ArrayList stats = new ArrayList<>(); + Stream.of(PersistedStateRegistry.PersistedStateType.values()).forEach(stateType -> { + if (persistedStateRegistry.getPersistedState(stateType) != null + && persistedStateRegistry.getPersistedState(stateType).getStats() != null) { + stats.add(persistedStateRegistry.getPersistedState(stateType).getStats()); + } + }); + clusterStateStats.setPersistenceStats(stats); + return new DiscoveryStats(new PendingClusterStateStats(0, 0, 0), publicationHandler.stats(), clusterStateStats); } @Override diff --git a/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java b/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java index 67ef82ee7b2e9..b77ede5471534 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java @@ -65,6 +65,11 @@ public void setLastAcceptedState(ClusterState clusterState) { this.acceptedState = clusterState; } + @Override + public PersistedStateStats getStats() { + return null; + } + @Override public long getCurrentTerm() { return currentTerm; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java b/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java new file mode 100644 index 0000000000000..4d466c4b3ad73 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java @@ -0,0 +1,132 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.coordination; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Persisted cluster state related stats. + * + * @opensearch.internal + */ +public class PersistedStateStats implements Writeable, ToXContentObject { + private final String statsName; + private AtomicLong totalTimeInMillis = new AtomicLong(0); + private AtomicLong failedCount = new AtomicLong(0); + private AtomicLong successCount = new AtomicLong(0); + private Map extendedFields = new HashMap<>(); // keeping minimal extensibility + + public PersistedStateStats(String statsName) { + this.statsName = statsName; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(statsName); + out.writeVLong(successCount.get()); + out.writeVLong(failedCount.get()); + out.writeVLong(totalTimeInMillis.get()); + if (extendedFields.size() > 0) { + out.writeBoolean(true); + out.writeVInt(extendedFields.size()); + for (Map.Entry extendedField : extendedFields.entrySet()) { + out.writeString(extendedField.getKey()); + out.writeVLong(extendedField.getValue().get()); + } + } else { + out.writeBoolean(false); + } + } + + public PersistedStateStats(StreamInput in) throws IOException { + this.statsName = in.readString(); + this.successCount = new AtomicLong(in.readVLong()); + this.failedCount = new AtomicLong(in.readVLong()); + this.totalTimeInMillis = new AtomicLong(in.readVLong()); + if (in.readBoolean()) { + int extendedFieldsSize = in.readVInt(); + this.extendedFields = new HashMap<>(); + for (int fieldNumber = 0; fieldNumber < extendedFieldsSize; fieldNumber++) { + extendedFields.put(in.readString(), new AtomicLong(in.readVLong())); + } + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(statsName); + builder.field(Fields.SUCCESS_COUNT, getSuccessCount()); + builder.field(Fields.FAILED_COUNT, getFailedCount()); + builder.field(Fields.TOTAL_TIME_IN_MILLIS, getTotalTimeInMillis()); + if (extendedFields.size() > 0) { + for (Map.Entry extendedField : extendedFields.entrySet()) { + builder.field(extendedField.getKey(), extendedField.getValue().get()); + } + } + builder.endObject(); + return builder; + } + + public void stateFailed() { + failedCount.incrementAndGet(); + } + + public void stateSucceeded() { + successCount.incrementAndGet(); + } + + /** + * Expects user to send time taken in milliseconds. + * + * @param timeTakenInUpload time taken in uploading the cluster state to remote + */ + public void stateTook(long timeTakenInUpload) { + totalTimeInMillis.addAndGet(timeTakenInUpload); + } + + public long getTotalTimeInMillis() { + return totalTimeInMillis.get(); + } + + public long getFailedCount() { + return failedCount.get(); + } + + public long getSuccessCount() { + return successCount.get(); + } + + protected void addToExtendedFields(String extendedField, AtomicLong extendedFieldValue) { + this.extendedFields.put(extendedField, extendedFieldValue); + } + + public String getStatsName() { + return statsName; + } + + /** + * Fields for parsing and toXContent + * + * @opensearch.internal + */ + static final class Fields { + static final String SUCCESS_COUNT = "success_count"; + static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; + static final String FAILED_COUNT = "failed_count"; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index 8d76a39712ee3..78a22fe11f072 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -1252,6 +1252,7 @@ List getIndexSettingsValidationErrors( if (forbidPrivateIndexSettings) { validationErrors.addAll(validatePrivateSettingsNotExplicitlySet(settings, indexScopedSettings)); } + validateIndexReplicationTypeSettings(settings, clusterService.getClusterSettings()).ifPresent(validationErrors::add); if (indexName.isEmpty() || indexName.get().charAt(0) != '.') { // Apply aware replica balance validation only to non system indices int replicaCount = settings.getAsInt( @@ -1306,6 +1307,24 @@ private static List validateIndexCustomPath(Settings settings, @Nullable return validationErrors; } + /** + * Validates {@code index.replication.type} is not set if {@code cluster.restrict.index.replication_type} is set to true. + * + * @param requestSettings settings passed in during index create request + * @param clusterSettings cluster setting + */ + private static Optional validateIndexReplicationTypeSettings(Settings requestSettings, ClusterSettings clusterSettings) { + if (requestSettings.hasValue(SETTING_REPLICATION_TYPE) + && clusterSettings.get(IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING)) { + return Optional.of( + "index setting [index.replication.type] is not allowed to be set as [" + + IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey() + + "=true]" + ); + } + return Optional.empty(); + } + /** * Validates the settings and mappings for shrinking an index. * diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java b/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java new file mode 100644 index 0000000000000..96683ce720d0b --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java @@ -0,0 +1,120 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.service; + +import org.opensearch.cluster.coordination.PersistedStateStats; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Cluster state related stats. + * + * @opensearch.internal + */ +public class ClusterStateStats implements Writeable, ToXContentObject { + + private AtomicLong updateSuccess = new AtomicLong(0); + private AtomicLong updateTotalTimeInMillis = new AtomicLong(0); + private AtomicLong updateFailed = new AtomicLong(0); + private List persistenceStats = new ArrayList<>(); + + public ClusterStateStats() {} + + public long getUpdateSuccess() { + return updateSuccess.get(); + } + + public long getUpdateTotalTimeInMillis() { + return updateTotalTimeInMillis.get(); + } + + public long getUpdateFailed() { + return updateFailed.get(); + } + + public List getPersistenceStats() { + return persistenceStats; + } + + public void stateUpdated() { + updateSuccess.incrementAndGet(); + } + + public void stateUpdateFailed() { + updateFailed.incrementAndGet(); + } + + public void stateUpdateTook(long stateUpdateTime) { + updateTotalTimeInMillis.addAndGet(stateUpdateTime); + } + + public ClusterStateStats setPersistenceStats(List persistenceStats) { + this.persistenceStats = persistenceStats; + return this; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(updateSuccess.get()); + out.writeVLong(updateTotalTimeInMillis.get()); + out.writeVLong(updateFailed.get()); + out.writeVInt(persistenceStats.size()); + for (PersistedStateStats stats : persistenceStats) { + stats.writeTo(out); + } + } + + public ClusterStateStats(StreamInput in) throws IOException { + this.updateSuccess = new AtomicLong(in.readVLong()); + this.updateTotalTimeInMillis = new AtomicLong(in.readVLong()); + this.updateFailed = new AtomicLong(in.readVLong()); + int persistedStatsSize = in.readVInt(); + this.persistenceStats = new ArrayList<>(); + for (int statsNumber = 0; statsNumber < persistedStatsSize; statsNumber++) { + PersistedStateStats stats = new PersistedStateStats(in); + this.persistenceStats.add(stats); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(Fields.CLUSTER_STATE_STATS); + builder.startObject(Fields.OVERALL); + builder.field(Fields.UPDATE_COUNT, getUpdateSuccess()); + builder.field(Fields.TOTAL_TIME_IN_MILLIS, getUpdateTotalTimeInMillis()); + builder.field(Fields.FAILED_COUNT, getUpdateFailed()); + builder.endObject(); + for (PersistedStateStats stats : persistenceStats) { + stats.toXContent(builder, params); + } + builder.endObject(); + return builder; + } + + /** + * Fields for parsing and toXContent + * + * @opensearch.internal + */ + static final class Fields { + static final String CLUSTER_STATE_STATS = "cluster_state_stats"; + static final String OVERALL = "overall"; + static final String UPDATE_COUNT = "update_count"; + static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; + static final String FAILED_COUNT = "failed_count"; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/service/MasterService.java b/server/src/main/java/org/opensearch/cluster/service/MasterService.java index 563b69dfd0e2a..07c3f93ae6486 100644 --- a/server/src/main/java/org/opensearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/MasterService.java @@ -112,7 +112,9 @@ public class MasterService extends AbstractLifecycleComponent { static final String CLUSTER_MANAGER_UPDATE_THREAD_NAME = "clusterManagerService#updateTask"; - /** @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #CLUSTER_MANAGER_UPDATE_THREAD_NAME} */ + /** + * @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #CLUSTER_MANAGER_UPDATE_THREAD_NAME} + */ @Deprecated static final String MASTER_UPDATE_THREAD_NAME = "masterService#updateTask"; @@ -130,6 +132,7 @@ public class MasterService extends AbstractLifecycleComponent { private volatile Batcher taskBatcher; protected final ClusterManagerTaskThrottler clusterManagerTaskThrottler; private final ClusterManagerThrottlingStats throttlingStats; + private final ClusterStateStats stateStats; public MasterService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { this.nodeName = Objects.requireNonNull(Node.NODE_NAME_SETTING.get(settings)); @@ -147,6 +150,7 @@ public MasterService(Settings settings, ClusterSettings clusterSettings, ThreadP this::getMinNodeVersion, throttlingStats ); + this.stateStats = new ClusterStateStats(); this.threadPool = threadPool; } @@ -339,7 +343,7 @@ private TimeValue getTimeSince(long startTimeNanos) { return TimeValue.timeValueMillis(TimeValue.nsecToMSec(threadPool.preciseRelativeTimeInNanos() - startTimeNanos)); } - protected void publish(ClusterChangedEvent clusterChangedEvent, TaskOutputs taskOutputs, long startTimeMillis) { + protected void publish(ClusterChangedEvent clusterChangedEvent, TaskOutputs taskOutputs, long startTimeNanos) { final PlainActionFuture fut = new PlainActionFuture() { @Override protected boolean blockingAllowed() { @@ -352,8 +356,12 @@ protected boolean blockingAllowed() { try { FutureUtils.get(fut); onPublicationSuccess(clusterChangedEvent, taskOutputs); + final long durationMillis = getTimeSince(startTimeNanos).millis(); + stateStats.stateUpdateTook(durationMillis); + stateStats.stateUpdated(); } catch (Exception e) { - onPublicationFailed(clusterChangedEvent, taskOutputs, startTimeMillis, e); + stateStats.stateUpdateFailed(); + onPublicationFailed(clusterChangedEvent, taskOutputs, startTimeNanos, e); } } @@ -464,7 +472,6 @@ public Builder incrementVersion(ClusterState clusterState) { * @param source the source of the cluster state update task * @param updateTask the full context for the cluster state update * task - * */ public & ClusterStateTaskListener> void submitStateUpdateTask( String source, @@ -490,7 +497,6 @@ public & Cluster * @param listener callback after the cluster state update task * completes * @param the type of the cluster state update task state - * */ public void submitStateUpdateTask( String source, @@ -947,7 +953,7 @@ void onNoLongerClusterManager() { /** * Functionality for register task key to cluster manager node. * - * @param taskKey - task key of task + * @param taskKey - task key of task * @param throttlingEnabled - throttling is enabled for task or not i.e does data node perform retries on it or not * @return throttling task key which needs to be passed while submitting task to cluster manager */ @@ -966,7 +972,6 @@ public ClusterManagerTaskThrottler.ThrottlingKey registerClusterManagerTask(Stri * that share the same executor will be executed * batches on this executor * @param the type of the cluster state update task state - * */ public void submitStateUpdateTasks( final String source, @@ -996,4 +1001,8 @@ public void submitStateUpdateTasks( } } + public ClusterStateStats getClusterStateStats() { + return stateStats; + } + } diff --git a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java index b8c0b52f93a3c..3f341c878c3c7 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java +++ b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java @@ -15,5 +15,6 @@ */ public enum WritePriority { NORMAL, - HIGH + HIGH, + URGENT } diff --git a/server/src/main/java/org/opensearch/common/rounding/DateTimeUnit.java b/server/src/main/java/org/opensearch/common/rounding/DateTimeUnit.java deleted file mode 100644 index 47e182b3caf84..0000000000000 --- a/server/src/main/java/org/opensearch/common/rounding/DateTimeUnit.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.common.rounding; - -import org.opensearch.OpenSearchException; -import org.opensearch.common.joda.Joda; -import org.joda.time.DateTimeField; -import org.joda.time.DateTimeZone; -import org.joda.time.chrono.ISOChronology; - -import java.util.function.Function; - -/** - * Main date time unit class. - * - * @opensearch.internal - */ -public enum DateTimeUnit { - - WEEK_OF_WEEKYEAR((byte) 1, tz -> ISOChronology.getInstance(tz).weekOfWeekyear()), - YEAR_OF_CENTURY((byte) 2, tz -> ISOChronology.getInstance(tz).yearOfCentury()), - QUARTER((byte) 3, tz -> Joda.QuarterOfYear.getField(ISOChronology.getInstance(tz))), - MONTH_OF_YEAR((byte) 4, tz -> ISOChronology.getInstance(tz).monthOfYear()), - DAY_OF_MONTH((byte) 5, tz -> ISOChronology.getInstance(tz).dayOfMonth()), - HOUR_OF_DAY((byte) 6, tz -> ISOChronology.getInstance(tz).hourOfDay()), - MINUTES_OF_HOUR((byte) 7, tz -> ISOChronology.getInstance(tz).minuteOfHour()), - SECOND_OF_MINUTE((byte) 8, tz -> ISOChronology.getInstance(tz).secondOfMinute()); - - private final byte id; - private final Function fieldFunction; - - DateTimeUnit(byte id, Function fieldFunction) { - this.id = id; - this.fieldFunction = fieldFunction; - } - - public byte id() { - return id; - } - - /** - * @return the {@link DateTimeField} for the provided {@link DateTimeZone} for this time unit - */ - public DateTimeField field(DateTimeZone tz) { - return fieldFunction.apply(tz); - } - - public static DateTimeUnit resolve(byte id) { - switch (id) { - case 1: - return WEEK_OF_WEEKYEAR; - case 2: - return YEAR_OF_CENTURY; - case 3: - return QUARTER; - case 4: - return MONTH_OF_YEAR; - case 5: - return DAY_OF_MONTH; - case 6: - return HOUR_OF_DAY; - case 7: - return MINUTES_OF_HOUR; - case 8: - return SECOND_OF_MINUTE; - default: - throw new OpenSearchException("Unknown date time unit id [" + id + "]"); - } - } -} diff --git a/server/src/main/java/org/opensearch/common/rounding/Rounding.java b/server/src/main/java/org/opensearch/common/rounding/Rounding.java deleted file mode 100644 index 41e808b64f7d9..0000000000000 --- a/server/src/main/java/org/opensearch/common/rounding/Rounding.java +++ /dev/null @@ -1,459 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.common.rounding; - -import org.opensearch.OpenSearchException; -import org.opensearch.common.unit.TimeValue; -import org.opensearch.core.common.io.stream.StreamInput; -import org.opensearch.core.common.io.stream.StreamOutput; -import org.opensearch.core.common.io.stream.Writeable; -import org.joda.time.DateTimeField; -import org.joda.time.DateTimeZone; -import org.joda.time.IllegalInstantException; - -import java.io.IOException; -import java.util.Objects; - -/** - * A strategy for rounding long values. - *

- * Use the java based Rounding class where applicable - * - * @opensearch.internal - */ -@Deprecated -public abstract class Rounding implements Writeable { - - public abstract byte id(); - - /** - * Rounds the given value. - */ - public abstract long round(long value); - - /** - * Given the rounded value (which was potentially generated by {@link #round(long)}, returns the next rounding value. For example, with - * interval based rounding, if the interval is 3, {@code nextRoundValue(6) = 9 }. - * - * @param value The current rounding value - * @return The next rounding value; - */ - public abstract long nextRoundingValue(long value); - - @Override - public abstract boolean equals(Object obj); - - @Override - public abstract int hashCode(); - - public static Builder builder(DateTimeUnit unit) { - return new Builder(unit); - } - - public static Builder builder(TimeValue interval) { - return new Builder(interval); - } - - /** - * Builder for rounding - * - * @opensearch.internal - */ - public static class Builder { - - private final DateTimeUnit unit; - private final long interval; - - private DateTimeZone timeZone = DateTimeZone.UTC; - - public Builder(DateTimeUnit unit) { - this.unit = unit; - this.interval = -1; - } - - public Builder(TimeValue interval) { - this.unit = null; - if (interval.millis() < 1) throw new IllegalArgumentException("Zero or negative time interval not supported"); - this.interval = interval.millis(); - } - - public Builder timeZone(DateTimeZone timeZone) { - if (timeZone == null) { - throw new IllegalArgumentException("Setting null as timezone is not supported"); - } - this.timeZone = timeZone; - return this; - } - - public Rounding build() { - Rounding timeZoneRounding; - if (unit != null) { - timeZoneRounding = new TimeUnitRounding(unit, timeZone); - } else { - timeZoneRounding = new TimeIntervalRounding(interval, timeZone); - } - return timeZoneRounding; - } - } - - /** - * Rounding time units - * - * @opensearch.internal - */ - static class TimeUnitRounding extends Rounding { - - static final byte ID = 1; - - private final DateTimeUnit unit; - private final DateTimeField field; - private final DateTimeZone timeZone; - private final boolean unitRoundsToMidnight; - - TimeUnitRounding(DateTimeUnit unit, DateTimeZone timeZone) { - this.unit = unit; - this.field = unit.field(timeZone); - unitRoundsToMidnight = this.field.getDurationField().getUnitMillis() > 60L * 60L * 1000L; - this.timeZone = timeZone; - } - - TimeUnitRounding(StreamInput in) throws IOException { - unit = DateTimeUnit.resolve(in.readByte()); - timeZone = DateTimeZone.forID(in.readString()); - field = unit.field(timeZone); - unitRoundsToMidnight = field.getDurationField().getUnitMillis() > 60L * 60L * 1000L; - } - - @Override - public byte id() { - return ID; - } - - /** - * @return The latest timestamp T which is strictly before utcMillis - * and such that timeZone.getOffset(T) != timeZone.getOffset(utcMillis). - * If there is no such T, returns Long.MAX_VALUE. - */ - private long previousTransition(long utcMillis) { - final int offsetAtInputTime = timeZone.getOffset(utcMillis); - do { - // Some timezones have transitions that do not change the offset, so we have to - // repeatedly call previousTransition until a nontrivial transition is found. - - long previousTransition = timeZone.previousTransition(utcMillis); - if (previousTransition == utcMillis) { - // There are no earlier transitions - return Long.MAX_VALUE; - } - assert previousTransition < utcMillis; // Progress was made - utcMillis = previousTransition; - } while (timeZone.getOffset(utcMillis) == offsetAtInputTime); - - return utcMillis; - } - - @Override - public long round(long utcMillis) { - - // field.roundFloor() works as long as the offset doesn't change. It is worth getting this case out of the way first, as - // the calculations for fixing things near to offset changes are a little expensive and are unnecessary in the common case - // of working in UTC. - if (timeZone.isFixed()) { - return field.roundFloor(utcMillis); - } - - // When rounding to hours we consider any local time of the form 'xx:00:00' as rounded, even though this gives duplicate - // bucket names for the times when the clocks go back. Shorter units behave similarly. However, longer units round down to - // midnight, and on the days where there are two midnights we would rather pick the earlier one, so that buckets are - // uniquely identified by the date. - if (unitRoundsToMidnight) { - final long anyLocalStartOfDay = field.roundFloor(utcMillis); - // `anyLocalStartOfDay` is _supposed_ to be the Unix timestamp for the start of the day in question in the current time - // zone. Mostly this just means "midnight", which is fine, and on days with no local midnight it's the first time that - // does occur on that day which is also ok. However, on days with >1 local midnight this is _one_ of the midnights, but - // may not be the first. Check whether this is happening, and fix it if so. - - final long previousTransition = previousTransition(anyLocalStartOfDay); - - if (previousTransition == Long.MAX_VALUE) { - // No previous transitions, so there can't be another earlier local midnight. - return anyLocalStartOfDay; - } - - final long currentOffset = timeZone.getOffset(anyLocalStartOfDay); - final long previousOffset = timeZone.getOffset(previousTransition); - assert currentOffset != previousOffset; - - // NB we only assume interference from one previous transition. It's theoretically possible to have two transitions in - // quick succession, both of which have a midnight in them, but this doesn't appear to happen in the TZDB so (a) it's - // pointless to implement and (b) it won't be tested. I recognise that this comment is tempting fate and will likely - // cause this very situation to occur in the near future, and eagerly look forward to fixing this using a loop over - // previous transitions when it happens. - - final long alsoLocalStartOfDay = anyLocalStartOfDay + currentOffset - previousOffset; - // `alsoLocalStartOfDay` is the Unix timestamp for the start of the day in question if the previous offset were in - // effect. - - if (alsoLocalStartOfDay <= previousTransition) { - // Therefore the previous offset _is_ in effect at `alsoLocalStartOfDay`, and it's earlier than anyLocalStartOfDay, - // so this is the answer to use. - return alsoLocalStartOfDay; - } else { - // The previous offset is not in effect at `alsoLocalStartOfDay`, so the current offset must be. - return anyLocalStartOfDay; - } - - } else { - do { - long rounded = field.roundFloor(utcMillis); - - // field.roundFloor() mostly works as long as the offset hasn't changed in [rounded, utcMillis], so look at where - // the offset most recently changed. - - final long previousTransition = previousTransition(utcMillis); - - if (previousTransition == Long.MAX_VALUE || previousTransition < rounded) { - // The offset did not change in [rounded, utcMillis], so roundFloor() worked as expected. - return rounded; - } - - // The offset _did_ change in [rounded, utcMillis]. Put differently, this means that none of the times in - // [previousTransition+1, utcMillis] were rounded, so the rounded time must be <= previousTransition. This means - // it's sufficient to try and round previousTransition down. - assert previousTransition < utcMillis; - utcMillis = previousTransition; - } while (true); - } - } - - @Override - public long nextRoundingValue(long utcMillis) { - long floor = round(utcMillis); - // add one unit and round to get to next rounded value - long next = round(field.add(floor, 1)); - if (next == floor) { - // in rare case we need to add more than one unit - next = round(field.add(floor, 2)); - } - return next; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeByte(unit.id()); - out.writeString(timeZone.getID()); - } - - @Override - public int hashCode() { - return Objects.hash(unit, timeZone); - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - TimeUnitRounding other = (TimeUnitRounding) obj; - return Objects.equals(unit, other.unit) && Objects.equals(timeZone, other.timeZone); - } - - @Override - public String toString() { - return "[" + timeZone + "][" + unit + "]"; - } - } - - /** - * Rounding time intervals - * - * @opensearch.internal - */ - static class TimeIntervalRounding extends Rounding { - - static final byte ID = 2; - - private final long interval; - private final DateTimeZone timeZone; - - TimeIntervalRounding(long interval, DateTimeZone timeZone) { - if (interval < 1) throw new IllegalArgumentException("Zero or negative time interval not supported"); - this.interval = interval; - this.timeZone = timeZone; - } - - TimeIntervalRounding(StreamInput in) throws IOException { - interval = in.readVLong(); - timeZone = DateTimeZone.forID(in.readString()); - } - - @Override - public byte id() { - return ID; - } - - @Override - public long round(long utcMillis) { - long timeLocal = timeZone.convertUTCToLocal(utcMillis); - long rounded = roundKey(timeLocal, interval) * interval; - long roundedUTC; - if (isInDSTGap(rounded) == false) { - roundedUTC = timeZone.convertLocalToUTC(rounded, true, utcMillis); - // check if we crossed DST transition, in this case we want the - // last rounded value before the transition - long transition = timeZone.previousTransition(utcMillis); - if (transition != utcMillis && transition > roundedUTC) { - roundedUTC = round(transition - 1); - } - } else { - /* - * Edge case where the rounded local time is illegal and landed - * in a DST gap. In this case, we choose 1ms tick after the - * transition date. We don't want the transition date itself - * because those dates, when rounded themselves, fall into the - * previous interval. This would violate the invariant that the - * rounding operation should be idempotent. - */ - roundedUTC = timeZone.previousTransition(utcMillis) + 1; - } - return roundedUTC; - } - - private static long roundKey(long value, long interval) { - if (value < 0) { - return (value - interval + 1) / interval; - } else { - return value / interval; - } - } - - /** - * Determine whether the local instant is a valid instant in the given - * time zone. The logic for this is taken from - * {@link DateTimeZone#convertLocalToUTC(long, boolean)} for the - * `strict` mode case, but instead of throwing an - * {@link IllegalInstantException}, which is costly, we want to return a - * flag indicating that the value is illegal in that time zone. - */ - private boolean isInDSTGap(long instantLocal) { - if (timeZone.isFixed()) { - return false; - } - // get the offset at instantLocal (first estimate) - int offsetLocal = timeZone.getOffset(instantLocal); - // adjust instantLocal using the estimate and recalc the offset - int offset = timeZone.getOffset(instantLocal - offsetLocal); - // if the offsets differ, we must be near a DST boundary - if (offsetLocal != offset) { - // determine if we are in the DST gap - long nextLocal = timeZone.nextTransition(instantLocal - offsetLocal); - if (nextLocal == (instantLocal - offsetLocal)) { - nextLocal = Long.MAX_VALUE; - } - long nextAdjusted = timeZone.nextTransition(instantLocal - offset); - if (nextAdjusted == (instantLocal - offset)) { - nextAdjusted = Long.MAX_VALUE; - } - if (nextLocal != nextAdjusted) { - // we are in the DST gap - return true; - } - } - return false; - } - - @Override - public long nextRoundingValue(long time) { - long timeLocal = time; - timeLocal = timeZone.convertUTCToLocal(time); - long next = timeLocal + interval; - return timeZone.convertLocalToUTC(next, false); - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeVLong(interval); - out.writeString(timeZone.getID()); - } - - @Override - public int hashCode() { - return Objects.hash(interval, timeZone); - } - - @Override - public boolean equals(Object obj) { - if (obj == null) { - return false; - } - if (getClass() != obj.getClass()) { - return false; - } - TimeIntervalRounding other = (TimeIntervalRounding) obj; - return Objects.equals(interval, other.interval) && Objects.equals(timeZone, other.timeZone); - } - } - - /** - * Rounding streams - * - * @opensearch.internal - */ - public static class Streams { - - public static void write(Rounding rounding, StreamOutput out) throws IOException { - out.writeByte(rounding.id()); - rounding.writeTo(out); - } - - public static Rounding read(StreamInput in) throws IOException { - Rounding rounding; - byte id = in.readByte(); - switch (id) { - case TimeUnitRounding.ID: - rounding = new TimeUnitRounding(in); - break; - case TimeIntervalRounding.ID: - rounding = new TimeIntervalRounding(in); - break; - default: - throw new OpenSearchException("unknown rounding id [" + id + "]"); - } - return rounding; - } - - } - -} diff --git a/server/src/main/java/org/opensearch/common/rounding/package-info.java b/server/src/main/java/org/opensearch/common/rounding/package-info.java deleted file mode 100644 index 5fa3e39c6a786..0000000000000 --- a/server/src/main/java/org/opensearch/common/rounding/package-info.java +++ /dev/null @@ -1,10 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/** Base DateTime rounding package. */ -package org.opensearch.common.rounding; diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 98e36686e7cf2..803745c89ecfe 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -685,13 +685,15 @@ public void apply(Settings value, Settings current, Settings previous) { RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, + RemoteClusterStateService.METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, IndicesService.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING, AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT, - CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT + CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT, + IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING ) ) ); diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java b/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java index 665ecf77d7aa7..fb341ac2ac569 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java @@ -32,8 +32,10 @@ package org.opensearch.discovery; +import org.opensearch.Version; import org.opensearch.cluster.coordination.PendingClusterStateStats; import org.opensearch.cluster.coordination.PublishClusterStateStats; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; @@ -51,21 +53,31 @@ public class DiscoveryStats implements Writeable, ToXContentFragment { private final PendingClusterStateStats queueStats; private final PublishClusterStateStats publishStats; + private final ClusterStateStats clusterStateStats; - public DiscoveryStats(PendingClusterStateStats queueStats, PublishClusterStateStats publishStats) { + public DiscoveryStats(PendingClusterStateStats queueStats, PublishClusterStateStats publishStats, ClusterStateStats clusterStateStats) { this.queueStats = queueStats; this.publishStats = publishStats; + this.clusterStateStats = clusterStateStats; } public DiscoveryStats(StreamInput in) throws IOException { queueStats = in.readOptionalWriteable(PendingClusterStateStats::new); publishStats = in.readOptionalWriteable(PublishClusterStateStats::new); + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { + clusterStateStats = in.readOptionalWriteable(ClusterStateStats::new); + } else { + clusterStateStats = null; + } } @Override public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(queueStats); out.writeOptionalWriteable(publishStats); + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { + out.writeOptionalWriteable(clusterStateStats); + } } @Override @@ -77,6 +89,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (publishStats != null) { publishStats.toXContent(builder, params); } + if (clusterStateStats != null) { + clusterStateStats.toXContent(builder, params); + } builder.endObject(); return builder; } @@ -92,4 +107,8 @@ public PendingClusterStateStats getQueueStats() { public PublishClusterStateStats getPublishStats() { return publishStats; } + + public ClusterStateStats getClusterStateStats() { + return clusterStateStats; + } } diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index f855449c708d2..c3056276706a0 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -47,6 +47,7 @@ import org.opensearch.cluster.coordination.InMemoryPersistedState; import org.opensearch.cluster.coordination.PersistedStateRegistry; import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType; +import org.opensearch.cluster.coordination.PersistedStateStats; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.IndexTemplateMetadata; import org.opensearch.cluster.metadata.Manifest; @@ -615,6 +616,12 @@ public void setLastAcceptedState(ClusterState clusterState) { lastAcceptedState = clusterState; } + @Override + public PersistedStateStats getStats() { + // Note: These stats are not published yet, will come in future + return null; + } + private PersistedClusterStateService.Writer getWriterSafe() { final PersistedClusterStateService.Writer writer = persistenceWriter.get(); if (writer == null) { @@ -688,24 +695,21 @@ public void setLastAcceptedState(ClusterState clusterState) { try { final ClusterMetadataManifest manifest; if (shouldWriteFullClusterState(clusterState)) { - if (clusterState.metadata().clusterUUIDCommitted() == true) { - final Optional latestManifest = remoteClusterStateService.getLatestClusterMetadataManifest( - clusterState.getClusterName().value(), + final Optional latestManifest = remoteClusterStateService.getLatestClusterMetadataManifest( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ); + if (latestManifest.isPresent()) { + // The previous UUID should not change for the current UUID. So fetching the latest manifest + // from remote store and getting the previous UUID. + previousClusterUUID = latestManifest.get().getPreviousClusterUUID(); + } else { + // When the user starts the cluster with remote state disabled but later enables the remote state, + // there will not be any manifest for the current cluster UUID. + logger.error( + "Latest manifest is not present in remote store for cluster UUID: {}", clusterState.metadata().clusterUUID() ); - if (latestManifest.isPresent()) { - // The previous UUID should not change for the current UUID. So fetching the latest manifest - // from remote store and getting the previous UUID. - previousClusterUUID = latestManifest.get().getPreviousClusterUUID(); - } else { - // When the user starts the cluster with remote state disabled but later enables the remote state, - // there will not be any manifest for the current cluster UUID. - logger.error( - "Latest manifest is not present in remote store for cluster UUID: {}", - clusterState.metadata().clusterUUID() - ); - previousClusterUUID = ClusterState.UNKNOWN_UUID; - } } manifest = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID); } else { @@ -717,10 +721,16 @@ assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == lastAcceptedManifest = manifest; lastAcceptedState = clusterState; } catch (Exception e) { + remoteClusterStateService.writeMetadataFailed(); handleExceptionOnWrite(e); } } + @Override + public PersistedStateStats getStats() { + return remoteClusterStateService.getStats(); + } + private boolean verifyManifestAndClusterState(ClusterMetadataManifest manifest, ClusterState clusterState) { assert manifest != null : "ClusterMetadataManifest is null"; assert clusterState != null : "ClusterState is null"; diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 97b37d9532f85..4725f40076ce2 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -262,7 +262,7 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.indices = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); this.previousClusterUUID = in.readString(); this.clusterUUIDCommitted = in.readBoolean(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.codecVersion = in.readInt(); this.globalMetadataFileName = in.readString(); } else { @@ -316,7 +316,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(indices); out.writeString(previousClusterUUID); out.writeBoolean(clusterUUIDCommitted); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeInt(codecVersion); out.writeString(globalMetadataFileName); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index ae4a3fab9852d..c892b475d71da 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -87,6 +87,8 @@ public class RemoteClusterStateService implements Closeable { public static final TimeValue GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + public static final TimeValue METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + public static final Setting INDEX_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( "cluster.remote_store.state.index_metadata.upload_timeout", INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT, @@ -101,6 +103,13 @@ public class RemoteClusterStateService implements Closeable { Setting.Property.NodeScope ); + public static final Setting METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.metadata_manifest.upload_timeout", + METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + public static final ChecksumBlobStoreFormat INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( "index-metadata", METADATA_NAME_FORMAT, @@ -157,9 +166,10 @@ public class RemoteClusterStateService implements Closeable { private volatile TimeValue indexMetadataUploadTimeout; private volatile TimeValue globalMetadataUploadTimeout; + private volatile TimeValue metadataManifestUploadTimeout; private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); - + private final RemotePersistenceStats remoteStateStats; public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V1; public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 1; @@ -190,9 +200,12 @@ public RemoteClusterStateService( this.slowWriteLoggingThreshold = clusterSettings.get(SLOW_WRITE_LOGGING_THRESHOLD); this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); this.globalMetadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING); + this.metadataManifestUploadTimeout = clusterSettings.get(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING); clusterSettings.addSettingsUpdateConsumer(SLOW_WRITE_LOGGING_THRESHOLD, this::setSlowWriteLoggingThreshold); clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); + clusterSettings.addSettingsUpdateConsumer(METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING, this::setMetadataManifestUploadTimeout); + this.remoteStateStats = new RemotePersistenceStats(); } private BlobStoreTransferService getBlobStoreTransferService() { @@ -233,6 +246,8 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri false ); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); + remoteStateStats.stateSucceeded(); + remoteStateStats.stateTook(durationMillis); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " + "wrote full state with [{}] indices", @@ -241,9 +256,8 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri allUploadedIndexMetadata.size() ); } else { - // todo change to debug logger.info( - "writing cluster state took [{}ms]; " + "wrote full state with [{}] indices", + "writing cluster state took [{}ms]; " + "wrote full state with [{}] indices and global metadata", durationMillis, allUploadedIndexMetadata.size() ); @@ -282,6 +296,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( if (updateGlobalMetadata || previousManifest.getGlobalMetadataFileName() == null) { globalMetadataFile = writeGlobalMetadata(clusterState); } else { + logger.debug("Global metadata has not updated in cluster state, skipping upload of it"); globalMetadataFile = previousManifest.getGlobalMetadataFileName(); } @@ -302,7 +317,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( for (final IndexMetadata indexMetadata : clusterState.metadata().indices().values()) { final Long previousVersion = previousStateIndexMetadataVersionByName.get(indexMetadata.getIndex().getName()); if (previousVersion == null || indexMetadata.getVersion() != previousVersion) { - logger.trace( + logger.debug( "updating metadata for [{}], changing version from [{}] to [{}]", indexMetadata.getIndex(), previousVersion, @@ -334,21 +349,27 @@ public ClusterMetadataManifest writeIncrementalMetadata( deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); + remoteStateStats.stateSucceeded(); + remoteStateStats.stateTook(durationMillis); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " - + "wrote metadata for [{}] indices and skipped [{}] unchanged indices", + + "wrote metadata for [{}] indices and skipped [{}] unchanged indices, global metadata updated : [{}]", durationMillis, slowWriteLoggingThreshold, numIndicesUpdated, - numIndicesUnchanged + numIndicesUnchanged, + updateGlobalMetadata ); } else { - logger.trace( - "writing cluster state took [{}ms]; " + "wrote metadata for [{}] indices and skipped [{}] unchanged indices", + logger.info( + "writing cluster state for version [{}] took [{}ms]; " + + "wrote metadata for [{}] indices and skipped [{}] unchanged indices, global metadata updated : [{}]", + manifest.getStateVersion(), durationMillis, numIndicesUpdated, - numIndicesUnchanged + numIndicesUnchanged, + updateGlobalMetadata ); } return manifest; @@ -364,6 +385,8 @@ public ClusterMetadataManifest writeIncrementalMetadata( private String writeGlobalMetadata(ClusterState clusterState) throws IOException { AtomicReference result = new AtomicReference(); + AtomicReference exceptionReference = new AtomicReference(); + final BlobContainer globalMetadataContainer = globalMetadataContainer( clusterState.getClusterName().value(), clusterState.metadata().clusterUUID() @@ -376,9 +399,9 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException LatchedActionListener completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { logger.trace(String.format(Locale.ROOT, "GlobalMetadata uploaded successfully.")); result.set(globalMetadataContainer.path().buildAsString() + globalMetadataFilename); - }, ex -> { throw new GlobalMetadataTransferException(ex.getMessage(), ex); }), latch); + }, ex -> { exceptionReference.set(ex); }), latch); - GLOBAL_METADATA_FORMAT.writeAsync( + GLOBAL_METADATA_FORMAT.writeAsyncWithUrgentPriority( clusterState.metadata(), globalMetadataContainer, globalMetadataFilename, @@ -390,20 +413,22 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException try { if (latch.await(getGlobalMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { // TODO: We should add metrics where transfer is timing out. [Issue: #10687] - GlobalMetadataTransferException ex = new GlobalMetadataTransferException( + RemoteStateTransferException ex = new RemoteStateTransferException( String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete") ); throw ex; } } catch (InterruptedException ex) { - GlobalMetadataTransferException exception = new GlobalMetadataTransferException( + RemoteStateTransferException exception = new RemoteStateTransferException( String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete - %s"), ex ); Thread.currentThread().interrupt(); throw exception; } - + if (exceptionReference.get() != null) { + throw new RemoteStateTransferException(exceptionReference.get().getMessage(), exceptionReference.get()); + } return result.get(); } @@ -427,7 +452,7 @@ private List writeIndexMetadataParallel(ClusterState clus ); result.add(uploadedIndexMetadata); }, ex -> { - assert ex instanceof IndexMetadataTransferException; + assert ex instanceof RemoteStateTransferException; logger.error( () -> new ParameterizedMessage("Exception during transfer of IndexMetadata to Remote {}", ex.getMessage()), ex @@ -444,7 +469,7 @@ private List writeIndexMetadataParallel(ClusterState clus try { if (latch.await(getIndexMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { - IndexMetadataTransferException ex = new IndexMetadataTransferException( + RemoteStateTransferException ex = new RemoteStateTransferException( String.format( Locale.ROOT, "Timed out waiting for transfer of index metadata to complete - %s", @@ -456,7 +481,7 @@ private List writeIndexMetadataParallel(ClusterState clus } } catch (InterruptedException ex) { exceptionList.forEach(ex::addSuppressed); - IndexMetadataTransferException exception = new IndexMetadataTransferException( + RemoteStateTransferException exception = new RemoteStateTransferException( String.format( Locale.ROOT, "Timed out waiting for transfer of index metadata to complete - %s", @@ -468,7 +493,7 @@ private List writeIndexMetadataParallel(ClusterState clus throw exception; } if (exceptionList.size() > 0) { - IndexMetadataTransferException exception = new IndexMetadataTransferException( + RemoteStateTransferException exception = new RemoteStateTransferException( String.format( Locale.ROOT, "Exception during transfer of IndexMetadata to Remote %s", @@ -507,10 +532,10 @@ private void writeIndexMetadataAsync( indexMetadataContainer.path().buildAsString() + indexMetadataFilename ) ), - ex -> latchedActionListener.onFailure(new IndexMetadataTransferException(indexMetadata.getIndex().toString(), ex)) + ex -> latchedActionListener.onFailure(new RemoteStateTransferException(indexMetadata.getIndex().toString(), ex)) ); - INDEX_METADATA_FORMAT.writeAsync( + INDEX_METADATA_FORMAT.writeAsyncWithUrgentPriority( indexMetadata, indexMetadataContainer, indexMetadataFilename, @@ -588,14 +613,50 @@ private ClusterMetadataManifest uploadManifest( private void writeMetadataManifest(String clusterName, String clusterUUID, ClusterMetadataManifest uploadManifest, String fileName) throws IOException { + AtomicReference result = new AtomicReference(); + AtomicReference exceptionReference = new AtomicReference(); + final BlobContainer metadataManifestContainer = manifestContainer(clusterName, clusterUUID); - CLUSTER_METADATA_MANIFEST_FORMAT.write( + + // latch to wait until upload is not finished + CountDownLatch latch = new CountDownLatch(1); + + LatchedActionListener completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { + logger.trace(String.format(Locale.ROOT, "Manifest file uploaded successfully.")); + }, ex -> { exceptionReference.set(ex); }), latch); + + CLUSTER_METADATA_MANIFEST_FORMAT.writeAsyncWithUrgentPriority( uploadManifest, metadataManifestContainer, fileName, blobStoreRepository.getCompressor(), + completionListener, FORMAT_PARAMS ); + + try { + if (latch.await(getMetadataManifestUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { + RemoteStateTransferException ex = new RemoteStateTransferException( + String.format(Locale.ROOT, "Timed out waiting for transfer of manifest file to complete") + ); + throw ex; + } + } catch (InterruptedException ex) { + RemoteStateTransferException exception = new RemoteStateTransferException( + String.format(Locale.ROOT, "Timed out waiting for transfer of manifest file to complete - %s"), + ex + ); + Thread.currentThread().interrupt(); + throw exception; + } + if (exceptionReference.get() != null) { + throw new RemoteStateTransferException(exceptionReference.get().getMessage(), exceptionReference.get()); + } + logger.debug( + "Metadata manifest file [{}] written during [{}] phase. ", + fileName, + uploadManifest.isCommitted() ? "commit" : "publish" + ); } private String fetchPreviousClusterUUID(String clusterName, String clusterUUID) { @@ -650,6 +711,10 @@ private void setGlobalMetadataUploadTimeout(TimeValue newGlobalMetadataUploadTim this.globalMetadataUploadTimeout = newGlobalMetadataUploadTimeout; } + private void setMetadataManifestUploadTimeout(TimeValue newMetadataManifestUploadTimeout) { + this.metadataManifestUploadTimeout = newMetadataManifestUploadTimeout; + } + public TimeValue getIndexMetadataUploadTimeout() { return this.indexMetadataUploadTimeout; } @@ -658,6 +723,10 @@ public TimeValue getGlobalMetadataUploadTimeout() { return this.globalMetadataUploadTimeout; } + public TimeValue getMetadataManifestUploadTimeout() { + return this.metadataManifestUploadTimeout; + } + static String getManifestFileName(long term, long version, boolean committed) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest______C/P____ return String.join( @@ -749,16 +818,16 @@ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, U } /** - * Fetch latest metadata from remote cluster state including global metadata and index metadata + * Fetch latest ClusterState from remote, including global metadata, index metadata and cluster state version * * @param clusterUUID uuid of cluster state to refer to in remote * @param clusterName name of the cluster * @return {@link IndexMetadata} */ - public Metadata getLatestMetadata(String clusterName, String clusterUUID) { + public ClusterState getLatestClusterState(String clusterName, String clusterUUID) { start(); Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - if (!clusterMetadataManifest.isPresent()) { + if (clusterMetadataManifest.isEmpty()) { throw new IllegalStateException( String.format(Locale.ROOT, "Latest cluster metadata manifest is not present for the provided clusterUUID: %s", clusterUUID) ); @@ -772,7 +841,10 @@ public Metadata getLatestMetadata(String clusterName, String clusterUUID) { Map indexMetadataMap = new HashMap<>(); indices.values().forEach(indexMetadata -> { indexMetadataMap.put(indexMetadata.getIndex().getName(), indexMetadata); }); - return Metadata.builder(globalMetadata).indices(indexMetadataMap).build(); + return ClusterState.builder(ClusterState.EMPTY_STATE) + .version(clusterMetadataManifest.get().getStateVersion()) + .metadata(Metadata.builder(globalMetadata).indices(indexMetadataMap).build()) + .build(); } private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { @@ -863,25 +935,31 @@ private Map getLatestManifestForAllClusterUUIDs * @return List of cluster UUIDs. The first element is the most recent cluster UUID in the chain */ private List createClusterChain(final Map manifestsByClusterUUID, final String clusterName) { - final Map clusterUUIDGraph = manifestsByClusterUUID.values() + final List validClusterManifests = manifestsByClusterUUID.values() .stream() + .filter(this::isValidClusterUUID) + .collect(Collectors.toList()); + final Map clusterUUIDGraph = validClusterManifests.stream() .collect(Collectors.toMap(ClusterMetadataManifest::getClusterUUID, ClusterMetadataManifest::getPreviousClusterUUID)); - final List validClusterUUIDs = manifestsByClusterUUID.values() - .stream() - .filter(m -> !isInvalidClusterUUID(m) && !clusterUUIDGraph.containsValue(m.getClusterUUID())) + final List topLevelClusterUUIDs = validClusterManifests.stream() .map(ClusterMetadataManifest::getClusterUUID) + .filter(clusterUUID -> !clusterUUIDGraph.containsValue(clusterUUID)) .collect(Collectors.toList()); - if (validClusterUUIDs.isEmpty()) { - logger.info("There is no valid previous cluster UUID"); + + if (topLevelClusterUUIDs.isEmpty()) { + // This can occur only when there are no valid cluster UUIDs + assert validClusterManifests.isEmpty() : "There are no top level cluster UUIDs even when there are valid cluster UUIDs"; + logger.info("There is no valid previous cluster UUID. All cluster UUIDs evaluated are: {}", manifestsByClusterUUID.keySet()); return Collections.emptyList(); } - if (validClusterUUIDs.size() > 1) { + if (topLevelClusterUUIDs.size() > 1) { + logger.info("Top level cluster UUIDs: {}", topLevelClusterUUIDs); // If the valid cluster UUIDs are more that 1, it means there was some race condition where // more then 2 cluster manager nodes tried to become active cluster manager and published // 2 cluster UUIDs which followed the same previous UUID. final Map manifestsByClusterUUIDTrimmed = trimClusterUUIDs( manifestsByClusterUUID, - validClusterUUIDs, + topLevelClusterUUIDs, clusterName ); if (manifestsByClusterUUID.size() == manifestsByClusterUUIDTrimmed.size()) { @@ -890,19 +968,20 @@ private List createClusterChain(final Map validChain = new ArrayList<>(); - String currentUUID = validClusterUUIDs.get(0); + String currentUUID = topLevelClusterUUIDs.get(0); while (currentUUID != null && !ClusterState.UNKNOWN_UUID.equals(currentUUID)) { validChain.add(currentUUID); // Getting the previous cluster UUID of a cluster UUID from the clusterUUID Graph currentUUID = clusterUUIDGraph.get(currentUUID); } + logger.info("Known UUIDs found in remote store : [{}]", validChain); return validChain; } @@ -923,11 +1002,7 @@ private Map trimClusterUUIDs( // Here we compare the manifest of current UUID to that of previous UUID // In case currentUUID's latest manifest is same as previous UUIDs latest manifest, // that means it was restored from previousUUID and no IndexMetadata update was performed on it. - if (ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) { - if (currentManifest.getIndices().isEmpty()) { - trimmedUUIDs.remove(clusterUUID); - } - } else { + if (!ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) { ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID()); if (isMetadataEqual(currentManifest, previousManifest, clusterName) && isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) { @@ -966,8 +1041,8 @@ private boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMeta return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata); } - private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) { - return !manifest.isClusterUUIDCommitted(); + private boolean isValidClusterUUID(ClusterMetadataManifest manifest) { + return manifest.isClusterUUIDCommitted(); } /** @@ -1059,30 +1134,20 @@ public static String encodeString(String content) { return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); } - /** - * Exception for IndexMetadata transfer failures to remote - */ - static class IndexMetadataTransferException extends RuntimeException { - - public IndexMetadataTransferException(String errorDesc) { - super(errorDesc); - } - - public IndexMetadataTransferException(String errorDesc, Throwable cause) { - super(errorDesc, cause); - } + public void writeMetadataFailed() { + getStats().stateFailed(); } /** - * Exception for GlobalMetadata transfer failures to remote + * Exception for Remote state transfer. */ - static class GlobalMetadataTransferException extends RuntimeException { + static class RemoteStateTransferException extends RuntimeException { - public GlobalMetadataTransferException(String errorDesc) { + public RemoteStateTransferException(String errorDesc) { super(errorDesc); } - public GlobalMetadataTransferException(String errorDesc, Throwable cause) { + public RemoteStateTransferException(String errorDesc, Throwable cause) { super(errorDesc, cause); } } @@ -1093,7 +1158,7 @@ public GlobalMetadataTransferException(String errorDesc, Throwable cause) { * @param clusterName name of the cluster * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged */ - private void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { + void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { clusterUUIDs.forEach(clusterUUID -> { getBlobStoreTransferService().deleteAsync( ThreadPool.Names.REMOTE_PURGE, @@ -1113,6 +1178,7 @@ public void onFailure(Exception e) { ), e ); + remoteStateStats.cleanUpAttemptFailed(); } } ); @@ -1217,7 +1283,7 @@ private void deleteClusterMetadata( }); if (staleManifestPaths.isEmpty()) { - logger.info("No stale Remote Cluster Metadata files found"); + logger.debug("No stale Remote Cluster Metadata files found"); return; } @@ -1228,8 +1294,10 @@ private void deleteClusterMetadata( logger.error("Error while fetching Remote Cluster Metadata manifests", e); } catch (IOException e) { logger.error("Error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); } catch (Exception e) { logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); } } @@ -1260,4 +1328,8 @@ public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataMa deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); }); } + + public RemotePersistenceStats getStats() { + return remoteStateStats; + } } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java new file mode 100644 index 0000000000000..f2330846fa23e --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.coordination.PersistedStateStats; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Remote state related extended stats. + * + * @opensearch.internal + */ +public class RemotePersistenceStats extends PersistedStateStats { + static final String CLEANUP_ATTEMPT_FAILED_COUNT = "cleanup_attempt_failed_count"; + static final String REMOTE_UPLOAD = "remote_upload"; + private AtomicLong cleanupAttemptFailedCount = new AtomicLong(0); + + public RemotePersistenceStats() { + super(REMOTE_UPLOAD); + addToExtendedFields(CLEANUP_ATTEMPT_FAILED_COUNT, cleanupAttemptFailedCount); + } + + public void cleanUpAttemptFailed() { + cleanupAttemptFailedCount.incrementAndGet(); + } + + public long getCleanupAttemptFailedCount() { + return cleanupAttemptFailedCount.get(); + } +} diff --git a/server/src/main/java/org/opensearch/index/fielddata/IndexNumericFieldData.java b/server/src/main/java/org/opensearch/index/fielddata/IndexNumericFieldData.java index b4e90b8ab570a..6fc074fe0de95 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/IndexNumericFieldData.java +++ b/server/src/main/java/org/opensearch/index/fielddata/IndexNumericFieldData.java @@ -242,7 +242,7 @@ private XFieldComparatorSource comparatorSource( assert !targetNumericType.isFloatingPoint(); source = new IntValuesComparatorSource(this, missingValue, sortMode, nested); } - if (targetNumericType != getNumericType()) { + if (targetNumericType != getNumericType() || getNumericType() == NumericType.HALF_FLOAT) { source.disableSkipping(); // disable skipping logic for cast of sort field } return source; diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index 9541d13421e27..23bb4cea17a20 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -138,7 +138,7 @@ public RemoteRestoreResult restore( String[] indexNames ) { Map> indexMetadataMap = new HashMap<>(); - Metadata remoteMetadata = null; + ClusterState remoteState = null; boolean metadataFromRemoteStore = (restoreClusterUUID == null || restoreClusterUUID.isEmpty() || restoreClusterUUID.isBlank()) == false; @@ -149,8 +149,9 @@ public RemoteRestoreResult restore( if (currentState.metadata().clusterUUID().equals(restoreClusterUUID)) { throw new IllegalArgumentException("clusterUUID to restore from should be different from current cluster UUID"); } - remoteMetadata = remoteClusterStateService.getLatestMetadata(currentState.getClusterName().value(), restoreClusterUUID); - remoteMetadata.getIndices().values().forEach(indexMetadata -> { + logger.info("Restoring cluster state from remote store from cluster UUID : [{}]", restoreClusterUUID); + remoteState = remoteClusterStateService.getLatestClusterState(currentState.getClusterName().value(), restoreClusterUUID); + remoteState.getMetadata().getIndices().values().forEach(indexMetadata -> { indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); }); } catch (Exception e) { @@ -176,7 +177,7 @@ public RemoteRestoreResult restore( } } } - return executeRestore(currentState, indexMetadataMap, restoreAllShards, remoteMetadata); + return executeRestore(currentState, indexMetadataMap, restoreAllShards, remoteState); } /** @@ -190,7 +191,7 @@ private RemoteRestoreResult executeRestore( ClusterState currentState, Map> indexMetadataMap, boolean restoreAllShards, - Metadata remoteMetadata + ClusterState remoteState ) { final String restoreUUID = UUIDs.randomBase64UUID(); List indicesToBeRestored = new ArrayList<>(); @@ -240,8 +241,11 @@ private RemoteRestoreResult executeRestore( totalShards += updatedIndexMetadata.getNumberOfShards(); } - if (remoteMetadata != null) { - restoreGlobalMetadata(mdBuilder, remoteMetadata); + if (remoteState != null) { + restoreGlobalMetadata(mdBuilder, remoteState.getMetadata()); + // Restore ClusterState version + logger.info("Restoring ClusterState with Remote State version [{}]", remoteState.version()); + builder.version(remoteState.version()); } RestoreInfo restoreInfo = new RestoreInfo("remote_store", indicesToBeRestored, totalShards, totalShards); diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index fb4e9056153aa..cf42c6749fc79 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -187,6 +187,7 @@ import org.opensearch.indices.recovery.RecoveryTarget; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; +import org.opensearch.indices.replication.common.ReplicationTimer; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.search.suggest.completion.CompletionStats; @@ -202,6 +203,7 @@ import java.nio.file.NoSuchFileException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.EnumSet; import java.util.HashSet; @@ -640,7 +642,7 @@ public void updateShardState( if (currentRouting.initializing() && currentRouting.isRelocationTarget() == false && newRouting.active()) { // the cluster-manager started a recovering primary, activate primary mode. replicationTracker.activatePrimaryMode(getLocalCheckpoint()); - ensurePeerRecoveryRetentionLeasesExist(); + postActivatePrimaryMode(); } } else { assert currentRouting.primary() == false : "term is only increased as part of primary promotion"; @@ -698,7 +700,16 @@ public void updateShardState( if (indexSettings.isSegRepEnabled()) { // this Shard's engine was read only, we need to update its engine before restoring local history from xlog. assert newRouting.primary() && currentRouting.primary() == false; + ReplicationTimer timer = new ReplicationTimer(); + timer.start(); + logger.debug( + "Resetting engine on promotion of shard [{}] to primary, startTime {}\n", + shardId, + timer.startTime() + ); resetEngineToGlobalCheckpoint(); + timer.stop(); + logger.info("Completed engine failover for shard [{}] in: {} ms", shardId, timer.time()); // It is possible an engine can open with a SegmentInfos on a higher gen but the reader does not refresh to // trigger our refresh listener. // Force update the checkpoint post engine reset. @@ -711,8 +722,7 @@ public void updateShardState( // are brought up to date. checkpointPublisher.publish(this, getLatestReplicationCheckpoint()); } - - ensurePeerRecoveryRetentionLeasesExist(); + postActivatePrimaryMode(); /* * If this shard was serving as a replica shard when another shard was promoted to primary then * its Lucene index was reset during the primary term transition. In particular, the Lucene index @@ -1997,6 +2007,29 @@ private RemoteSegmentStoreDirectory getRemoteDirectory() { return ((RemoteSegmentStoreDirectory) remoteDirectory); } + /** + Returns true iff it is able to verify that remote segment store + is in sync with local + */ + boolean isRemoteSegmentStoreInSync() { + assert indexSettings.isRemoteStoreEnabled(); + try { + RemoteSegmentStoreDirectory directory = getRemoteDirectory(); + if (directory.readLatestMetadataFile() != null) { + // verifying that all files except EXCLUDE_FILES are uploaded to the remote + Collection uploadFiles = directory.getSegmentsUploadedToRemoteStore().keySet(); + SegmentInfos segmentInfos = store.readLastCommittedSegmentsInfo(); + Collection localFiles = segmentInfos.files(true); + if (uploadFiles.containsAll(localFiles)) { + return true; + } + } + } catch (IOException e) { + logger.error("Exception while reading latest metadata", e); + } + return false; + } + public void preRecovery() { final IndexShardState currentState = this.state; // single volatile read if (currentState == IndexShardState.CLOSED) { @@ -3393,6 +3426,20 @@ assert getLocalCheckpoint() == primaryContext.getCheckpointStates().get(routingE synchronized (mutex) { replicationTracker.activateWithPrimaryContext(primaryContext); // make changes to primaryMode flag only under mutex } + postActivatePrimaryMode(); + } + + private void postActivatePrimaryMode() { + if (indexSettings.isRemoteStoreEnabled()) { + // We make sure to upload translog (even if it does not contain any operations) to remote translog. + // This helps to get a consistent state in remote store where both remote segment store and remote + // translog contains data. + try { + getEngine().translogManager().syncTranslog(); + } catch (IOException e) { + logger.error("Failed to sync translog to remote from new primary", e); + } + } ensurePeerRecoveryRetentionLeasesExist(); } @@ -4861,8 +4908,7 @@ public void syncSegmentsFromGivenRemoteSegmentStore( remoteStore.incRef(); } Map uploadedSegments = sourceRemoteDirectory - .initializeToSpecificCommit(primaryTerm, commitGeneration) - .getMetadata(); + .getSegmentsUploadedToRemoteStore(); final Directory storeDirectory = store.directory(); store.incRef(); @@ -4949,7 +4995,8 @@ private String copySegmentFiles( return segmentNFile; } - private boolean localDirectoryContains(Directory localDirectory, String file, long checksum) { + // Visible for testing + boolean localDirectoryContains(Directory localDirectory, String file, long checksum) throws IOException { try (IndexInput indexInput = localDirectory.openInput(file, IOContext.DEFAULT)) { if (checksum == CodecUtil.retrieveChecksum(indexInput)) { return true; @@ -4968,6 +5015,8 @@ private boolean localDirectoryContains(Directory localDirectory, String file, lo logger.debug("File {} does not exist in local FS, downloading from remote store", file); } catch (IOException e) { logger.warn("Exception while reading checksum of file: {}, this can happen if file is corrupted", file); + // For any other exception on reading checksum, we delete the file to re-download again + localDirectory.deleteFile(file); } return false; } diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 3e97b07abfb5d..dd40327298874 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -20,6 +20,7 @@ import org.opensearch.action.LatchedActionListener; import org.opensearch.action.bulk.BackoffPolicy; import org.opensearch.action.support.GroupedActionListener; +import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.logging.Loggers; import org.opensearch.common.unit.TimeValue; @@ -86,7 +87,7 @@ public final class RemoteStoreRefreshListener extends CloseableRetryableRefreshL private final RemoteSegmentStoreDirectory remoteDirectory; private final RemoteSegmentTransferTracker segmentTracker; private final Map localSegmentChecksumMap; - private long primaryTerm; + private volatile long primaryTerm; private volatile Iterator backoffDelayIterator; private final SegmentReplicationCheckpointPublisher checkpointPublisher; @@ -126,10 +127,9 @@ protected void runAfterRefreshExactlyOnce(boolean didRefresh) { // We have 2 separate methods to check if sync needs to be done or not. This is required since we use the return boolean // from isReadyForUpload to schedule refresh retries as the index shard or the primary mode are not in complete // ready state. - if (shouldSync(didRefresh) && isReadyForUpload()) { - segmentTracker.updateLocalRefreshTimeAndSeqNo(); + if (shouldSync(didRefresh, true) && isReadyForUpload()) { try { - initializeRemoteDirectoryOnTermUpdate(); + segmentTracker.updateLocalRefreshTimeAndSeqNo(); try (GatedCloseable segmentInfosGatedCloseable = indexShard.getSegmentInfosSnapshot()) { Collection localSegmentsPostRefresh = segmentInfosGatedCloseable.get().files(true); updateLocalSizeMapAndTracker(localSegmentsPostRefresh); @@ -150,7 +150,7 @@ protected void runAfterRefreshExactlyOnce(boolean didRefresh) { @Override protected boolean performAfterRefreshWithPermit(boolean didRefresh) { boolean successful; - if (shouldSync(didRefresh)) { + if (shouldSync(didRefresh, false)) { successful = syncSegments(); } else { successful = true; @@ -158,10 +158,15 @@ protected boolean performAfterRefreshWithPermit(boolean didRefresh) { return successful; } - private boolean shouldSync(boolean didRefresh) { - return this.primaryTerm != indexShard.getOperationPrimaryTerm() - // If the readers change, didRefresh is always true. - || didRefresh + /** + * This checks if there is a sync required to remote. + * + * @param didRefresh if the readers changed. + * @param skipPrimaryTermCheck consider change in primary term or not for should sync + * @return true if sync is needed + */ + private boolean shouldSync(boolean didRefresh, boolean skipPrimaryTermCheck) { + boolean shouldSync = didRefresh // If the readers change, didRefresh is always true. // The third condition exists for uploading the zero state segments where the refresh has not changed the reader // reference, but it is important to upload the zero state segments so that the restore does not break. || remoteDirectory.getSegmentsUploadedToRemoteStore().isEmpty() @@ -169,8 +174,15 @@ private boolean shouldSync(boolean didRefresh) { // we update the primary term and the same condition would not evaluate to true again in syncSegments. // Below check ensures that if there is commit, then that gets picked up by both 1st and 2nd shouldSync call. || isRefreshAfterCommitSafe(); + if (shouldSync || skipPrimaryTermCheck) { + return shouldSync; + } + return this.primaryTerm != indexShard.getOperationPrimaryTerm(); } + /* + @return false if retry is needed + */ private boolean syncSegments() { if (isReadyForUpload() == false) { // Following check is required to enable retry and make sure that we do not lose this refresh event @@ -188,6 +200,7 @@ private boolean syncSegments() { try { try { + initializeRemoteDirectoryOnTermUpdate(); // if a new segments_N file is present in local that is not uploaded to remote store yet, it // is considered as a first refresh post commit. A cleanup of stale commit files is triggered. // This is done to avoid delete post each refresh. @@ -476,7 +489,9 @@ private void initializeRemoteDirectoryOnTermUpdate() throws IOException { * @return true iff primaryMode is true and index shard is not in closed state. */ private boolean isReadyForUpload() { - boolean isReady = indexShard.getReplicationTracker().isPrimaryMode() && indexShard.state() != IndexShardState.CLOSED; + boolean isReady = (indexShard.getReplicationTracker().isPrimaryMode() && indexShard.state() != IndexShardState.CLOSED) + || isLocalOrSnapshotRecovery(); + if (isReady == false) { StringBuilder sb = new StringBuilder("Skipped syncing segments with"); if (indexShard.getReplicationTracker() != null) { @@ -488,11 +503,25 @@ private boolean isReadyForUpload() { if (indexShard.getEngineOrNull() != null) { sb.append(" engineType=").append(indexShard.getEngine().getClass().getSimpleName()); } + if (isLocalOrSnapshotRecovery() == false) { + sb.append(" recoverySourceType=").append(indexShard.recoveryState().getRecoverySource().getType()); + sb.append(" primary=").append(indexShard.shardRouting.primary()); + } logger.trace(sb.toString()); } return isReady; } + private boolean isLocalOrSnapshotRecovery() { + // In this case when the primary mode is false, we need to upload segments to Remote Store + // This is required in case of snapshots/shrink/ split/clone where we need to durable persist + // all segments to remote before completing the recovery to ensure durability. + + return (indexShard.state() == IndexShardState.RECOVERING && indexShard.shardRouting.primary()) + && (indexShard.recoveryState().getRecoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS + || indexShard.recoveryState().getRecoverySource().getType() == RecoverySource.Type.SNAPSHOT); + } + /** * Creates an {@link UploadListener} containing the stats population logic which would be triggered before and after segment upload events */ diff --git a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java index c0211e1257c8e..5b1940bb1d9a5 100644 --- a/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java +++ b/server/src/main/java/org/opensearch/index/shard/StoreRecovery.java @@ -191,6 +191,15 @@ void recoverFromLocalShards( // just trigger a merge to do housekeeping on the // copied segments - we will also see them in stats etc. indexShard.getEngine().forceMerge(false, -1, false, false, false, UUIDs.randomBase64UUID()); + if (indexShard.isRemoteTranslogEnabled()) { + if (indexShard.isRemoteSegmentStoreInSync() == false) { + throw new IndexShardRecoveryException( + indexShard.shardId(), + "failed to upload to remote", + new IOException("Failed to upload to remote segment store") + ); + } + } return true; } catch (IOException ex) { throw new IndexShardRecoveryException(indexShard.shardId(), "failed to recover from local shards", ex); @@ -401,6 +410,11 @@ void recoverFromSnapshotAndRemoteStore( indexUUID, shardId ); + sourceRemoteDirectory.initializeToSpecificCommit( + primaryTerm, + commitGeneration, + recoverySource.snapshot().getSnapshotId().getUUID() + ); indexShard.syncSegmentsFromGivenRemoteSegmentStore(true, sourceRemoteDirectory, primaryTerm, commitGeneration); final Store store = indexShard.store(); if (indexShard.indexSettings.isRemoteTranslogStoreEnabled() == false) { @@ -418,6 +432,12 @@ void recoverFromSnapshotAndRemoteStore( } indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); + if (indexShard.isRemoteTranslogEnabled()) { + if (indexShard.isRemoteSegmentStoreInSync() == false) { + listener.onFailure(new IndexShardRestoreFailedException(shardId, "Failed to upload to remote segment store")); + return; + } + } indexShard.postRecovery("restore done"); listener.onResponse(true); @@ -697,6 +717,12 @@ private void restore( } indexShard.getEngine().fillSeqNoGaps(indexShard.getPendingPrimaryTerm()); indexShard.finalizeRecovery(); + if (indexShard.isRemoteTranslogEnabled()) { + if (indexShard.isRemoteSegmentStoreInSync() == false) { + listener.onFailure(new IndexShardRestoreFailedException(shardId, "Failed to upload to remote segment store")); + return; + } + } indexShard.postRecovery("restore done"); listener.onResponse(true); }, e -> listener.onFailure(new IndexShardRestoreFailedException(shardId, "restore failed", e))); diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index be1f2341236ab..988d52202f975 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -34,6 +34,7 @@ import org.opensearch.index.store.lockmanager.FileLockInfo; import org.opensearch.index.store.lockmanager.RemoteStoreCommitLevelLockManager; import org.opensearch.index.store.lockmanager.RemoteStoreLockManager; +import org.opensearch.index.store.lockmanager.RemoteStoreMetadataLockManager; import org.opensearch.index.store.remote.metadata.RemoteSegmentMetadata; import org.opensearch.index.store.remote.metadata.RemoteSegmentMetadataHandler; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; @@ -160,8 +161,9 @@ public RemoteSegmentMetadata init() throws IOException { * * @throws IOException if there were any failures in reading the metadata file */ - public RemoteSegmentMetadata initializeToSpecificCommit(long primaryTerm, long commitGeneration) throws IOException { - String metadataFile = getMetadataFileForCommit(primaryTerm, commitGeneration); + public RemoteSegmentMetadata initializeToSpecificCommit(long primaryTerm, long commitGeneration, String acquirerId) throws IOException { + String metadataFilePrefix = MetadataFilenameUtils.getMetadataFilePrefixForCommit(primaryTerm, commitGeneration); + String metadataFile = ((RemoteStoreMetadataLockManager) mdLockManager).fetchLock(metadataFilePrefix, acquirerId); RemoteSegmentMetadata remoteSegmentMetadata = readMetadataFile(metadataFile); if (remoteSegmentMetadata != null) { this.segmentsUploadedToRemoteStore = new ConcurrentHashMap<>(remoteSegmentMetadata.getMetadata()); diff --git a/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreMetadataLockManager.java b/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreMetadataLockManager.java index fd7906729e314..756905d02229a 100644 --- a/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreMetadataLockManager.java +++ b/server/src/main/java/org/opensearch/index/store/lockmanager/RemoteStoreMetadataLockManager.java @@ -14,10 +14,13 @@ import org.apache.lucene.store.IndexOutput; import org.opensearch.index.store.RemoteBufferedOutputDirectory; +import java.io.FileNotFoundException; import java.io.IOException; import java.nio.file.NoSuchFileException; import java.util.Collection; +import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; /** * A Class that implements Remote Store Lock Manager by creating lock files for the remote store files that needs to @@ -70,6 +73,19 @@ public void release(LockInfo lockInfo) throws IOException { } } + public String fetchLock(String filenamePrefix, String acquirerId) throws IOException { + Collection lockFiles = lockDirectory.listFilesByPrefix(filenamePrefix); + List lockFilesForAcquirer = lockFiles.stream() + .filter(lockFile -> acquirerId.equals(FileLockInfo.LockFileUtils.getAcquirerIdFromLock(lockFile))) + .map(FileLockInfo.LockFileUtils::getFileToLockNameFromLock) + .collect(Collectors.toList()); + if (lockFilesForAcquirer.size() == 0) { + throw new FileNotFoundException("No lock file found for prefix: " + filenamePrefix + " and acquirerId: " + acquirerId); + } + assert lockFilesForAcquirer.size() == 1; + return lockFilesForAcquirer.get(0); + } + /** * Checks whether a given file have any lock on it or not. * @param lockInfo File Lock Info instance for which we need to check if lock is acquired. diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index a305a774f5854..65d16e213cad1 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -103,6 +103,7 @@ public RemoteFsTranslog( try { download(translogTransferManager, location, logger); Checkpoint checkpoint = readCheckpoint(location); + logger.info("Downloaded data from remote translog till maxSeqNo = {}", checkpoint.maxSeqNo); this.readers.addAll(recoverFromFiles(checkpoint)); if (readers.isEmpty()) { String errorMsg = String.format(Locale.ROOT, "%s at least one reader must be recovered", shardId); @@ -266,9 +267,13 @@ public void rollGeneration() throws IOException { } private boolean prepareAndUpload(Long primaryTerm, Long generation) throws IOException { + long maxSeqNo = -1; try (Releasable ignored = writeLock.acquire()) { if (generation == null || generation == current.getGeneration()) { try { + if (closed.get() == false) { + maxSeqNo = getMaxSeqNo(); + } final TranslogReader reader = current.closeIntoReader(); readers.add(reader); copyCheckpointTo(location.resolve(getCommitCheckpointFileName(current.getGeneration()))); @@ -300,17 +305,17 @@ private boolean prepareAndUpload(Long primaryTerm, Long generation) throws IOExc // is not updated in remote translog except in primary to primary recovery. if (generation == null) { if (closed.get() == false) { - return upload(primaryTerm, current.getGeneration() - 1); + return upload(primaryTerm, current.getGeneration() - 1, maxSeqNo); } else { - return upload(primaryTerm, current.getGeneration()); + return upload(primaryTerm, current.getGeneration(), maxSeqNo); } } else { - return upload(primaryTerm, generation); + return upload(primaryTerm, generation, maxSeqNo); } } } - private boolean upload(Long primaryTerm, Long generation) throws IOException { + private boolean upload(long primaryTerm, long generation, long maxSeqNo) throws IOException { // During primary relocation (primary-primary peer recovery), both the old and the new primary have engine // created with the RemoteFsTranslog. Both primaries are equipped to upload the translogs. The primary mode check // below ensures that the real primary only is uploading. Before the primary mode is set as true for the new @@ -334,7 +339,7 @@ private boolean upload(Long primaryTerm, Long generation) throws IOException { ) { return translogTransferManager.transferSnapshot( transferSnapshotProvider, - new RemoteFsTranslogTransferListener(generation, primaryTerm) + new RemoteFsTranslogTransferListener(generation, primaryTerm, maxSeqNo) ); } @@ -522,23 +527,31 @@ private class RemoteFsTranslogTransferListener implements TranslogTransferListen /** * Generation for the translog */ - private final Long generation; + private final long generation; /** * Primary Term for the translog */ - private final Long primaryTerm; + private final long primaryTerm; + + private final long maxSeqNo; - RemoteFsTranslogTransferListener(Long generation, Long primaryTerm) { + RemoteFsTranslogTransferListener(long generation, long primaryTerm, long maxSeqNo) { this.generation = generation; this.primaryTerm = primaryTerm; + this.maxSeqNo = maxSeqNo; } @Override public void onUploadComplete(TransferSnapshot transferSnapshot) throws IOException { maxRemoteTranslogGenerationUploaded = generation; minRemoteGenReferenced = getMinFileGeneration(); - logger.trace("uploaded translog for {} {} ", primaryTerm, generation); + logger.debug( + "Successfully uploaded translog for primary term = {}, generation = {}, maxSeqNo = {}", + primaryTerm, + generation, + maxSeqNo + ); } @Override diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index ece6f6d5a534f..2f6055df87804 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -42,7 +42,6 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; import static org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; @@ -156,14 +155,17 @@ public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTrans try { if (latch.await(TRANSFER_TIMEOUT_IN_MILLIS, TimeUnit.MILLISECONDS) == false) { - Exception ex = new TimeoutException("Timed out waiting for transfer of snapshot " + transferSnapshot + " to complete"); + Exception ex = new TranslogUploadFailedException( + "Timed out waiting for transfer of snapshot " + transferSnapshot + " to complete" + ); exceptionList.forEach(ex::addSuppressed); throw ex; } } catch (InterruptedException ex) { - exceptionList.forEach(ex::addSuppressed); + Exception exception = new TranslogUploadFailedException("Failed to upload " + transferSnapshot, ex); + exceptionList.forEach(exception::addSuppressed); Thread.currentThread().interrupt(); - throw ex; + throw exception; } if (exceptionList.isEmpty()) { TransferFileSnapshot tlogMetadata = prepareMetadata(transferSnapshot); diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 50c551c2be29b..36abc77893d81 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -299,6 +299,17 @@ public class IndicesService extends AbstractLifecycleComponent Property.Final ); + /** + * This setting is used to restrict creation of index where the 'index.replication.type' index setting is set. + * If disabled, the replication type can be specified. + */ + public static final Setting CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING = Setting.boolSetting( + "cluster.restrict.index.replication_type", + false, + Property.NodeScope, + Property.Final + ); + /** * The node's settings. */ diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java index cd6dbe8af90d9..cc71ef816e525 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java @@ -232,6 +232,7 @@ private List getFiles(CheckpointInfoResponse checkpointInfo) return missingFiles; } + // pkg private for tests private boolean validateLocalChecksum(StoreFileMetadata file) { try (IndexInput indexInput = indexShard.store().directory().openInput(file.name(), IOContext.DEFAULT)) { String checksum = Store.digestToString(CodecUtil.retrieveChecksum(indexInput)); @@ -243,7 +244,15 @@ private boolean validateLocalChecksum(StoreFileMetadata file) { return false; } } catch (IOException e) { - throw new UncheckedIOException("Error reading " + file, e); + logger.warn("Error reading " + file, e); + // Delete file on exceptions so that it can be re-downloaded. This is safe to do as this file is local only + // and not referenced by reader. + try { + indexShard.store().directory().deleteFile(file.name()); + } catch (IOException ex) { + throw new UncheckedIOException("Error reading " + file, e); + } + return false; } } diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index e280141c12bc1..3e6052a5ef820 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -197,21 +197,56 @@ public void write( } /** - * Writes blob with resolving the blob name using {@link #blobName} method. - * Leverages the multipart upload if supported by the blobContainer. + * Internally calls {@link #writeAsyncWithPriority} with {@link WritePriority#NORMAL} + */ + public void writeAsync( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + ActionListener listener, + final ToXContent.Params params + ) throws IOException { + // use NORMAL priority by default + this.writeAsyncWithPriority(obj, blobContainer, name, compressor, WritePriority.NORMAL, listener, params); + } + + /** + * Internally calls {@link #writeAsyncWithPriority} with {@link WritePriority#URGENT} + *

+ * NOTE: We use this method to upload urgent priority objects like cluster state to remote stores. + * Use {@link #writeAsync(ToXContent, BlobContainer, String, Compressor, ActionListener, ToXContent.Params)} for + * other use cases. + */ + public void writeAsyncWithUrgentPriority( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + ActionListener listener, + final ToXContent.Params params + ) throws IOException { + this.writeAsyncWithPriority(obj, blobContainer, name, compressor, WritePriority.URGENT, listener, params); + } + + /** + * Method to writes blob with resolving the blob name using {@link #blobName} method with specified + * {@link WritePriority}. Leverages the multipart upload if supported by the blobContainer. * * @param obj object to be serialized * @param blobContainer blob container * @param name blob name * @param compressor whether to use compression + * @param priority write priority to be used * @param listener listener to listen to write result * @param params ToXContent params */ - public void writeAsync( + private void writeAsyncWithPriority( final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor, + final WritePriority priority, ActionListener listener, final ToXContent.Params params ) throws IOException { @@ -222,7 +257,7 @@ public void writeAsync( } final String blobName = blobName(name); final BytesReference bytes = serialize(obj, blobName, compressor, params); - final String resourceDescription = "ChecksumBlobStoreFormat.writeAsync(blob=\"" + blobName + "\")"; + final String resourceDescription = "ChecksumBlobStoreFormat.writeAsyncWithPriority(blob=\"" + blobName + "\")"; try (IndexInput input = new ByteArrayIndexInput(resourceDescription, BytesReference.toBytes(bytes))) { long expectedChecksum; try { @@ -242,7 +277,7 @@ public void writeAsync( blobName, bytes.length(), true, - WritePriority.HIGH, + priority, (size, position) -> new OffsetRangeIndexInputStream(input, size, position), expectedChecksum, ((AsyncMultiStreamBlobContainer) blobContainer).remoteIntegrityCheckSupported() diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index ebdd012006fb2..80f4ebf5d737a 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -36,10 +36,12 @@ import org.opensearch.action.admin.indices.stats.CommonStatsFlags; import org.opensearch.action.search.SearchRequestStats; import org.opensearch.cluster.coordination.PendingClusterStateStats; +import org.opensearch.cluster.coordination.PersistedStateStats; import org.opensearch.cluster.coordination.PublishClusterStateStats; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.WeightedRoutingStats; import org.opensearch.cluster.service.ClusterManagerThrottlingStats; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.metrics.OperationStats; import org.opensearch.core.common.io.stream.StreamInput; @@ -47,6 +49,7 @@ import org.opensearch.core.indices.breaker.AllCircuitBreakerStats; import org.opensearch.core.indices.breaker.CircuitBreakerStats; import org.opensearch.discovery.DiscoveryStats; +import org.opensearch.gateway.remote.RemotePersistenceStats; import org.opensearch.http.HttpStats; import org.opensearch.index.ReplicationStats; import org.opensearch.index.SegmentReplicationRejectionStats; @@ -72,6 +75,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -349,6 +353,26 @@ public void testSerialization() throws IOException { assertEquals(queueStats.getTotal(), deserializedDiscoveryStats.getQueueStats().getTotal()); assertEquals(queueStats.getPending(), deserializedDiscoveryStats.getQueueStats().getPending()); } + ClusterStateStats stateStats = discoveryStats.getClusterStateStats(); + if (stateStats == null) { + assertNull(deserializedDiscoveryStats.getClusterStateStats()); + } else { + assertEquals(stateStats.getUpdateFailed(), deserializedDiscoveryStats.getClusterStateStats().getUpdateFailed()); + assertEquals(stateStats.getUpdateSuccess(), deserializedDiscoveryStats.getClusterStateStats().getUpdateSuccess()); + assertEquals( + stateStats.getUpdateTotalTimeInMillis(), + deserializedDiscoveryStats.getClusterStateStats().getUpdateTotalTimeInMillis() + ); + assertEquals(1, deserializedDiscoveryStats.getClusterStateStats().getPersistenceStats().size()); + PersistedStateStats deserializedRemoteStateStats = deserializedDiscoveryStats.getClusterStateStats() + .getPersistenceStats() + .get(0); + PersistedStateStats remoteStateStats = stateStats.getPersistenceStats().get(0); + assertEquals(remoteStateStats.getStatsName(), deserializedRemoteStateStats.getStatsName()); + assertEquals(remoteStateStats.getFailedCount(), deserializedRemoteStateStats.getFailedCount()); + assertEquals(remoteStateStats.getSuccessCount(), deserializedRemoteStateStats.getSuccessCount()); + assertEquals(remoteStateStats.getTotalTimeInMillis(), deserializedRemoteStateStats.getTotalTimeInMillis()); + } } IngestStats ingestStats = nodeStats.getIngestStats(); IngestStats deserializedIngestStats = deserializedNodeStats.getIngestStats(); @@ -725,12 +749,16 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { ScriptStats scriptStats = frequently() ? new ScriptStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) : null; + ClusterStateStats stateStats = new ClusterStateStats(); + RemotePersistenceStats remoteStateStats = new RemotePersistenceStats(); + stateStats.setPersistenceStats(Arrays.asList(remoteStateStats)); DiscoveryStats discoveryStats = frequently() ? new DiscoveryStats( randomBoolean() ? new PendingClusterStateStats(randomInt(), randomInt(), randomInt()) : null, randomBoolean() ? new PublishClusterStateStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) - : null + : null, + randomBoolean() ? stateStats : null ) : null; IngestStats ingestStats = null; diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index e40826915c848..cace66d8c6d9e 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -139,6 +139,7 @@ import static org.opensearch.indices.IndicesService.CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; +import static org.opensearch.indices.IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.ShardLimitValidatorTests.createTestShardLimitService; import static org.opensearch.node.Node.NODE_ATTRIBUTES; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; @@ -1177,6 +1178,8 @@ public void testvalidateIndexSettings() { .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "zone.values", "a, b") .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "rack.values", "c, d, e") .put(AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING.getKey(), true) + .put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), true) + .put(SETTING_REPLICATION_TYPE, randomFrom(ReplicationType.values())) .build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); when(clusterService.getSettings()).thenReturn(settings); @@ -1200,8 +1203,12 @@ public void testvalidateIndexSettings() { ); List validationErrors = checkerService.getIndexSettingsValidationErrors(settings, false, Optional.empty()); - assertThat(validationErrors.size(), is(1)); - assertThat(validationErrors.get(0), is("expected total copies needs to be a multiple of total awareness attributes [3]")); + assertThat(validationErrors.size(), is(2)); + assertThat( + validationErrors.get(0), + is("index setting [index.replication.type] is not allowed to be set as [cluster.restrict.index.replication_type=true]") + ); + assertThat(validationErrors.get(1), is("expected total copies needs to be a multiple of total awareness attributes [3]")); settings = Settings.builder() .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone, rack") @@ -1209,8 +1216,13 @@ public void testvalidateIndexSettings() { .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "rack.values", "c, d, e") .put(AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING.getKey(), true) .put(SETTING_NUMBER_OF_REPLICAS, 2) + .put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), false) + .put(SETTING_REPLICATION_TYPE, randomFrom(ReplicationType.values())) .build(); + clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + when(clusterService.getClusterSettings()).thenReturn(clusterSettings); + validationErrors = checkerService.getIndexSettingsValidationErrors(settings, false, Optional.empty()); assertThat(validationErrors.size(), is(0)); diff --git a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java index 9cdbe04e0a0e4..85f6c129944fa 100644 --- a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java @@ -487,6 +487,9 @@ public void onFailure(String source, Exception e) { } }); assertBusy(mockAppender::assertAllExpectationsMatched); + // verify stats values after state is published + assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess()); + assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed()); } } } diff --git a/server/src/test/java/org/opensearch/common/RoundingTests.java b/server/src/test/java/org/opensearch/common/RoundingTests.java index 1a499bac3e2e8..cc71ee08abcca 100644 --- a/server/src/test/java/org/opensearch/common/RoundingTests.java +++ b/server/src/test/java/org/opensearch/common/RoundingTests.java @@ -33,7 +33,6 @@ package org.opensearch.common; import org.opensearch.common.collect.Tuple; -import org.opensearch.common.rounding.DateTimeUnit; import org.opensearch.common.time.DateFormatter; import org.opensearch.common.time.DateFormatters; import org.opensearch.common.unit.TimeValue; @@ -236,7 +235,7 @@ public void testOffsetRounding() { /** * Randomized test on TimeUnitRounding. Test uses random - * {@link DateTimeUnit} and {@link ZoneId} and often (50% of the time) + * {@link org.opensearch.common.Rounding.DateTimeUnit} and {@link ZoneId} and often (50% of the time) * chooses test dates that are exactly on or close to offset changes (e.g. * DST) in the chosen time zone. *

diff --git a/server/src/test/java/org/opensearch/common/rounding/DateTimeUnitTests.java b/server/src/test/java/org/opensearch/common/rounding/DateTimeUnitTests.java deleted file mode 100644 index 7b87e136c5f38..0000000000000 --- a/server/src/test/java/org/opensearch/common/rounding/DateTimeUnitTests.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.common.rounding; - -import org.opensearch.test.OpenSearchTestCase; - -import static org.opensearch.common.rounding.DateTimeUnit.DAY_OF_MONTH; -import static org.opensearch.common.rounding.DateTimeUnit.HOUR_OF_DAY; -import static org.opensearch.common.rounding.DateTimeUnit.MINUTES_OF_HOUR; -import static org.opensearch.common.rounding.DateTimeUnit.MONTH_OF_YEAR; -import static org.opensearch.common.rounding.DateTimeUnit.QUARTER; -import static org.opensearch.common.rounding.DateTimeUnit.SECOND_OF_MINUTE; -import static org.opensearch.common.rounding.DateTimeUnit.WEEK_OF_WEEKYEAR; -import static org.opensearch.common.rounding.DateTimeUnit.YEAR_OF_CENTURY; - -public class DateTimeUnitTests extends OpenSearchTestCase { - - /** - * test that we don't accidentally change enum ids - */ - public void testEnumIds() { - assertEquals(1, WEEK_OF_WEEKYEAR.id()); - assertEquals(WEEK_OF_WEEKYEAR, DateTimeUnit.resolve((byte) 1)); - - assertEquals(2, YEAR_OF_CENTURY.id()); - assertEquals(YEAR_OF_CENTURY, DateTimeUnit.resolve((byte) 2)); - - assertEquals(3, QUARTER.id()); - assertEquals(QUARTER, DateTimeUnit.resolve((byte) 3)); - - assertEquals(4, MONTH_OF_YEAR.id()); - assertEquals(MONTH_OF_YEAR, DateTimeUnit.resolve((byte) 4)); - - assertEquals(5, DAY_OF_MONTH.id()); - assertEquals(DAY_OF_MONTH, DateTimeUnit.resolve((byte) 5)); - - assertEquals(6, HOUR_OF_DAY.id()); - assertEquals(HOUR_OF_DAY, DateTimeUnit.resolve((byte) 6)); - - assertEquals(7, MINUTES_OF_HOUR.id()); - assertEquals(MINUTES_OF_HOUR, DateTimeUnit.resolve((byte) 7)); - - assertEquals(8, SECOND_OF_MINUTE.id()); - assertEquals(SECOND_OF_MINUTE, DateTimeUnit.resolve((byte) 8)); - } -} diff --git a/server/src/test/java/org/opensearch/common/rounding/RoundingDuelTests.java b/server/src/test/java/org/opensearch/common/rounding/RoundingDuelTests.java deleted file mode 100644 index 3088067cd1f84..0000000000000 --- a/server/src/test/java/org/opensearch/common/rounding/RoundingDuelTests.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.common.rounding; - -import org.opensearch.common.unit.TimeValue; -import org.opensearch.test.OpenSearchTestCase; -import org.joda.time.DateTimeZone; - -import java.time.ZoneOffset; - -import static org.hamcrest.Matchers.is; - -public class RoundingDuelTests extends OpenSearchTestCase { - - // dont include nano/micro seconds as rounding would become zero then and throw an exception - private static final String[] ALLOWED_TIME_SUFFIXES = new String[] { "d", "h", "ms", "s", "m" }; - - public void testDuellingImplementations() { - org.opensearch.common.Rounding.DateTimeUnit randomDateTimeUnit = randomFrom(org.opensearch.common.Rounding.DateTimeUnit.values()); - org.opensearch.common.Rounding.Prepared rounding; - Rounding roundingJoda; - - if (randomBoolean()) { - rounding = org.opensearch.common.Rounding.builder(randomDateTimeUnit).timeZone(ZoneOffset.UTC).build().prepareForUnknown(); - DateTimeUnit dateTimeUnit = DateTimeUnit.resolve(randomDateTimeUnit.getId()); - roundingJoda = Rounding.builder(dateTimeUnit).timeZone(DateTimeZone.UTC).build(); - } else { - TimeValue interval = timeValue(); - rounding = org.opensearch.common.Rounding.builder(interval).timeZone(ZoneOffset.UTC).build().prepareForUnknown(); - roundingJoda = Rounding.builder(interval).timeZone(DateTimeZone.UTC).build(); - } - - long roundValue = randomLong(); - assertThat(roundingJoda.round(roundValue), is(rounding.round(roundValue))); - } - - static TimeValue timeValue() { - return TimeValue.parseTimeValue(randomIntBetween(1, 1000) + randomFrom(ALLOWED_TIME_SUFFIXES), "settingName"); - } -} diff --git a/server/src/test/java/org/opensearch/common/rounding/TimeZoneRoundingTests.java b/server/src/test/java/org/opensearch/common/rounding/TimeZoneRoundingTests.java deleted file mode 100644 index d1b3adcd55f0c..0000000000000 --- a/server/src/test/java/org/opensearch/common/rounding/TimeZoneRoundingTests.java +++ /dev/null @@ -1,822 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -package org.opensearch.common.rounding; - -import org.opensearch.common.collect.Tuple; -import org.opensearch.common.rounding.Rounding.TimeIntervalRounding; -import org.opensearch.common.rounding.Rounding.TimeUnitRounding; -import org.opensearch.common.unit.TimeValue; -import org.opensearch.test.OpenSearchTestCase; -import org.joda.time.DateTime; -import org.joda.time.DateTimeConstants; -import org.joda.time.DateTimeZone; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; -import org.joda.time.format.ISODateTimeFormat; -import org.hamcrest.Description; -import org.hamcrest.Matcher; -import org.hamcrest.TypeSafeMatcher; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.TimeUnit; - -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import static org.hamcrest.Matchers.lessThan; -import static org.hamcrest.Matchers.lessThanOrEqualTo; -import static org.hamcrest.Matchers.startsWith; - -public class TimeZoneRoundingTests extends OpenSearchTestCase { - - public void testUTCTimeUnitRounding() { - Rounding tzRounding = Rounding.builder(DateTimeUnit.MONTH_OF_YEAR).build(); - DateTimeZone tz = DateTimeZone.UTC; - assertThat(tzRounding.round(time("2009-02-03T01:01:01")), isDate(time("2009-02-01T00:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-01T00:00:00.000Z")), isDate(time("2009-03-01T00:00:00.000Z"), tz)); - - tzRounding = Rounding.builder(DateTimeUnit.WEEK_OF_WEEKYEAR).build(); - assertThat(tzRounding.round(time("2012-01-10T01:01:01")), isDate(time("2012-01-09T00:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2012-01-09T00:00:00.000Z")), isDate(time("2012-01-16T00:00:00.000Z"), tz)); - - tzRounding = Rounding.builder(DateTimeUnit.QUARTER).build(); - assertThat(tzRounding.round(time("2012-01-10T01:01:01")), isDate(time("2012-01-01T00:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2012-01-09T00:00:00.000Z")), isDate(time("2012-04-01T00:00:00.000Z"), tz)); - - tzRounding = Rounding.builder(DateTimeUnit.HOUR_OF_DAY).build(); - assertThat(tzRounding.round(time("2012-01-10T01:01:01")), isDate(time("2012-01-10T01:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2012-01-09T00:00:00.000Z")), isDate(time("2012-01-09T01:00:00.000Z"), tz)); - - tzRounding = Rounding.builder(DateTimeUnit.DAY_OF_MONTH).build(); - assertThat(tzRounding.round(time("2012-01-10T01:01:01")), isDate(time("2012-01-10T00:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2012-01-09T00:00:00.000Z")), isDate(time("2012-01-10T00:00:00.000Z"), tz)); - - tzRounding = Rounding.builder(DateTimeUnit.YEAR_OF_CENTURY).build(); - assertThat(tzRounding.round(time("2012-01-10T01:01:01")), isDate(time("2012-01-01T00:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2012-01-09T00:00:00.000Z")), isDate(time("2013-01-01T00:00:00.000Z"), tz)); - - tzRounding = Rounding.builder(DateTimeUnit.MINUTES_OF_HOUR).build(); - assertThat(tzRounding.round(time("2012-01-10T01:01:01")), isDate(time("2012-01-10T01:01:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2012-01-09T00:00:00.000Z")), isDate(time("2012-01-09T00:01:00.000Z"), tz)); - - tzRounding = Rounding.builder(DateTimeUnit.SECOND_OF_MINUTE).build(); - assertThat(tzRounding.round(time("2012-01-10T01:01:01")), isDate(time("2012-01-10T01:01:01.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2012-01-09T00:00:00.000Z")), isDate(time("2012-01-09T00:00:01.000Z"), tz)); - } - - public void testUTCIntervalRounding() { - Rounding tzRounding = Rounding.builder(TimeValue.timeValueHours(12)).build(); - DateTimeZone tz = DateTimeZone.UTC; - assertThat(tzRounding.round(time("2009-02-03T01:01:01")), isDate(time("2009-02-03T00:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-03T00:00:00.000Z")), isDate(time("2009-02-03T12:00:00.000Z"), tz)); - assertThat(tzRounding.round(time("2009-02-03T13:01:01")), isDate(time("2009-02-03T12:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-03T12:00:00.000Z")), isDate(time("2009-02-04T00:00:00.000Z"), tz)); - - tzRounding = Rounding.builder(TimeValue.timeValueHours(48)).build(); - assertThat(tzRounding.round(time("2009-02-03T01:01:01")), isDate(time("2009-02-03T00:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-03T00:00:00.000Z")), isDate(time("2009-02-05T00:00:00.000Z"), tz)); - assertThat(tzRounding.round(time("2009-02-05T13:01:01")), isDate(time("2009-02-05T00:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-05T00:00:00.000Z")), isDate(time("2009-02-07T00:00:00.000Z"), tz)); - } - - /** - * test TimeIntervalRounding, (interval < 12h) with time zone shift - */ - public void testTimeIntervalRounding() { - DateTimeZone tz = DateTimeZone.forOffsetHours(-1); - Rounding tzRounding = Rounding.builder(TimeValue.timeValueHours(6)).timeZone(tz).build(); - assertThat(tzRounding.round(time("2009-02-03T00:01:01")), isDate(time("2009-02-02T19:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-02T19:00:00.000Z")), isDate(time("2009-02-03T01:00:00.000Z"), tz)); - - assertThat(tzRounding.round(time("2009-02-03T13:01:01")), isDate(time("2009-02-03T13:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-03T13:00:00.000Z")), isDate(time("2009-02-03T19:00:00.000Z"), tz)); - } - - /** - * test DayIntervalRounding, (interval >= 12h) with time zone shift - */ - public void testDayIntervalRounding() { - DateTimeZone tz = DateTimeZone.forOffsetHours(-8); - Rounding tzRounding = Rounding.builder(TimeValue.timeValueHours(12)).timeZone(tz).build(); - assertThat(tzRounding.round(time("2009-02-03T00:01:01")), isDate(time("2009-02-02T20:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-02T20:00:00.000Z")), isDate(time("2009-02-03T08:00:00.000Z"), tz)); - - assertThat(tzRounding.round(time("2009-02-03T13:01:01")), isDate(time("2009-02-03T08:00:00.000Z"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-03T08:00:00.000Z")), isDate(time("2009-02-03T20:00:00.000Z"), tz)); - } - - public void testDayRounding() { - int timezoneOffset = -2; - Rounding tzRounding = Rounding.builder(DateTimeUnit.DAY_OF_MONTH).timeZone(DateTimeZone.forOffsetHours(timezoneOffset)).build(); - assertThat(tzRounding.round(0), equalTo(0L - TimeValue.timeValueHours(24 + timezoneOffset).millis())); - assertThat( - tzRounding.nextRoundingValue(0L - TimeValue.timeValueHours(24 + timezoneOffset).millis()), - equalTo(TimeValue.timeValueHours(-timezoneOffset).millis()) - ); - - DateTimeZone tz = DateTimeZone.forID("-08:00"); - tzRounding = Rounding.builder(DateTimeUnit.DAY_OF_MONTH).timeZone(tz).build(); - assertThat(tzRounding.round(time("2012-04-01T04:15:30Z")), isDate(time("2012-03-31T08:00:00Z"), tz)); - - tzRounding = Rounding.builder(DateTimeUnit.MONTH_OF_YEAR).timeZone(tz).build(); - assertThat(tzRounding.round(time("2012-04-01T04:15:30Z")), equalTo(time("2012-03-01T08:00:00Z"))); - - // date in Feb-3rd, but still in Feb-2nd in -02:00 timezone - tz = DateTimeZone.forID("-02:00"); - tzRounding = Rounding.builder(DateTimeUnit.DAY_OF_MONTH).timeZone(tz).build(); - assertThat(tzRounding.round(time("2009-02-03T01:01:01")), isDate(time("2009-02-02T02:00:00"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-02T02:00:00")), isDate(time("2009-02-03T02:00:00"), tz)); - - // date in Feb-3rd, also in -02:00 timezone - tzRounding = Rounding.builder(DateTimeUnit.DAY_OF_MONTH).timeZone(tz).build(); - assertThat(tzRounding.round(time("2009-02-03T02:01:01")), isDate(time("2009-02-03T02:00:00"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-03T02:00:00")), isDate(time("2009-02-04T02:00:00"), tz)); - } - - public void testTimeRounding() { - // hour unit - DateTimeZone tz = DateTimeZone.forOffsetHours(-2); - Rounding tzRounding = Rounding.builder(DateTimeUnit.HOUR_OF_DAY).timeZone(tz).build(); - assertThat(tzRounding.round(0), equalTo(0L)); - assertThat(tzRounding.nextRoundingValue(0L), equalTo(TimeValue.timeValueHours(1L).getMillis())); - - assertThat(tzRounding.round(time("2009-02-03T01:01:01")), isDate(time("2009-02-03T01:00:00"), tz)); - assertThat(tzRounding.nextRoundingValue(time("2009-02-03T01:00:00")), isDate(time("2009-02-03T02:00:00"), tz)); - } - - public void testTimeUnitRoundingDST() { - Rounding tzRounding; - // testing savings to non savings switch - DateTimeZone cet = DateTimeZone.forID("CET"); - tzRounding = Rounding.builder(DateTimeUnit.HOUR_OF_DAY).timeZone(cet).build(); - assertThat(tzRounding.round(time("2014-10-26T01:01:01", cet)), isDate(time("2014-10-26T01:00:00+02:00"), cet)); - assertThat(tzRounding.nextRoundingValue(time("2014-10-26T01:00:00", cet)), isDate(time("2014-10-26T02:00:00+02:00"), cet)); - assertThat(tzRounding.nextRoundingValue(time("2014-10-26T02:00:00", cet)), isDate(time("2014-10-26T02:00:00+01:00"), cet)); - - // testing non savings to savings switch - tzRounding = Rounding.builder(DateTimeUnit.HOUR_OF_DAY).timeZone(cet).build(); - assertThat(tzRounding.round(time("2014-03-30T01:01:01", cet)), isDate(time("2014-03-30T01:00:00+01:00"), cet)); - assertThat(tzRounding.nextRoundingValue(time("2014-03-30T01:00:00", cet)), isDate(time("2014-03-30T03:00:00", cet), cet)); - assertThat(tzRounding.nextRoundingValue(time("2014-03-30T03:00:00", cet)), isDate(time("2014-03-30T04:00:00", cet), cet)); - - // testing non savings to savings switch (America/Chicago) - DateTimeZone chg = DateTimeZone.forID("America/Chicago"); - Rounding tzRounding_utc = Rounding.builder(DateTimeUnit.HOUR_OF_DAY).timeZone(DateTimeZone.UTC).build(); - assertThat(tzRounding.round(time("2014-03-09T03:01:01", chg)), isDate(time("2014-03-09T03:00:00", chg), chg)); - - Rounding tzRounding_chg = Rounding.builder(DateTimeUnit.HOUR_OF_DAY).timeZone(chg).build(); - assertThat(tzRounding_chg.round(time("2014-03-09T03:01:01", chg)), isDate(time("2014-03-09T03:00:00", chg), chg)); - - // testing savings to non savings switch 2013 (America/Chicago) - assertThat(tzRounding_utc.round(time("2013-11-03T06:01:01", chg)), isDate(time("2013-11-03T06:00:00", chg), chg)); - assertThat(tzRounding_chg.round(time("2013-11-03T06:01:01", chg)), isDate(time("2013-11-03T06:00:00", chg), chg)); - - // testing savings to non savings switch 2014 (America/Chicago) - assertThat(tzRounding_utc.round(time("2014-11-02T06:01:01", chg)), isDate(time("2014-11-02T06:00:00", chg), chg)); - assertThat(tzRounding_chg.round(time("2014-11-02T06:01:01", chg)), isDate(time("2014-11-02T06:00:00", chg), chg)); - } - - /** - * Randomized test on TimeUnitRounding. Test uses random - * {@link DateTimeUnit} and {@link DateTimeZone} and often (50% of the time) - * chooses test dates that are exactly on or close to offset changes (e.g. - * DST) in the chosen time zone. - *

- * It rounds the test date down and up and performs various checks on the - * rounding unit interval that is defined by this. Assumptions tested are - * described in - * {@link #assertInterval(long, long, long, Rounding, DateTimeZone)} - */ - public void testRoundingRandom() { - for (int i = 0; i < 1000; ++i) { - DateTimeUnit timeUnit = randomTimeUnit(); - DateTimeZone tz = randomDateTimeZone(); - Rounding rounding = new Rounding.TimeUnitRounding(timeUnit, tz); - long date = Math.abs(randomLong() % (2 * (long) 10e11)); // 1970-01-01T00:00:00Z - 2033-05-18T05:33:20.000+02:00 - long unitMillis = timeUnit.field(tz).getDurationField().getUnitMillis(); - if (randomBoolean()) { - nastyDate(date, tz, unitMillis); - } - final long roundedDate = rounding.round(date); - final long nextRoundingValue = rounding.nextRoundingValue(roundedDate); - - assertInterval(roundedDate, date, nextRoundingValue, rounding, tz); - - // check correct unit interval width for units smaller than a day, they should be fixed size except for transitions - if (unitMillis <= DateTimeConstants.MILLIS_PER_DAY) { - // if the interval defined didn't cross timezone offset transition, it should cover unitMillis width - if (tz.getOffset(roundedDate - 1) == tz.getOffset(nextRoundingValue + 1)) { - assertThat( - "unit interval width not as expected for [" + timeUnit + "], [" + tz + "] at " + new DateTime(roundedDate), - nextRoundingValue - roundedDate, - equalTo(unitMillis) - ); - } - } - } - } - - /** - * To be even more nasty, go to a transition in the selected time zone. - * In one third of the cases stay there, otherwise go half a unit back or forth - */ - private static long nastyDate(long initialDate, DateTimeZone timezone, long unitMillis) { - long date = timezone.nextTransition(initialDate); - if (randomBoolean()) { - return date + (randomLong() % unitMillis); // positive and negative offset possible - } else { - return date; - } - } - - /** - * test DST end with interval rounding - * CET: 25 October 2015, 03:00:00 clocks were turned backward 1 hour to 25 October 2015, 02:00:00 local standard time - */ - public void testTimeIntervalCET_DST_End() { - long interval = TimeUnit.MINUTES.toMillis(20); - DateTimeZone tz = DateTimeZone.forID("CET"); - Rounding rounding = new TimeIntervalRounding(interval, tz); - - assertThat(rounding.round(time("2015-10-25T01:55:00+02:00")), isDate(time("2015-10-25T01:40:00+02:00"), tz)); - assertThat(rounding.round(time("2015-10-25T02:15:00+02:00")), isDate(time("2015-10-25T02:00:00+02:00"), tz)); - assertThat(rounding.round(time("2015-10-25T02:35:00+02:00")), isDate(time("2015-10-25T02:20:00+02:00"), tz)); - assertThat(rounding.round(time("2015-10-25T02:55:00+02:00")), isDate(time("2015-10-25T02:40:00+02:00"), tz)); - // after DST shift - assertThat(rounding.round(time("2015-10-25T02:15:00+01:00")), isDate(time("2015-10-25T02:00:00+01:00"), tz)); - assertThat(rounding.round(time("2015-10-25T02:35:00+01:00")), isDate(time("2015-10-25T02:20:00+01:00"), tz)); - assertThat(rounding.round(time("2015-10-25T02:55:00+01:00")), isDate(time("2015-10-25T02:40:00+01:00"), tz)); - assertThat(rounding.round(time("2015-10-25T03:15:00+01:00")), isDate(time("2015-10-25T03:00:00+01:00"), tz)); - } - - /** - * test DST start with interval rounding - * CET: 27 March 2016, 02:00:00 clocks were turned forward 1 hour to 27 March 2016, 03:00:00 local daylight time - */ - public void testTimeIntervalCET_DST_Start() { - long interval = TimeUnit.MINUTES.toMillis(20); - DateTimeZone tz = DateTimeZone.forID("CET"); - Rounding rounding = new TimeIntervalRounding(interval, tz); - // test DST start - assertThat(rounding.round(time("2016-03-27T01:55:00+01:00")), isDate(time("2016-03-27T01:40:00+01:00"), tz)); - assertThat(rounding.round(time("2016-03-27T02:00:00+01:00")), isDate(time("2016-03-27T03:00:00+02:00"), tz)); - assertThat(rounding.round(time("2016-03-27T03:15:00+02:00")), isDate(time("2016-03-27T03:00:00+02:00"), tz)); - assertThat(rounding.round(time("2016-03-27T03:35:00+02:00")), isDate(time("2016-03-27T03:20:00+02:00"), tz)); - } - - /** - * test DST start with offset not fitting interval, e.g. Asia/Kathmandu - * adding 15min on 1986-01-01T00:00:00 the interval from - * 1986-01-01T00:15:00+05:45 to 1986-01-01T00:20:00+05:45 to only be 5min - * long - */ - public void testTimeInterval_Kathmandu_DST_Start() { - long interval = TimeUnit.MINUTES.toMillis(20); - DateTimeZone tz = DateTimeZone.forID("Asia/Kathmandu"); - Rounding rounding = new TimeIntervalRounding(interval, tz); - assertThat(rounding.round(time("1985-12-31T23:55:00+05:30")), isDate(time("1985-12-31T23:40:00+05:30"), tz)); - assertThat(rounding.round(time("1986-01-01T00:16:00+05:45")), isDate(time("1986-01-01T00:15:00+05:45"), tz)); - assertThat(time("1986-01-01T00:15:00+05:45") - time("1985-12-31T23:40:00+05:30"), equalTo(TimeUnit.MINUTES.toMillis(20))); - assertThat(rounding.round(time("1986-01-01T00:26:00+05:45")), isDate(time("1986-01-01T00:20:00+05:45"), tz)); - assertThat(time("1986-01-01T00:20:00+05:45") - time("1986-01-01T00:15:00+05:45"), equalTo(TimeUnit.MINUTES.toMillis(5))); - assertThat(rounding.round(time("1986-01-01T00:46:00+05:45")), isDate(time("1986-01-01T00:40:00+05:45"), tz)); - assertThat(time("1986-01-01T00:40:00+05:45") - time("1986-01-01T00:20:00+05:45"), equalTo(TimeUnit.MINUTES.toMillis(20))); - } - - /** - * Special test for intervals that don't fit evenly into rounding interval. - * In this case, when interval crosses DST transition point, rounding in local - * time can land in a DST gap which results in wrong UTC rounding values. - */ - public void testIntervalRounding_NotDivisibleInteval() { - DateTimeZone tz = DateTimeZone.forID("CET"); - long interval = TimeUnit.MINUTES.toMillis(14); - Rounding rounding = new Rounding.TimeIntervalRounding(interval, tz); - - assertThat(rounding.round(time("2016-03-27T01:41:00+01:00")), isDate(time("2016-03-27T01:30:00+01:00"), tz)); - assertThat(rounding.round(time("2016-03-27T01:51:00+01:00")), isDate(time("2016-03-27T01:44:00+01:00"), tz)); - assertThat(rounding.round(time("2016-03-27T01:59:00+01:00")), isDate(time("2016-03-27T01:58:00+01:00"), tz)); - assertThat(rounding.round(time("2016-03-27T03:05:00+02:00")), isDate(time("2016-03-27T03:00:00+02:00"), tz)); - assertThat(rounding.round(time("2016-03-27T03:12:00+02:00")), isDate(time("2016-03-27T03:08:00+02:00"), tz)); - assertThat(rounding.round(time("2016-03-27T03:25:00+02:00")), isDate(time("2016-03-27T03:22:00+02:00"), tz)); - assertThat(rounding.round(time("2016-03-27T03:39:00+02:00")), isDate(time("2016-03-27T03:36:00+02:00"), tz)); - } - - /** - * Test for half day rounding intervals scrossing DST. - */ - public void testIntervalRounding_HalfDay_DST() { - DateTimeZone tz = DateTimeZone.forID("CET"); - long interval = TimeUnit.HOURS.toMillis(12); - Rounding rounding = new Rounding.TimeIntervalRounding(interval, tz); - - assertThat(rounding.round(time("2016-03-26T01:00:00+01:00")), isDate(time("2016-03-26T00:00:00+01:00"), tz)); - assertThat(rounding.round(time("2016-03-26T13:00:00+01:00")), isDate(time("2016-03-26T12:00:00+01:00"), tz)); - assertThat(rounding.round(time("2016-03-27T01:00:00+01:00")), isDate(time("2016-03-27T00:00:00+01:00"), tz)); - assertThat(rounding.round(time("2016-03-27T13:00:00+02:00")), isDate(time("2016-03-27T12:00:00+02:00"), tz)); - assertThat(rounding.round(time("2016-03-28T01:00:00+02:00")), isDate(time("2016-03-28T00:00:00+02:00"), tz)); - assertThat(rounding.round(time("2016-03-28T13:00:00+02:00")), isDate(time("2016-03-28T12:00:00+02:00"), tz)); - } - - /** - * randomized test on {@link TimeIntervalRounding} with random interval and time zone offsets - */ - public void testIntervalRoundingRandom() { - for (int i = 0; i < 1000; i++) { - TimeUnit unit = randomFrom(new TimeUnit[] { TimeUnit.MINUTES, TimeUnit.HOURS, TimeUnit.DAYS }); - long interval = unit.toMillis(randomIntBetween(1, 365)); - DateTimeZone tz = randomDateTimeZone(); - Rounding rounding = new Rounding.TimeIntervalRounding(interval, tz); - long mainDate = Math.abs(randomLong() % (2 * (long) 10e11)); // 1970-01-01T00:00:00Z - 2033-05-18T05:33:20.000+02:00 - if (randomBoolean()) { - mainDate = nastyDate(mainDate, tz, interval); - } - // check two intervals around date - long previousRoundedValue = Long.MIN_VALUE; - for (long date = mainDate - 2 * interval; date < mainDate + 2 * interval; date += interval / 2) { - try { - final long roundedDate = rounding.round(date); - final long nextRoundingValue = rounding.nextRoundingValue(roundedDate); - assertThat("Rounding should be idempotent", roundedDate, equalTo(rounding.round(roundedDate))); - assertThat("Rounded value smaller or equal than unrounded", roundedDate, lessThanOrEqualTo(date)); - assertThat( - "Values smaller than rounded value should round further down", - rounding.round(roundedDate - 1), - lessThan(roundedDate) - ); - assertThat("Rounding should be >= previous rounding value", roundedDate, greaterThanOrEqualTo(previousRoundedValue)); - - if (tz.isFixed()) { - assertThat("NextRounding value should be greater than date", nextRoundingValue, greaterThan(roundedDate)); - assertThat( - "NextRounding value should be interval from rounded value", - nextRoundingValue - roundedDate, - equalTo(interval) - ); - assertThat( - "NextRounding value should be a rounded date", - nextRoundingValue, - equalTo(rounding.round(nextRoundingValue)) - ); - } - previousRoundedValue = roundedDate; - } catch (AssertionError e) { - logger.error("Rounding error at {}, timezone {}, interval: {},", new DateTime(date, tz), tz, interval); - throw e; - } - } - } - } - - /** - * Test that rounded values are always greater or equal to last rounded value if date is increasing. - * The example covers an interval around 2011-10-30T02:10:00+01:00, time zone CET, interval: 2700000ms - */ - public void testIntervalRoundingMonotonic_CET() { - long interval = TimeUnit.MINUTES.toMillis(45); - DateTimeZone tz = DateTimeZone.forID("CET"); - Rounding rounding = new Rounding.TimeIntervalRounding(interval, tz); - List> expectedDates = new ArrayList<>(); - // first date is the date to be rounded, second the expected result - expectedDates.add(new Tuple<>("2011-10-30T01:40:00.000+02:00", "2011-10-30T01:30:00.000+02:00")); - expectedDates.add(new Tuple<>("2011-10-30T02:02:30.000+02:00", "2011-10-30T01:30:00.000+02:00")); - expectedDates.add(new Tuple<>("2011-10-30T02:25:00.000+02:00", "2011-10-30T02:15:00.000+02:00")); - expectedDates.add(new Tuple<>("2011-10-30T02:47:30.000+02:00", "2011-10-30T02:15:00.000+02:00")); - expectedDates.add(new Tuple<>("2011-10-30T02:10:00.000+01:00", "2011-10-30T02:15:00.000+02:00")); - expectedDates.add(new Tuple<>("2011-10-30T02:32:30.000+01:00", "2011-10-30T02:15:00.000+01:00")); - expectedDates.add(new Tuple<>("2011-10-30T02:55:00.000+01:00", "2011-10-30T02:15:00.000+01:00")); - expectedDates.add(new Tuple<>("2011-10-30T03:17:30.000+01:00", "2011-10-30T03:00:00.000+01:00")); - - long previousDate = Long.MIN_VALUE; - for (Tuple dates : expectedDates) { - final long roundedDate = rounding.round(time(dates.v1())); - assertThat(roundedDate, isDate(time(dates.v2()), tz)); - assertThat(roundedDate, greaterThanOrEqualTo(previousDate)); - previousDate = roundedDate; - } - // here's what this means for interval widths - assertEquals(TimeUnit.MINUTES.toMillis(45), time("2011-10-30T02:15:00.000+02:00") - time("2011-10-30T01:30:00.000+02:00")); - assertEquals(TimeUnit.MINUTES.toMillis(60), time("2011-10-30T02:15:00.000+01:00") - time("2011-10-30T02:15:00.000+02:00")); - assertEquals(TimeUnit.MINUTES.toMillis(45), time("2011-10-30T03:00:00.000+01:00") - time("2011-10-30T02:15:00.000+01:00")); - } - - /** - * special test for DST switch from #9491 - */ - public void testAmbiguousHoursAfterDSTSwitch() { - Rounding tzRounding; - final DateTimeZone tz = DateTimeZone.forID("Asia/Jerusalem"); - tzRounding = Rounding.builder(DateTimeUnit.HOUR_OF_DAY).timeZone(tz).build(); - assertThat(tzRounding.round(time("2014-10-26T00:30:00+03:00")), isDate(time("2014-10-26T00:00:00+03:00"), tz)); - assertThat(tzRounding.round(time("2014-10-26T01:30:00+03:00")), isDate(time("2014-10-26T01:00:00+03:00"), tz)); - // the utc date for "2014-10-25T03:00:00+03:00" and "2014-10-25T03:00:00+02:00" is the same, local time turns back 1h here - assertThat(time("2014-10-26T03:00:00+03:00"), isDate(time("2014-10-26T02:00:00+02:00"), tz)); - assertThat(tzRounding.round(time("2014-10-26T01:30:00+02:00")), isDate(time("2014-10-26T01:00:00+02:00"), tz)); - assertThat(tzRounding.round(time("2014-10-26T02:30:00+02:00")), isDate(time("2014-10-26T02:00:00+02:00"), tz)); - - // Day interval - tzRounding = Rounding.builder(DateTimeUnit.DAY_OF_MONTH).timeZone(tz).build(); - assertThat(tzRounding.round(time("2014-11-11T17:00:00", tz)), isDate(time("2014-11-11T00:00:00", tz), tz)); - // DST on - assertThat(tzRounding.round(time("2014-08-11T17:00:00", tz)), isDate(time("2014-08-11T00:00:00", tz), tz)); - // Day of switching DST on -> off - assertThat(tzRounding.round(time("2014-10-26T17:00:00", tz)), isDate(time("2014-10-26T00:00:00", tz), tz)); - // Day of switching DST off -> on - assertThat(tzRounding.round(time("2015-03-27T17:00:00", tz)), isDate(time("2015-03-27T00:00:00", tz), tz)); - - // Month interval - tzRounding = Rounding.builder(DateTimeUnit.MONTH_OF_YEAR).timeZone(tz).build(); - assertThat(tzRounding.round(time("2014-11-11T17:00:00", tz)), isDate(time("2014-11-01T00:00:00", tz), tz)); - // DST on - assertThat(tzRounding.round(time("2014-10-10T17:00:00", tz)), isDate(time("2014-10-01T00:00:00", tz), tz)); - - // Year interval - tzRounding = Rounding.builder(DateTimeUnit.YEAR_OF_CENTURY).timeZone(tz).build(); - assertThat(tzRounding.round(time("2014-11-11T17:00:00", tz)), isDate(time("2014-01-01T00:00:00", tz), tz)); - - // Two timestamps in same year and different timezone offset ("Double buckets" issue - #9491) - tzRounding = Rounding.builder(DateTimeUnit.YEAR_OF_CENTURY).timeZone(tz).build(); - assertThat(tzRounding.round(time("2014-11-11T17:00:00", tz)), isDate(tzRounding.round(time("2014-08-11T17:00:00", tz)), tz)); - } - - /** - * test for #10025, strict local to UTC conversion can cause joda exceptions - * on DST start - */ - public void testLenientConversionDST() { - DateTimeZone tz = DateTimeZone.forID("America/Sao_Paulo"); - long start = time("2014-10-18T20:50:00.000", tz); - long end = time("2014-10-19T01:00:00.000", tz); - Rounding tzRounding = new Rounding.TimeUnitRounding(DateTimeUnit.MINUTES_OF_HOUR, tz); - Rounding dayTzRounding = new Rounding.TimeIntervalRounding(60000, tz); - for (long time = start; time < end; time = time + 60000) { - assertThat(tzRounding.nextRoundingValue(time), greaterThan(time)); - assertThat(dayTzRounding.nextRoundingValue(time), greaterThan(time)); - } - } - - public void testEdgeCasesTransition() { - { - // standard +/-1 hour DST transition, CET - DateTimeUnit timeUnit = DateTimeUnit.HOUR_OF_DAY; - DateTimeZone tz = DateTimeZone.forID("CET"); - Rounding rounding = new Rounding.TimeUnitRounding(timeUnit, tz); - - // 29 Mar 2015 - Daylight Saving Time Started - // at 02:00:00 clocks were turned forward 1 hour to 03:00:00 - assertInterval(time("2015-03-29T00:00:00.000+01:00"), time("2015-03-29T01:00:00.000+01:00"), rounding, 60, tz); - assertInterval(time("2015-03-29T01:00:00.000+01:00"), time("2015-03-29T03:00:00.000+02:00"), rounding, 60, tz); - assertInterval(time("2015-03-29T03:00:00.000+02:00"), time("2015-03-29T04:00:00.000+02:00"), rounding, 60, tz); - - // 25 Oct 2015 - Daylight Saving Time Ended - // at 03:00:00 clocks were turned backward 1 hour to 02:00:00 - assertInterval(time("2015-10-25T01:00:00.000+02:00"), time("2015-10-25T02:00:00.000+02:00"), rounding, 60, tz); - assertInterval(time("2015-10-25T02:00:00.000+02:00"), time("2015-10-25T02:00:00.000+01:00"), rounding, 60, tz); - assertInterval(time("2015-10-25T02:00:00.000+01:00"), time("2015-10-25T03:00:00.000+01:00"), rounding, 60, tz); - } - - { - // time zone "Asia/Kathmandu" - // 1 Jan 1986 - Time Zone Change (IST → NPT), at 00:00:00 clocks were turned forward 00:15 minutes - // - // hour rounding is stable before 1985-12-31T23:00:00.000 and after 1986-01-01T01:00:00.000+05:45 - // the interval between is 105 minutes long because the hour after transition starts at 00:15 - // which is not a round value for hourly rounding - DateTimeUnit timeUnit = DateTimeUnit.HOUR_OF_DAY; - DateTimeZone tz = DateTimeZone.forID("Asia/Kathmandu"); - Rounding rounding = new Rounding.TimeUnitRounding(timeUnit, tz); - - assertInterval(time("1985-12-31T22:00:00.000+05:30"), time("1985-12-31T23:00:00.000+05:30"), rounding, 60, tz); - assertInterval(time("1985-12-31T23:00:00.000+05:30"), time("1986-01-01T01:00:00.000+05:45"), rounding, 105, tz); - assertInterval(time("1986-01-01T01:00:00.000+05:45"), time("1986-01-01T02:00:00.000+05:45"), rounding, 60, tz); - } - - { - // time zone "Australia/Lord_Howe" - // 3 Mar 1991 - Daylight Saving Time Ended - // at 02:00:00 clocks were turned backward 0:30 hours to Sunday, 3 March 1991, 01:30:00 - DateTimeUnit timeUnit = DateTimeUnit.HOUR_OF_DAY; - DateTimeZone tz = DateTimeZone.forID("Australia/Lord_Howe"); - Rounding rounding = new Rounding.TimeUnitRounding(timeUnit, tz); - - assertInterval(time("1991-03-03T00:00:00.000+11:00"), time("1991-03-03T01:00:00.000+11:00"), rounding, 60, tz); - assertInterval(time("1991-03-03T01:00:00.000+11:00"), time("1991-03-03T02:00:00.000+10:30"), rounding, 90, tz); - assertInterval(time("1991-03-03T02:00:00.000+10:30"), time("1991-03-03T03:00:00.000+10:30"), rounding, 60, tz); - - // 27 Oct 1991 - Daylight Saving Time Started - // at 02:00:00 clocks were turned forward 0:30 hours to 02:30:00 - assertInterval(time("1991-10-27T00:00:00.000+10:30"), time("1991-10-27T01:00:00.000+10:30"), rounding, 60, tz); - // the interval containing the switch time is 90 minutes long - assertInterval(time("1991-10-27T01:00:00.000+10:30"), time("1991-10-27T03:00:00.000+11:00"), rounding, 90, tz); - assertInterval(time("1991-10-27T03:00:00.000+11:00"), time("1991-10-27T04:00:00.000+11:00"), rounding, 60, tz); - } - - { - // time zone "Pacific/Chatham" - // 5 Apr 2015 - Daylight Saving Time Ended - // at 03:45:00 clocks were turned backward 1 hour to 02:45:00 - DateTimeUnit timeUnit = DateTimeUnit.HOUR_OF_DAY; - DateTimeZone tz = DateTimeZone.forID("Pacific/Chatham"); - Rounding rounding = new Rounding.TimeUnitRounding(timeUnit, tz); - - assertInterval(time("2015-04-05T02:00:00.000+13:45"), time("2015-04-05T03:00:00.000+13:45"), rounding, 60, tz); - assertInterval(time("2015-04-05T03:00:00.000+13:45"), time("2015-04-05T03:00:00.000+12:45"), rounding, 60, tz); - assertInterval(time("2015-04-05T03:00:00.000+12:45"), time("2015-04-05T04:00:00.000+12:45"), rounding, 60, tz); - - // 27 Sep 2015 - Daylight Saving Time Started - // at 02:45:00 clocks were turned forward 1 hour to 03:45:00 - - assertInterval(time("2015-09-27T01:00:00.000+12:45"), time("2015-09-27T02:00:00.000+12:45"), rounding, 60, tz); - assertInterval(time("2015-09-27T02:00:00.000+12:45"), time("2015-09-27T04:00:00.000+13:45"), rounding, 60, tz); - assertInterval(time("2015-09-27T04:00:00.000+13:45"), time("2015-09-27T05:00:00.000+13:45"), rounding, 60, tz); - } - } - - public void testDST_Europe_Rome() { - // time zone "Europe/Rome", rounding to days. Rome had two midnights on the day the clocks went back in 1978, and - // timeZone.convertLocalToUTC() gives the later of the two because Rome is east of UTC, whereas we want the earlier. - - DateTimeUnit timeUnit = DateTimeUnit.DAY_OF_MONTH; - DateTimeZone tz = DateTimeZone.forID("Europe/Rome"); - Rounding rounding = new TimeUnitRounding(timeUnit, tz); - - { - long timeBeforeFirstMidnight = time("1978-09-30T23:59:00+02:00"); - long floor = rounding.round(timeBeforeFirstMidnight); - assertThat(floor, isDate(time("1978-09-30T00:00:00+02:00"), tz)); - } - - { - long timeBetweenMidnights = time("1978-10-01T00:30:00+02:00"); - long floor = rounding.round(timeBetweenMidnights); - assertThat(floor, isDate(time("1978-10-01T00:00:00+02:00"), tz)); - } - - { - long timeAfterSecondMidnight = time("1978-10-01T00:30:00+01:00"); - long floor = rounding.round(timeAfterSecondMidnight); - assertThat(floor, isDate(time("1978-10-01T00:00:00+02:00"), tz)); - - long prevFloor = rounding.round(floor - 1); - assertThat(prevFloor, lessThan(floor)); - assertThat(prevFloor, isDate(time("1978-09-30T00:00:00+02:00"), tz)); - } - } - - /** - * Test for a time zone whose days overlap because the clocks are set back across midnight at the end of DST. - */ - public void testDST_America_St_Johns() { - // time zone "America/St_Johns", rounding to days. - DateTimeUnit timeUnit = DateTimeUnit.DAY_OF_MONTH; - DateTimeZone tz = DateTimeZone.forID("America/St_Johns"); - Rounding rounding = new TimeUnitRounding(timeUnit, tz); - - // 29 October 2006 - Daylight Saving Time ended, changing the UTC offset from -02:30 to -03:30. - // This happened at 02:31 UTC, 00:01 local time, so the clocks were set back 1 hour to 23:01 on the 28th. - // This means that 2006-10-29 has _two_ midnights, one in the -02:30 offset and one in the -03:30 offset. - // Only the first of these is considered "rounded". Moreover, the extra time between 23:01 and 23:59 - // should be considered as part of the 28th even though it comes after midnight on the 29th. - - { - // Times before the first midnight should be rounded up to the first midnight. - long timeBeforeFirstMidnight = time("2006-10-28T23:30:00.000-02:30"); - long floor = rounding.round(timeBeforeFirstMidnight); - assertThat(floor, isDate(time("2006-10-28T00:00:00.000-02:30"), tz)); - long ceiling = rounding.nextRoundingValue(timeBeforeFirstMidnight); - assertThat(ceiling, isDate(time("2006-10-29T00:00:00.000-02:30"), tz)); - assertInterval(floor, timeBeforeFirstMidnight, ceiling, rounding, tz); - } - - { - // Times between the two midnights which are on the later day should be rounded down to the later day's midnight. - long timeBetweenMidnights = time("2006-10-29T00:00:30.000-02:30"); - // (this is halfway through the last minute before the clocks changed, in which local time was ambiguous) - - long floor = rounding.round(timeBetweenMidnights); - assertThat(floor, isDate(time("2006-10-29T00:00:00.000-02:30"), tz)); - - long ceiling = rounding.nextRoundingValue(timeBetweenMidnights); - assertThat(ceiling, isDate(time("2006-10-30T00:00:00.000-03:30"), tz)); - - assertInterval(floor, timeBetweenMidnights, ceiling, rounding, tz); - } - - { - // Times between the two midnights which are on the earlier day should be rounded down to the earlier day's midnight. - long timeBetweenMidnights = time("2006-10-28T23:30:00.000-03:30"); - // (this is halfway through the hour after the clocks changed, in which local time was ambiguous) - - long floor = rounding.round(timeBetweenMidnights); - assertThat(floor, isDate(time("2006-10-28T00:00:00.000-02:30"), tz)); - - long ceiling = rounding.nextRoundingValue(timeBetweenMidnights); - assertThat(ceiling, isDate(time("2006-10-29T00:00:00.000-02:30"), tz)); - - assertInterval(floor, timeBetweenMidnights, ceiling, rounding, tz); - } - - { - // Times after the second midnight should be rounded down to the first midnight. - long timeAfterSecondMidnight = time("2006-10-29T06:00:00.000-03:30"); - long floor = rounding.round(timeAfterSecondMidnight); - assertThat(floor, isDate(time("2006-10-29T00:00:00.000-02:30"), tz)); - long ceiling = rounding.nextRoundingValue(timeAfterSecondMidnight); - assertThat(ceiling, isDate(time("2006-10-30T00:00:00.000-03:30"), tz)); - assertInterval(floor, timeAfterSecondMidnight, ceiling, rounding, tz); - } - } - - /** - * tests for dst transition with overlaps and day roundings. - */ - public void testDST_END_Edgecases() { - // First case, dst happens at 1am local time, switching back one hour. - // We want the overlapping hour to count for the next day, making it a 25h interval - - DateTimeUnit timeUnit = DateTimeUnit.DAY_OF_MONTH; - DateTimeZone tz = DateTimeZone.forID("Atlantic/Azores"); - Rounding rounding = new Rounding.TimeUnitRounding(timeUnit, tz); - - // Sunday, 29 October 2000, 01:00:00 clocks were turned backward 1 hour - // to Sunday, 29 October 2000, 00:00:00 local standard time instead - // which means there were two midnights that day. - - long midnightBeforeTransition = time("2000-10-29T00:00:00", tz); - long midnightOfTransition = time("2000-10-29T00:00:00-01:00"); - assertEquals(60L * 60L * 1000L, midnightOfTransition - midnightBeforeTransition); - long nextMidnight = time("2000-10-30T00:00:00", tz); - - assertInterval(midnightBeforeTransition, nextMidnight, rounding, 25 * 60, tz); - - assertThat(rounding.round(time("2000-10-29T06:00:00-01:00")), isDate(time("2000-10-29T00:00:00Z"), tz)); - - // Second case, dst happens at 0am local time, switching back one hour to 23pm local time. - // We want the overlapping hour to count for the previous day here - - tz = DateTimeZone.forID("America/Lima"); - rounding = new Rounding.TimeUnitRounding(timeUnit, tz); - - // Sunday, 1 April 1990, 00:00:00 clocks were turned backward 1 hour to - // Saturday, 31 March 1990, 23:00:00 local standard time instead - - midnightBeforeTransition = time("1990-03-31T00:00:00.000-04:00"); - nextMidnight = time("1990-04-01T00:00:00.000-05:00"); - assertInterval(midnightBeforeTransition, nextMidnight, rounding, 25 * 60, tz); - - // make sure the next interval is 24h long again - long midnightAfterTransition = time("1990-04-01T00:00:00.000-05:00"); - nextMidnight = time("1990-04-02T00:00:00.000-05:00"); - assertInterval(midnightAfterTransition, nextMidnight, rounding, 24 * 60, tz); - } - - /** - * Test that time zones are correctly parsed. There is a bug with - * Joda 2.9.4 (see https://github.com/JodaOrg/joda-time/issues/373) - */ - public void testsTimeZoneParsing() { - final DateTime expected = new DateTime(2016, 11, 10, 5, 37, 59, randomDateTimeZone()); - - // Formatter used to print and parse the sample date. - // Printing the date works but parsing it back fails - // with Joda 2.9.4 - DateTimeFormatter formatter = DateTimeFormat.forPattern("YYYY-MM-dd'T'HH:mm:ss " + randomFrom("ZZZ", "[ZZZ]", "'['ZZZ']'")); - - String dateTimeAsString = formatter.print(expected); - assertThat(dateTimeAsString, startsWith("2016-11-10T05:37:59 ")); - - DateTime parsedDateTime = formatter.parseDateTime(dateTimeAsString); - assertThat(parsedDateTime.getZone(), equalTo(expected.getZone())); - } - - private static void assertInterval(long rounded, long nextRoundingValue, Rounding rounding, int minutes, DateTimeZone tz) { - assertInterval(rounded, dateBetween(rounded, nextRoundingValue), nextRoundingValue, rounding, tz); - assertEquals(DateTimeConstants.MILLIS_PER_MINUTE * minutes, nextRoundingValue - rounded); - } - - /** - * perform a number on assertions and checks on {@link TimeUnitRounding} intervals - * @param rounded the expected low end of the rounding interval - * @param unrounded a date in the interval to be checked for rounding - * @param nextRoundingValue the expected upper end of the rounding interval - * @param rounding the rounding instance - */ - private static void assertInterval(long rounded, long unrounded, long nextRoundingValue, Rounding rounding, DateTimeZone tz) { - assertThat("rounding should be idempotent ", rounding.round(rounded), isDate(rounded, tz)); - assertThat("rounded value smaller or equal than unrounded" + rounding, rounded, lessThanOrEqualTo(unrounded)); - assertThat("values less than rounded should round further down" + rounding, rounding.round(rounded - 1), lessThan(rounded)); - assertThat("nextRounding value should be a rounded date", rounding.round(nextRoundingValue), isDate(nextRoundingValue, tz)); - assertThat( - "values above nextRounding should round down there", - rounding.round(nextRoundingValue + 1), - isDate(nextRoundingValue, tz) - ); - - if (isTimeWithWellDefinedRounding(tz, unrounded)) { - assertThat("nextRounding value should be greater than date" + rounding, nextRoundingValue, greaterThan(unrounded)); - - long dateBetween = dateBetween(rounded, nextRoundingValue); - assertThat( - "dateBetween [" + new DateTime(dateBetween, tz) + "] should round down to roundedDate", - rounding.round(dateBetween), - isDate(rounded, tz) - ); - assertThat( - "dateBetween [" + new DateTime(dateBetween, tz) + "] should round up to nextRoundingValue", - rounding.nextRoundingValue(dateBetween), - isDate(nextRoundingValue, tz) - ); - } - } - - private static boolean isTimeWithWellDefinedRounding(DateTimeZone tz, long t) { - if (tz.getID().equals("America/St_Johns") - || tz.getID().equals("America/Goose_Bay") - || tz.getID().equals("America/Moncton") - || tz.getID().equals("Canada/Newfoundland")) { - - // Clocks went back at 00:01 between 1987 and 2010, causing overlapping days. - // These timezones are otherwise uninteresting, so just skip this period. - - return t <= time("1987-10-01T00:00:00Z") || t >= time("2010-12-01T00:00:00Z"); - } - - if (tz.getID().equals("Antarctica/Casey")) { - - // Clocks went back 3 hours at 02:00 on 2010-03-05, causing overlapping days. - - return t <= time("2010-03-03T00:00:00Z") || t >= time("2010-03-07T00:00:00Z"); - } - - return true; - } - - private static long dateBetween(long lower, long upper) { - long dateBetween = randomLongBetween(lower, upper - 1); - assert lower <= dateBetween && dateBetween < upper; - return dateBetween; - } - - private static DateTimeUnit randomTimeUnit() { - byte id = (byte) randomIntBetween(1, 8); - return DateTimeUnit.resolve(id); - } - - private static long time(String time) { - return time(time, DateTimeZone.UTC); - } - - private static long time(String time, DateTimeZone zone) { - return ISODateTimeFormat.dateOptionalTimeParser().withZone(zone).parseMillis(time); - } - - private static Matcher isDate(final long expected, DateTimeZone tz) { - return new TypeSafeMatcher() { - @Override - public boolean matchesSafely(final Long item) { - return expected == item.longValue(); - } - - @Override - public void describeTo(Description description) { - description.appendText(new DateTime(expected, tz) + " [" + expected + "] "); - } - - @Override - protected void describeMismatchSafely(final Long actual, final Description mismatchDescription) { - mismatchDescription.appendText(" was ").appendValue(new DateTime(actual, tz) + " [" + actual + "]"); - } - }; - } -} diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 1d5c2a0f01b5c..74bae7b5eb7cf 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -68,6 +68,7 @@ import org.opensearch.gateway.PersistedClusterStateService.Writer; import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.RemotePersistenceStats; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.node.Node; @@ -86,10 +87,12 @@ import java.util.Collections; import java.util.List; import java.util.Locale; +import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; +import org.mockito.ArgumentCaptor; import org.mockito.Mockito; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_INDEX_UUID; @@ -104,6 +107,7 @@ import static org.hamcrest.Matchers.nullValue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.Mockito.doCallRealMethod; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -763,6 +767,43 @@ public void testRemotePersistedState() throws IOException { assertThat(remotePersistedState.getLastAcceptedState().metadata().clusterUUIDCommitted(), equalTo(true)); } + public void testRemotePersistedStateNotCommitted() throws IOException { + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + final String previousClusterUUID = "prev-cluster-uuid"; + final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder() + .previousClusterUUID(previousClusterUUID) + .clusterTerm(1L) + .stateVersion(5L) + .build(); + Mockito.when(remoteClusterStateService.getLatestClusterMetadataManifest(Mockito.any(), Mockito.any())) + .thenReturn(Optional.of(manifest)); + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())).thenReturn(manifest); + + Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())).thenReturn(manifest); + CoordinationState.PersistedState remotePersistedState = new RemotePersistedState( + remoteClusterStateService, + ClusterState.UNKNOWN_UUID + ); + + assertThat(remotePersistedState.getLastAcceptedState(), nullValue()); + assertThat(remotePersistedState.getCurrentTerm(), equalTo(0L)); + + final long clusterTerm = randomNonNegativeLong(); + ClusterState clusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + ); + clusterState = ClusterState.builder(clusterState) + .metadata(Metadata.builder(clusterState.getMetadata()).clusterUUID(randomAlphaOfLength(10)).clusterUUIDCommitted(false).build()) + .build(); + + remotePersistedState.setLastAcceptedState(clusterState); + ArgumentCaptor previousClusterUUIDCaptor = ArgumentCaptor.forClass(String.class); + ArgumentCaptor clusterStateCaptor = ArgumentCaptor.forClass(ClusterState.class); + Mockito.verify(remoteClusterStateService).writeFullMetadata(clusterStateCaptor.capture(), previousClusterUUIDCaptor.capture()); + assertEquals(previousClusterUUID, previousClusterUUIDCaptor.getValue()); + } + public void testRemotePersistedStateExceptionOnFullStateUpload() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final String previousClusterUUID = "prev-cluster-uuid"; @@ -779,6 +820,26 @@ public void testRemotePersistedStateExceptionOnFullStateUpload() throws IOExcept assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); } + public void testRemotePersistedStateFailureStats() throws IOException { + RemotePersistenceStats remoteStateStats = new RemotePersistenceStats(); + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + final String previousClusterUUID = "prev-cluster-uuid"; + Mockito.doThrow(IOException.class).when(remoteClusterStateService).writeFullMetadata(Mockito.any(), Mockito.any()); + when(remoteClusterStateService.getStats()).thenReturn(remoteStateStats); + doCallRealMethod().when(remoteClusterStateService).writeMetadataFailed(); + CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); + + final long clusterTerm = randomNonNegativeLong(); + final ClusterState clusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + ); + + assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); + assertEquals(1, remoteClusterStateService.getStats().getFailedCount()); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + } + public void testGatewayForRemoteState() throws IOException { MockGatewayMetaState gateway = null; try { diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 4be5fc03c2a6d..65477051cdb30 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -68,6 +68,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Stream; @@ -230,10 +231,17 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); ArgumentCaptor writeContextArgumentCaptor = ArgumentCaptor.forClass(WriteContext.class); - + AtomicReference capturedWriteContext = new AtomicReference<>(); doAnswer((i) -> { actionListenerArgumentCaptor.getValue().onResponse(null); return null; + }).doAnswer((i) -> { + actionListenerArgumentCaptor.getValue().onResponse(null); + capturedWriteContext.set(writeContextArgumentCaptor.getValue()); + return null; + }).doAnswer((i) -> { + actionListenerArgumentCaptor.getValue().onResponse(null); + return null; }).when(container).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); remoteClusterStateService.start(); @@ -262,27 +270,30 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); assertThat(manifest.getPreviousClusterUUID(), is(expectedManifest.getPreviousClusterUUID())); - assertEquals(actionListenerArgumentCaptor.getAllValues().size(), 2); - assertEquals(writeContextArgumentCaptor.getAllValues().size(), 2); + assertEquals(actionListenerArgumentCaptor.getAllValues().size(), 3); + assertEquals(writeContextArgumentCaptor.getAllValues().size(), 3); - WriteContext capturedWriteContext = writeContextArgumentCaptor.getValue(); - byte[] writtenBytes = capturedWriteContext.getStreamProvider(Integer.MAX_VALUE).provideStream(0).getInputStream().readAllBytes(); + byte[] writtenBytes = capturedWriteContext.get() + .getStreamProvider(Integer.MAX_VALUE) + .provideStream(0) + .getInputStream() + .readAllBytes(); IndexMetadata writtenIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.deserialize( - capturedWriteContext.getFileName(), + capturedWriteContext.get().getFileName(), blobStoreRepository.getNamedXContentRegistry(), new BytesArray(writtenBytes) ); - assertEquals(capturedWriteContext.getWritePriority(), WritePriority.HIGH); + assertEquals(capturedWriteContext.get().getWritePriority(), WritePriority.URGENT); assertEquals(writtenIndexMetadata.getNumberOfShards(), 1); assertEquals(writtenIndexMetadata.getNumberOfReplicas(), 0); assertEquals(writtenIndexMetadata.getIndex().getName(), "test-index"); assertEquals(writtenIndexMetadata.getIndex().getUUID(), "index-uuid"); long expectedChecksum = RemoteTransferContainer.checksumOfChecksum(new ByteArrayIndexInput("metadata-filename", writtenBytes), 8); - if (capturedWriteContext.doRemoteDataIntegrityCheck()) { - assertEquals(capturedWriteContext.getExpectedChecksum().longValue(), expectedChecksum); + if (capturedWriteContext.get().doRemoteDataIntegrityCheck()) { + assertEquals(capturedWriteContext.get().getExpectedChecksum().longValue(), expectedChecksum); } else { - assertEquals(capturedWriteContext.getExpectedChecksum(), null); + assertEquals(capturedWriteContext.get().getExpectedChecksum(), null); } } @@ -294,17 +305,56 @@ public void testWriteFullMetadataFailureForGlobalMetadata() throws IOException { ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); doAnswer((i) -> { - actionListenerArgumentCaptor.getValue().onFailure(new RuntimeException("Cannot upload to remote")); + // For async write action listener will be called from different thread, replicating same behaviour here. + new Thread(new Runnable() { + @Override + public void run() { + actionListenerArgumentCaptor.getValue().onFailure(new RuntimeException("Cannot upload to remote")); + } + }).start(); return null; }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); remoteClusterStateService.start(); assertThrows( - RemoteClusterStateService.GlobalMetadataTransferException.class, + RemoteClusterStateService.RemoteStateTransferException.class, () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) ); } + public void testTimeoutWhileWritingManifestFile() throws IOException { + // verify update metadata manifest upload timeout + int metadataManifestUploadTimeout = 2; + Settings newSettings = Settings.builder() + .put("cluster.remote_store.state.metadata_manifest.upload_timeout", metadataManifestUploadTimeout + "s") + .build(); + clusterSettings.applySettings(newSettings); + + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + AsyncMultiStreamBlobContainer container = (AsyncMultiStreamBlobContainer) mockBlobStoreObjects(AsyncMultiStreamBlobContainer.class); + + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + + doAnswer((i) -> { // For Global Metadata + actionListenerArgumentCaptor.getValue().onResponse(null); + return null; + }).doAnswer((i) -> { // For Index Metadata + actionListenerArgumentCaptor.getValue().onResponse(null); + return null; + }).doAnswer((i) -> { + // For Manifest file perform No Op, so latch in code will timeout + return null; + }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); + + remoteClusterStateService.start(); + try { + remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)); + } catch (Exception e) { + assertTrue(e instanceof RemoteClusterStateService.RemoteStateTransferException); + assertTrue(e.getMessage().contains("Timed out waiting for transfer of manifest file to complete")); + } + } + public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); AsyncMultiStreamBlobContainer container = (AsyncMultiStreamBlobContainer) mockBlobStoreObjects(AsyncMultiStreamBlobContainer.class); @@ -321,9 +371,10 @@ public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOEx remoteClusterStateService.start(); assertThrows( - RemoteClusterStateService.IndexMetadataTransferException.class, + RemoteClusterStateService.RemoteStateTransferException.class, () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) ); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); } public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOException { @@ -331,6 +382,7 @@ public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOExc remoteClusterStateService.start(); final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata(clusterState, clusterState, null); Assert.assertThat(manifest, nullValue()); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); } public void testFailWriteIncrementalMetadataWhenTermChanged() { @@ -657,7 +709,8 @@ public void testReadLatestMetadataManifestSuccessButNoIndexMetadata() throws IOE remoteClusterStateService.start(); assertEquals( - remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + remoteClusterStateService.getLatestClusterState(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + .getMetadata() .getIndices() .size(), 0 @@ -686,8 +739,10 @@ public void testReadLatestMetadataManifestSuccessButIndexMetadataFetchIOExceptio remoteClusterStateService.start(); Exception e = assertThrows( IllegalStateException.class, - () -> remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) - .getIndices() + () -> remoteClusterStateService.getLatestClusterState( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ).getMetadata().getIndices() ); assertEquals(e.getMessage(), "Error while downloading IndexMetadata - " + uploadedIndexMetadata.getUploadedFilename()); } @@ -732,10 +787,11 @@ public void testReadGlobalMetadata() throws IOException { final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); + long prevClusterStateVersion = 13L; final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() .indices(List.of()) .clusterTerm(1L) - .stateVersion(1L) + .stateVersion(prevClusterStateVersion) .stateUUID("state-uuid") .clusterUUID("cluster-uuid") .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) @@ -748,12 +804,20 @@ public void testReadGlobalMetadata() throws IOException { Metadata expactedMetadata = Metadata.builder().persistentSettings(Settings.builder().put("readonly", true).build()).build(); mockBlobContainerForGlobalMetadata(mockBlobStoreObjects(), expectedManifest, expactedMetadata); - Metadata metadata = remoteClusterStateService.getLatestMetadata( + ClusterState newClusterState = remoteClusterStateService.getLatestClusterState( clusterState.getClusterName().value(), clusterState.metadata().clusterUUID() ); - assertTrue(Metadata.isGlobalStateEquals(metadata, expactedMetadata)); + assertTrue(Metadata.isGlobalStateEquals(newClusterState.getMetadata(), expactedMetadata)); + + long newClusterStateVersion = newClusterState.getVersion(); + assert prevClusterStateVersion == newClusterStateVersion : String.format( + Locale.ROOT, + "ClusterState version is not restored. previousClusterVersion: [%s] is not equal to current [%s]", + prevClusterStateVersion, + newClusterStateVersion + ); } public void testReadGlobalMetadataIOException() throws IOException { @@ -785,7 +849,10 @@ public void testReadGlobalMetadataIOException() throws IOException { remoteClusterStateService.start(); Exception e = assertThrows( IllegalStateException.class, - () -> remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + () -> remoteClusterStateService.getLatestClusterState( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ) ); assertEquals(e.getMessage(), "Error while downloading Global Metadata - " + globalIndexMetadataName); } @@ -816,16 +883,15 @@ public void testReadLatestIndexMetadataSuccess() throws IOException { .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") - .globalMetadataFileName("global-metadata-file") .codecVersion(ClusterMetadataManifest.CODEC_V0) .build(); mockBlobContainer(mockBlobStoreObjects(), expectedManifest, Map.of(index.getUUID(), indexMetadata)); - Map indexMetadataMap = remoteClusterStateService.getLatestMetadata( + Map indexMetadataMap = remoteClusterStateService.getLatestClusterState( clusterState.getClusterName().value(), clusterState.metadata().clusterUUID() - ).getIndices(); + ).getMetadata().getIndices(); assertEquals(indexMetadataMap.size(), 1); assertEquals(indexMetadataMap.get(index.getName()).getIndex().getName(), index.getName()); @@ -903,7 +969,7 @@ public void testGetValidPreviousClusterUUIDWithMultipleChains() throws IOExcepti "cluster-uuid3", "cluster-uuid1" ); - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean()); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean(), Collections.emptyMap()); remoteClusterStateService.start(); String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"); @@ -925,6 +991,23 @@ public void testGetValidPreviousClusterUUIDWithInvalidMultipleChains() throws IO assertThrows(IllegalStateException.class, () -> remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster")); } + public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOException { + Map clusterUUIDsPointers = Map.of( + "cluster-uuid1", + ClusterState.UNKNOWN_UUID, + "cluster-uuid2", + "cluster-uuid1", + "cluster-uuid3", + "cluster-uuid2" + ); + Map clusterUUIDCommitted = Map.of("cluster-uuid1", true, "cluster-uuid2", true, "cluster-uuid3", false); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, clusterUUIDCommitted); + + remoteClusterStateService.start(); + String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"); + assertThat(previousClusterUUID, equalTo("cluster-uuid2")); + } + public void testDeleteStaleClusterUUIDs() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder() @@ -991,6 +1074,38 @@ public void testDeleteStaleClusterUUIDs() throws IOException { } } + public void testRemoteStateStats() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + + assertTrue(remoteClusterStateService.getStats() != null); + assertEquals(1, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(0, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); + assertEquals(0, remoteClusterStateService.getStats().getFailedCount()); + } + + public void testRemoteStateCleanupFailureStats() throws IOException { + BlobContainer blobContainer = mock(BlobContainer.class); + doThrow(IOException.class).when(blobContainer).delete(); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleUUIDsClusterMetadata("cluster1", Arrays.asList("cluster-uuid1")); + try { + assertBusy(() -> { + // wait for stats to get updated + assertTrue(remoteClusterStateService.getStats() != null); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(1, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + public void testFileNames() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() @@ -1071,6 +1186,22 @@ public void testIndexMetadataUploadWaitTimeSetting() { assertEquals(indexMetadataUploadTimeout, remoteClusterStateService.getIndexMetadataUploadTimeout().seconds()); } + public void testMetadataManifestUploadWaitTimeSetting() { + // verify default value + assertEquals( + RemoteClusterStateService.METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT, + remoteClusterStateService.getMetadataManifestUploadTimeout() + ); + + // verify update metadata manifest upload timeout + int metadataManifestUploadTimeout = randomIntBetween(1, 10); + Settings newSettings = Settings.builder() + .put("cluster.remote_store.state.metadata_manifest.upload_timeout", metadataManifestUploadTimeout + "s") + .build(); + clusterSettings.applySettings(newSettings); + assertEquals(metadataManifestUploadTimeout, remoteClusterStateService.getMetadataManifestUploadTimeout().seconds()); + } + public void testGlobalMetadataUploadWaitTimeSetting() { // verify default value assertEquals( @@ -1088,11 +1219,21 @@ public void testGlobalMetadataUploadWaitTimeSetting() { } private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap()); } - private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers, boolean differGlobalMetadata) - throws IOException { + private void mockObjectsForGettingPreviousClusterUUID( + Map clusterUUIDsPointers, + Map clusterUUIDCommitted + ) throws IOException { + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, clusterUUIDCommitted); + } + + private void mockObjectsForGettingPreviousClusterUUID( + Map clusterUUIDsPointers, + boolean differGlobalMetadata, + Map clusterUUIDCommitted + ) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); when(blobPath.add(anyString())).thenReturn(blobPath); @@ -1115,7 +1256,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid1"), randomAlphaOfLength(10), uploadedIndexMetadataList1, - "test-metadata1" + "test-metadata1", + clusterUUIDCommitted.getOrDefault("cluster-uuid1", true) ); Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build(); IndexMetadata indexMetadata1 = IndexMetadata.builder("index1") @@ -1144,7 +1286,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid2"), randomAlphaOfLength(10), uploadedIndexMetadataList2, - "test-metadata2" + "test-metadata2", + clusterUUIDCommitted.getOrDefault("cluster-uuid2", true) ); IndexMetadata indexMetadata3 = IndexMetadata.builder("index1") .settings(indexSettings) @@ -1189,7 +1332,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid3"), randomAlphaOfLength(10), uploadedIndexMetadataList3, - "test-metadata3" + "test-metadata3", + clusterUUIDCommitted.getOrDefault("cluster-uuid3", true) ); mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3); mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V1); @@ -1217,7 +1361,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest( String previousClusterUUID, String stateUUID, List uploadedIndexMetadata, - String globalMetadataFileName + String globalMetadataFileName, + Boolean isUUIDCommitted ) { return ClusterMetadataManifest.builder() .indices(uploadedIndexMetadata) @@ -1229,7 +1374,7 @@ private ClusterMetadataManifest generateClusterMetadataManifest( .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID(previousClusterUUID) .committed(true) - .clusterUUIDCommitted(true) + .clusterUUIDCommitted(isUUIDCommitted) .globalMetadataFileName(globalMetadataFileName) .codecVersion(ClusterMetadataManifest.CODEC_V1) .build(); diff --git a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java index 305c3a3acbf75..81d8bccb86c60 100644 --- a/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/InternalEngineTests.java @@ -40,7 +40,6 @@ import org.apache.logging.log4j.core.LogEvent; import org.apache.logging.log4j.core.appender.AbstractAppender; import org.apache.logging.log4j.core.filter.RegexFilter; -import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.document.Field; import org.apache.lucene.document.KeywordField; import org.apache.lucene.document.LongPoint; @@ -3237,22 +3236,10 @@ public void testUnreferencedFileCleanUpOnSegmentMergeFailureWithCleanUpEnabled() MockDirectoryWrapper wrapper = newMockDirectory(); final CountDownLatch cleanupCompleted = new CountDownLatch(1); MockDirectoryWrapper.Failure fail = new MockDirectoryWrapper.Failure() { - public boolean didFail1; - public boolean didFail2; - @Override public void eval(MockDirectoryWrapper dir) throws IOException { - if (!doFail) { - return; - } - - // Fail segment merge with diskfull during merging terms. - if (callStackContainsAnyOf("mergeTerms") && !didFail1) { - didFail1 = true; - throw new IOException("No space left on device"); - } - if (callStackContains(LiveDocsFormat.class, "writeLiveDocs") && !didFail2) { - didFail2 = true; + // Fail segment merge with diskfull during merging terms + if (callStackContainsAnyOf("mergeTerms")) { throw new IOException("No space left on device"); } } @@ -3325,7 +3312,6 @@ public void onFailedEngine(String reason, Exception e) { segments = engine.segments(false); assertThat(segments.size(), equalTo(2)); - fail.setDoFail(); // IndexWriter can throw either IOException or IllegalStateException depending on whether tragedy is set or not. expectThrowsAnyOf( Arrays.asList(IOException.class, IllegalStateException.class), @@ -3345,20 +3331,10 @@ public void testUnreferencedFileCleanUpOnSegmentMergeFailureWithCleanUpDisabled( MockDirectoryWrapper wrapper = newMockDirectory(); final CountDownLatch cleanupCompleted = new CountDownLatch(1); MockDirectoryWrapper.Failure fail = new MockDirectoryWrapper.Failure() { - public boolean didFail1; - public boolean didFail2; @Override public void eval(MockDirectoryWrapper dir) throws IOException { - if (!doFail) { - return; - } - if (callStackContainsAnyOf("mergeTerms") && !didFail1) { - didFail1 = true; - throw new IOException("No space left on device"); - } - if (callStackContains(LiveDocsFormat.class, "writeLiveDocs") && !didFail2) { - didFail2 = true; + if (callStackContainsAnyOf("mergeTerms")) { throw new IOException("No space left on device"); } } @@ -3439,7 +3415,6 @@ public void onFailedEngine(String reason, Exception e) { segments = engine.segments(false); assertThat(segments.size(), equalTo(2)); - fail.setDoFail(); // IndexWriter can throw either IOException or IllegalStateException depending on whether tragedy is set or not. expectThrowsAnyOf( Arrays.asList(IOException.class, IllegalStateException.class), @@ -3459,20 +3434,10 @@ public void testUnreferencedFileCleanUpFailsOnSegmentMergeFailureWhenDirectoryCl MockDirectoryWrapper wrapper = newMockDirectory(); final CountDownLatch cleanupCompleted = new CountDownLatch(1); MockDirectoryWrapper.Failure fail = new MockDirectoryWrapper.Failure() { - public boolean didFail1; - public boolean didFail2; @Override public void eval(MockDirectoryWrapper dir) throws IOException { - if (!doFail) { - return; - } - if (callStackContainsAnyOf("mergeTerms") && !didFail1) { - didFail1 = true; - throw new IOException("No space left on device"); - } - if (callStackContains(LiveDocsFormat.class, "writeLiveDocs") && !didFail2) { - didFail2 = true; + if (callStackContainsAnyOf("mergeTerms")) { throw new IOException("No space left on device"); } } @@ -3537,7 +3502,6 @@ public void onFailedEngine(String reason, Exception e) { segments = engine.segments(false); assertThat(segments.size(), equalTo(2)); - fail.setDoFail(); // Close the store so that unreferenced file cleanup will fail. store.close(); diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index 9ef9bec01cb38..dc2111fdcfc56 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -2745,6 +2745,7 @@ public void testRelocatedForRemoteTranslogBackedIndexWithAsyncDurability() throw AllocationId.newRelocation(routing.allocationId()) ); IndexShardTestCase.updateRoutingEntry(indexShard, routing); + indexDoc(indexShard, "_doc", "0"); assertTrue(indexShard.isSyncNeeded()); try { indexShard.relocated(routing.getTargetRelocatingShard().allocationId().getId(), primaryContext -> {}, () -> {}); @@ -2849,6 +2850,7 @@ public void testSyncSegmentsFromGivenRemoteSegmentStore() throws IOException { indexDoc(source, "_doc", "1"); indexDoc(source, "_doc", "2"); source.refresh("test"); + assertTrue("At lease one remote sync should have been completed", source.isRemoteSegmentStoreInSync()); assertDocs(source, "1", "2"); indexDoc(source, "_doc", "3"); source.refresh("test"); diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardCorruptionTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardCorruptionTests.java new file mode 100644 index 0000000000000..21bf580712761 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardCorruptionTests.java @@ -0,0 +1,75 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.shard; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.opensearch.core.util.FileSystemUtils; +import org.opensearch.test.CorruptionUtils; + +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.stream.Stream; + +@LuceneTestCase.SuppressFileSystems("WindowsFS") +public class RemoteIndexShardCorruptionTests extends IndexShardTestCase { + + public void testLocalDirectoryContains() throws IOException { + IndexShard indexShard = newStartedShard(true); + int numDocs = between(1, 10); + for (int i = 0; i < numDocs; i++) { + indexDoc(indexShard, "_doc", Integer.toString(i)); + } + flushShard(indexShard); + indexShard.store().incRef(); + Directory localDirectory = indexShard.store().directory(); + Path shardPath = indexShard.shardPath().getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME); + Path tempDir = createTempDir(); + for (String file : localDirectory.listAll()) { + if (file.equals("write.lock") || file.startsWith("extra")) { + continue; + } + boolean corrupted = randomBoolean(); + long checksum = 0; + try (IndexInput indexInput = localDirectory.openInput(file, IOContext.DEFAULT)) { + checksum = CodecUtil.retrieveChecksum(indexInput); + } + if (corrupted) { + Files.copy(shardPath.resolve(file), tempDir.resolve(file)); + try (FileChannel raf = FileChannel.open(shardPath.resolve(file), StandardOpenOption.READ, StandardOpenOption.WRITE)) { + CorruptionUtils.corruptAt(shardPath.resolve(file), raf, (int) (raf.size() - 8)); + } + } + if (corrupted == false) { + assertTrue(indexShard.localDirectoryContains(localDirectory, file, checksum)); + } else { + assertFalse(indexShard.localDirectoryContains(localDirectory, file, checksum)); + assertFalse(Files.exists(shardPath.resolve(file))); + } + } + try (Stream files = Files.list(tempDir)) { + files.forEach(p -> { + try { + Files.copy(p, shardPath.resolve(p.getFileName())); + } catch (IOException e) { + // Ignore + } + }); + } + FileSystemUtils.deleteSubDirectories(tempDir); + indexShard.store().decRef(); + closeShards(indexShard); + } +} diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index 703a7d457d5b6..20cec90d79e3e 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -31,18 +31,20 @@ import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.ReplicationFailedException; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.CorruptionUtils; import org.hamcrest.MatcherAssert; import org.junit.Assert; import java.io.IOException; +import java.nio.channels.FileChannel; import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -360,6 +362,7 @@ public void testPrimaryRestart() throws Exception { * prevent FileAlreadyExistsException. It does so by only copying files in first round of segment replication without * committing locally so that in next round of segment replication those files are not considered for download again */ + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/10885") public void testSegRepSucceedsOnPreviousCopiedFiles() throws Exception { try (ReplicationGroup shards = createGroup(1, getIndexSettings(), new NRTReplicationEngineFactory())) { shards.startAll(); @@ -371,37 +374,9 @@ public void testSegRepSucceedsOnPreviousCopiedFiles() throws Exception { final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class); final SegmentReplicationTargetService targetService = newTargetService(sourceFactory); - Runnable[] runAfterGetFiles = { () -> { throw new RuntimeException("Simulated"); }, () -> {} }; - AtomicInteger index = new AtomicInteger(0); - RemoteStoreReplicationSource testRSReplicationSource = new RemoteStoreReplicationSource(replica) { - @Override - public void getCheckpointMetadata( - long replicationId, - ReplicationCheckpoint checkpoint, - ActionListener listener - ) { - super.getCheckpointMetadata(replicationId, checkpoint, listener); - } - - @Override - public void getSegmentFiles( - long replicationId, - ReplicationCheckpoint checkpoint, - List filesToFetch, - IndexShard indexShard, - BiConsumer fileProgressTracker, - ActionListener listener - ) { - super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, (fileName, bytesRecovered) -> {}, listener); - runAfterGetFiles[index.getAndIncrement()].run(); - } - - @Override - public String getDescription() { - return "TestRemoteStoreReplicationSource"; - } - }; - when(sourceFactory.get(any())).thenReturn(testRSReplicationSource); + when(sourceFactory.get(any())).thenReturn( + getRemoteStoreReplicationSource(replica, () -> { throw new RuntimeException("Simulated"); }) + ); CountDownLatch latch = new CountDownLatch(1); // Start first round of segment replication. This should fail with simulated error but with replica having @@ -412,6 +387,7 @@ public String getDescription() { new SegmentReplicationTargetService.SegmentReplicationListener() { @Override public void onReplicationDone(SegmentReplicationState state) { + latch.countDown(); Assert.fail("Replication should fail with simulated error"); } @@ -421,9 +397,9 @@ public void onReplicationFailure( ReplicationFailedException e, boolean sendShardFailure ) { + latch.countDown(); assertFalse(sendShardFailure); logger.error("Replication error", e); - latch.countDown(); } } ); @@ -439,7 +415,8 @@ public void onReplicationFailure( assertEquals("Files should be copied to disk", false, onDiskFiles.isEmpty()); assertEquals(target.state().getStage(), SegmentReplicationState.Stage.GET_FILES); - // Start next round of segment replication + // Start next round of segment replication and not throwing exception resulting in commit on replica + when(sourceFactory.get(any())).thenReturn(getRemoteStoreReplicationSource(replica, () -> {})); CountDownLatch waitForSecondRound = new CountDownLatch(1); final SegmentReplicationTarget newTarget = targetService.startReplication( replica, @@ -456,9 +433,9 @@ public void onReplicationFailure( ReplicationFailedException e, boolean sendShardFailure ) { + waitForSecondRound.countDown(); logger.error("Replication error", e); Assert.fail("Replication should not fail"); - waitForSecondRound.countDown(); } } ); @@ -471,6 +448,120 @@ public void onReplicationFailure( } } + /** + * This test validates that local non-readable (corrupt, partially) on disk are deleted vs failing the + * replication event. This test mimics local files (not referenced by reader) by throwing exception post file copy and + * blocking update of reader. Once this is done, it corrupts one segment file and ensure that file is deleted in next + * round of segment replication by ensuring doc count. + */ + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/10885") + public void testNoFailuresOnFileReads() throws Exception { + try (ReplicationGroup shards = createGroup(1, getIndexSettings(), new NRTReplicationEngineFactory())) { + shards.startAll(); + IndexShard primary = shards.getPrimary(); + final IndexShard replica = shards.getReplicas().get(0); + + final int docCount = 10; + shards.indexDocs(docCount); + primary.refresh("Test"); + + final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class); + final SegmentReplicationTargetService targetService = newTargetService(sourceFactory); + when(sourceFactory.get(any())).thenReturn( + getRemoteStoreReplicationSource(replica, () -> { throw new RuntimeException("Simulated"); }) + ); + CountDownLatch waitOnReplicationCompletion = new CountDownLatch(1); + + // Start first round of segment replication. This should fail with simulated error but with replica having + // files in its local store but not in active reader. + SegmentReplicationTarget segmentReplicationTarget = targetService.startReplication( + replica, + primary.getLatestReplicationCheckpoint(), + new SegmentReplicationTargetService.SegmentReplicationListener() { + @Override + public void onReplicationDone(SegmentReplicationState state) { + waitOnReplicationCompletion.countDown(); + Assert.fail("Replication should fail with simulated error"); + } + + @Override + public void onReplicationFailure( + SegmentReplicationState state, + ReplicationFailedException e, + boolean sendShardFailure + ) { + waitOnReplicationCompletion.countDown(); + assertFalse(sendShardFailure); + } + } + ); + waitOnReplicationCompletion.await(); + assertBusy(() -> { assertEquals("Target should be closed", 0, segmentReplicationTarget.refCount()); }); + String fileToCorrupt = null; + // Corrupt one data file + Path shardPath = replica.shardPath().getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME); + for (String file : replica.store().directory().listAll()) { + if (file.equals("write.lock") || file.startsWith("extra") || file.startsWith("segment")) { + continue; + } + fileToCorrupt = file; + logger.info("--> Corrupting file {}", fileToCorrupt); + try (FileChannel raf = FileChannel.open(shardPath.resolve(file), StandardOpenOption.READ, StandardOpenOption.WRITE)) { + CorruptionUtils.corruptAt(shardPath.resolve(file), raf, (int) (raf.size() - 8)); + } + break; + } + Assert.assertNotNull(fileToCorrupt); + + // Ingest more data and start next round of segment replication + shards.indexDocs(docCount); + primary.refresh("Post corruption"); + replicateSegments(primary, List.of(replica)); + + assertDocCount(primary, 2 * docCount); + assertDocCount(replica, 2 * docCount); + + final Store.RecoveryDiff diff = Store.segmentReplicationDiff(primary.getSegmentMetadataMap(), replica.getSegmentMetadataMap()); + assertTrue(diff.missing.isEmpty()); + assertTrue(diff.different.isEmpty()); + + // clean up + shards.removeReplica(replica); + closeShards(replica); + } + } + + private RemoteStoreReplicationSource getRemoteStoreReplicationSource(IndexShard shard, Runnable postGetFilesRunnable) { + return new RemoteStoreReplicationSource(shard) { + @Override + public void getCheckpointMetadata( + long replicationId, + ReplicationCheckpoint checkpoint, + ActionListener listener + ) { + super.getCheckpointMetadata(replicationId, checkpoint, listener); + } + + @Override + public void getSegmentFiles( + long replicationId, + ReplicationCheckpoint checkpoint, + List filesToFetch, + IndexShard indexShard, + BiConsumer fileProgressTracker, + ActionListener listener + ) { + super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, (fileName, bytesRecovered) -> {}, listener); + postGetFilesRunnable.run(); + } + + @Override + public String getDescription() { + return "TestRemoteStoreReplicationSource"; + } + }; + } + @Override protected void validateShardIdleWithNoReplicas(IndexShard primary) { // ensure search idle conditions are met. diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index af596e7df02c2..e34bc078896f9 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -10,6 +10,7 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.opensearch.action.LatchedActionListener; +import org.opensearch.common.SetOnce; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; @@ -35,6 +36,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.LinkedList; @@ -180,6 +182,93 @@ public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) { assertEquals(4, fileTransferTracker.allUploaded().size()); } + public void testTransferSnapshotOnUploadTimeout() throws Exception { + doAnswer(invocationOnMock -> { + Thread.sleep(31 * 1000); + return null; + }).when(transferService).uploadBlobs(anySet(), anyMap(), any(ActionListener.class), any(WritePriority.class)); + FileTransferTracker fileTransferTracker = new FileTransferTracker( + new ShardId("index", "indexUUid", 0), + remoteTranslogTransferTracker + ); + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath, + fileTransferTracker, + remoteTranslogTransferTracker + ); + SetOnce exception = new SetOnce<>(); + translogTransferManager.transferSnapshot(createTransferSnapshot(), new TranslogTransferListener() { + @Override + public void onUploadComplete(TransferSnapshot transferSnapshot) {} + + @Override + public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) { + exception.set(ex); + } + }); + assertNotNull(exception.get()); + assertTrue(exception.get() instanceof TranslogUploadFailedException); + assertEquals("Timed out waiting for transfer of snapshot test-to-string to complete", exception.get().getMessage()); + } + + public void testTransferSnapshotOnThreadInterrupt() throws Exception { + SetOnce uploadThread = new SetOnce<>(); + doAnswer(invocationOnMock -> { + uploadThread.set(new Thread(() -> { + ActionListener listener = invocationOnMock.getArgument(2); + try { + Thread.sleep(31 * 1000); + } catch (InterruptedException ignore) { + List list = new ArrayList<>(invocationOnMock.getArgument(0)); + listener.onFailure(new FileTransferException(list.get(0), ignore)); + } + })); + uploadThread.get().start(); + return null; + }).when(transferService).uploadBlobs(anySet(), anyMap(), any(ActionListener.class), any(WritePriority.class)); + FileTransferTracker fileTransferTracker = new FileTransferTracker( + new ShardId("index", "indexUUid", 0), + remoteTranslogTransferTracker + ); + TranslogTransferManager translogTransferManager = new TranslogTransferManager( + shardId, + transferService, + remoteBaseTransferPath, + fileTransferTracker, + remoteTranslogTransferTracker + ); + SetOnce exception = new SetOnce<>(); + + Thread thread = new Thread(() -> { + try { + translogTransferManager.transferSnapshot(createTransferSnapshot(), new TranslogTransferListener() { + @Override + public void onUploadComplete(TransferSnapshot transferSnapshot) {} + + @Override + public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) { + exception.set(ex); + } + }); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + thread.start(); + + Thread.sleep(1000); + // Interrupt the thread + thread.interrupt(); + assertBusy(() -> { + assertNotNull(exception.get()); + assertTrue(exception.get() instanceof TranslogUploadFailedException); + assertEquals("Failed to upload test-to-string", exception.get().getMessage()); + }); + uploadThread.get().interrupt(); + } + private TransferSnapshot createTransferSnapshot() { return new TransferSnapshot() { @Override @@ -232,6 +321,11 @@ public Set getTranslogFileSnapshots() { public TranslogTransferMetadata getTranslogTransferMetadata() { return new TranslogTransferMetadata(primaryTerm, generation, minTranslogGeneration, randomInt(5)); } + + @Override + public String toString() { + return "test-to-string"; + } }; } diff --git a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java index c114b56bd0b39..c5f36fcc01983 100644 --- a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java +++ b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java @@ -43,6 +43,7 @@ import org.opensearch.common.blobstore.fs.FsBlobStore; import org.opensearch.common.blobstore.stream.read.ReadContext; import org.opensearch.common.blobstore.stream.write.WriteContext; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.compress.DeflateCompressor; import org.opensearch.common.io.Streams; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -65,8 +66,13 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; +import org.mockito.ArgumentCaptor; + import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.greaterThan; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; public class BlobStoreFormatTests extends OpenSearchTestCase { @@ -128,44 +134,36 @@ public void testBlobStoreAsyncOperations() throws IOException, InterruptedExcept BlobPath.cleanPath(), null ); + MockFsVerifyingBlobContainer spyContainer = spy(mockBlobContainer); ChecksumBlobStoreFormat checksumSMILE = new ChecksumBlobStoreFormat<>(BLOB_CODEC, "%s", BlobObj::fromXContent); - + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + ArgumentCaptor writeContextArgumentCaptor = ArgumentCaptor.forClass(WriteContext.class); CountDownLatch latch = new CountDownLatch(2); - ActionListener actionListener = new ActionListener<>() { - @Override - public void onResponse(Void unused) { - logger.info("---> Async write succeeded"); - latch.countDown(); - } - - @Override - public void onFailure(Exception e) { - logger.info("---> Failure in async write"); - throw new RuntimeException("async write should not fail"); - } - }; - // Write blobs in different formats checksumSMILE.writeAsync( new BlobObj("checksum smile"), - mockBlobContainer, + spyContainer, "check-smile", CompressorRegistry.none(), - actionListener, + getVoidActionListener(latch), ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); checksumSMILE.writeAsync( new BlobObj("checksum smile compressed"), - mockBlobContainer, + spyContainer, "check-smile-comp", CompressorRegistry.getCompressor(DeflateCompressor.NAME), - actionListener, + getVoidActionListener(latch), ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); latch.await(); + verify(spyContainer, times(2)).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); + assertEquals(2, writeContextArgumentCaptor.getAllValues().size()); + writeContextArgumentCaptor.getAllValues() + .forEach(writeContext -> assertEquals(WritePriority.NORMAL, writeContext.getWritePriority())); // Assert that all checksum blobs can be read assertEquals(checksumSMILE.read(mockBlobContainer.getDelegate(), "check-smile", xContentRegistry()).getText(), "checksum smile"); assertEquals( @@ -174,6 +172,39 @@ public void onFailure(Exception e) { ); } + public void testBlobStorePriorityAsyncOperation() throws IOException, InterruptedException { + BlobStore blobStore = createTestBlobStore(); + MockFsVerifyingBlobContainer mockBlobContainer = new MockFsVerifyingBlobContainer( + (FsBlobStore) blobStore, + BlobPath.cleanPath(), + null + ); + MockFsVerifyingBlobContainer spyContainer = spy(mockBlobContainer); + ChecksumBlobStoreFormat checksumSMILE = new ChecksumBlobStoreFormat<>(BLOB_CODEC, "%s", BlobObj::fromXContent); + + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + ArgumentCaptor writeContextArgumentCaptor = ArgumentCaptor.forClass(WriteContext.class); + CountDownLatch latch = new CountDownLatch(1); + + // Write blobs in different formats + checksumSMILE.writeAsyncWithUrgentPriority( + new BlobObj("cluster state diff"), + spyContainer, + "cluster-state-diff", + CompressorRegistry.none(), + getVoidActionListener(latch), + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS + ); + latch.await(); + + verify(spyContainer).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); + assertEquals(WritePriority.URGENT, writeContextArgumentCaptor.getValue().getWritePriority()); + assertEquals( + checksumSMILE.read(mockBlobContainer.getDelegate(), "cluster-state-diff", xContentRegistry()).getText(), + "cluster state diff" + ); + } + public void testBlobStoreOperations() throws IOException { BlobStore blobStore = createTestBlobStore(); BlobContainer blobContainer = blobStore.blobContainer(BlobPath.cleanPath()); @@ -228,6 +259,24 @@ public void testBlobCorruption() throws IOException { } } + private ActionListener getVoidActionListener(CountDownLatch latch) { + ActionListener actionListener = new ActionListener<>() { + @Override + public void onResponse(Void unused) { + logger.info("---> Async write succeeded"); + latch.countDown(); + } + + @Override + public void onFailure(Exception e) { + logger.info("---> Failure in async write"); + throw new RuntimeException("async write should not fail"); + } + }; + + return actionListener; + } + protected BlobStore createTestBlobStore() throws IOException { return new FsBlobStore(randomIntBetween(1, 8) * 1024, createTempDir(), false); } diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index d24cc24d28579..28d7706fb1493 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -1016,6 +1016,11 @@ public void setLastAcceptedState(ClusterState clusterState) { delegate.setLastAcceptedState(clusterState); } + @Override + public PersistedStateStats getStats() { + return null; + } + @Override public void close() { assertTrue(openPersistedStates.remove(this)); diff --git a/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java b/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java index 0dce5e78bf91f..67522bb618cf1 100644 --- a/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java +++ b/test/framework/src/main/java/org/opensearch/test/CorruptionUtils.java @@ -121,7 +121,7 @@ public static void corruptFile(Random random, Path... files) throws IOException } } - static void corruptAt(Path path, FileChannel channel, int position) throws IOException { + public static void corruptAt(Path path, FileChannel channel, int position) throws IOException { // read channel.position(position); long filePointer = channel.position(); diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index ad27d9834f159..0c6c81103922f 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -1666,6 +1666,11 @@ public void indexRandom(boolean forceRefresh, boolean dummyDocuments, boolean ma } } assertThat(actualErrors, emptyIterable()); + + if (dummyDocuments) { + bogusIds.addAll(indexRandomForMultipleSlices(indicesArray)); + } + if (!bogusIds.isEmpty()) { // delete the bogus types again - it might trigger merges or at least holes in the segments and enforces deleted docs! for (List doc : bogusIds) { @@ -1683,6 +1688,52 @@ public void indexRandom(boolean forceRefresh, boolean dummyDocuments, boolean ma } } + /* + * This method ingests bogus documents for the given indices such that multiple slices + * are formed. This is useful for testing with the concurrent search use-case as it creates + * multiple slices based on segment count. + * @param indices the indices in which bogus documents should be ingested + * */ + protected Set> indexRandomForMultipleSlices(String... indices) throws InterruptedException { + Set> bogusIds = new HashSet<>(); + int refreshCount = randomIntBetween(2, 3); + for (String index : indices) { + int numDocs = getNumShards(index).totalNumShards * randomIntBetween(2, 10); + while (refreshCount-- > 0) { + final CopyOnWriteArrayList> errors = new CopyOnWriteArrayList<>(); + List inFlightAsyncOperations = new ArrayList<>(); + for (int i = 0; i < numDocs; i++) { + String id = "bogus_doc_" + randomRealisticUnicodeOfLength(between(1, 10)) + dummmyDocIdGenerator.incrementAndGet(); + IndexRequestBuilder indexRequestBuilder = client().prepareIndex() + .setIndex(index) + .setId(id) + .setSource("{}", MediaTypeRegistry.JSON) + .setRouting(id); + indexRequestBuilder.execute( + new PayloadLatchedActionListener<>(indexRequestBuilder, newLatch(inFlightAsyncOperations), errors) + ); + bogusIds.add(Arrays.asList(index, id)); + } + for (CountDownLatch operation : inFlightAsyncOperations) { + operation.await(); + } + final List actualErrors = new ArrayList<>(); + for (Tuple tuple : errors) { + Throwable t = ExceptionsHelper.unwrapCause(tuple.v2()); + if (t instanceof OpenSearchRejectedExecutionException) { + logger.debug("Error indexing doc: " + t.getMessage() + ", reindexing."); + tuple.v1().execute().actionGet(); // re-index if rejected + } else { + actualErrors.add(tuple.v2()); + } + } + assertThat(actualErrors, emptyIterable()); + refresh(index); + } + } + return bogusIds; + } + private final AtomicInteger dummmyDocIdGenerator = new AtomicInteger(); /** Disables an index block for the specified index */ diff --git a/test/framework/src/main/java/org/opensearch/test/ParameterizedOpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/ParameterizedOpenSearchIntegTestCase.java index 636064d8e4f9d..f8813a8c5afa9 100644 --- a/test/framework/src/main/java/org/opensearch/test/ParameterizedOpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/ParameterizedOpenSearchIntegTestCase.java @@ -13,6 +13,8 @@ import org.junit.After; import org.junit.Before; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; + /** * Base class for running the tests with parameterization of the dynamic settings * For any class that wants to use parameterization, use @ParametersFactory to generate @@ -44,4 +46,10 @@ public void afterTests() { dynamicSettings.keySet().forEach(settingsToUnset::putNull); client().admin().cluster().prepareUpdateSettings().setPersistentSettings(settingsToUnset).get(); } + + public void indexRandomForConcurrentSearch(String... indices) throws InterruptedException { + if (dynamicSettings.get(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey()).equals("true")) { + indexRandomForMultipleSlices(indices); + } + } }