From 52a5e3f6e0ca599e3193807134ea42660ecdd195 Mon Sep 17 00:00:00 2001 From: Ryan Bogan <10944539+ryanbogan@users.noreply.github.com> Date: Fri, 7 Jul 2023 15:36:31 -0700 Subject: [PATCH 01/29] Change extension integration testing to reflect new initialization sequence (#8145) Signed-off-by: Ryan Bogan --- .../testclusters/OpenSearchCluster.java | 4 +- .../gradle/testclusters/OpenSearchNode.java | 46 ++----------------- .../TestClusterConfiguration.java | 2 +- 3 files changed, 7 insertions(+), 45 deletions(-) diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchCluster.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchCluster.java index b61acc934d590..ffb3360e3cc55 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchCluster.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchCluster.java @@ -206,8 +206,8 @@ public void setTestDistribution(TestDistribution distribution) { } @Override - public void extension(ExtensionsProperties extension) { - nodes.all(each -> each.extension(extension)); + public void extension(boolean extensionsEnabled) { + nodes.all(each -> each.extension(extensionsEnabled)); } @Override diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java index 7b4b5d354a00b..97c97c18bb012 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/OpenSearchNode.java @@ -46,8 +46,6 @@ import org.opensearch.gradle.Version; import org.opensearch.gradle.VersionProperties; import org.opensearch.gradle.info.BuildParams; -import org.yaml.snakeyaml.DumperOptions; -import org.yaml.snakeyaml.Yaml; import org.gradle.api.Action; import org.gradle.api.Named; import org.gradle.api.NamedDomainObjectContainer; @@ -94,7 +92,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Scanner; import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; @@ -144,7 +141,7 @@ public class OpenSearchNode implements TestClusterConfiguration { private final Map pluginAndModuleConfigurations = new HashMap<>(); private final List> plugins = new ArrayList<>(); private final List> modules = new ArrayList<>(); - private final List extensions = new ArrayList<>(); + private boolean extensionsEnabled = false; final LazyPropertyMap settings = new LazyPropertyMap<>("Settings", this); private final LazyPropertyMap keystoreSettings = new LazyPropertyMap<>("Keystore", this); private final LazyPropertyMap keystoreFiles = new LazyPropertyMap<>("Keystore files", this, FileEntry::new); @@ -346,39 +343,8 @@ public void module(String moduleProjectPath) { } @Override - public void extension(ExtensionsProperties extensions) { - this.extensions.add(extensions); - } - - public void writeExtensionFiles() { - try { - // Creates extensions.yml in the target directory - Path destination = getDistroDir().resolve("extensions").resolve("extensions.yml"); - if (!Files.exists(getDistroDir().resolve("extensions"))) { - Files.createDirectory(getDistroDir().resolve("extensions")); - } - DumperOptions dumperOptions = new DumperOptions(); - TestExtensionsList extensionsList = new TestExtensionsList(this.extensions); - dumperOptions.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK); - Yaml yaml = new Yaml(dumperOptions); - Files.write(destination, 
yaml.dump(extensionsList).getBytes()); - - /* - * SnakeYaml creates a Yaml file with an unnecessary line at the top with the class name - * This section of code removes that line while keeping everything else the same. - */ - - Scanner scanner = new Scanner(destination); - scanner.nextLine(); - StringBuilder extensionsString = new StringBuilder(); - while (scanner.hasNextLine()) { - extensionsString.append("\n" + scanner.nextLine()); - } - Files.write(destination, extensionsString.toString().getBytes()); - - } catch (IOException e) { - throw new UncheckedIOException("Failed to write to extensions.yml", e); - } + public void extension(boolean extensionsEnabled) { + this.extensionsEnabled = extensionsEnabled; } @Override @@ -551,10 +517,6 @@ public synchronized void start() { logToProcessStdout("installed plugins"); } - if (!extensions.isEmpty()) { - writeExtensionFiles(); - } - logToProcessStdout("Creating opensearch keystore with password set to [" + keystorePassword + "]"); if (keystorePassword.length() > 0) { runOpenSearchBinScriptWithInput(keystorePassword + "\n" + keystorePassword, "opensearch-keystore", "create", "-p"); @@ -829,7 +791,7 @@ private void startOpenSearchProcess() { environment.clear(); environment.putAll(getOpenSearchEnvironment()); - if (!extensions.isEmpty()) { + if (extensionsEnabled) { environment.put("OPENSEARCH_JAVA_OPTS", "-Dopensearch.experimental.feature.extensions.enabled=true"); } diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/TestClusterConfiguration.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/TestClusterConfiguration.java index 7378804d59d41..87cce2f0b32c0 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/TestClusterConfiguration.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/TestClusterConfiguration.java @@ -55,7 +55,7 @@ public interface TestClusterConfiguration { void setTestDistribution(TestDistribution distribution); - void extension(ExtensionsProperties extension); + void extension(boolean extensionsEnabled); void plugin(Provider plugin); From 2bced5de0f7e36b4eb9a78487bf5c08490bb57dc Mon Sep 17 00:00:00 2001 From: Ashish Date: Sat, 8 Jul 2023 09:46:06 +0530 Subject: [PATCH 02/29] [Remote Store] Mute testRemoteRefreshRetryOnFailure test case (#8542) Signed-off-by: Ashish Singh --- .../org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java index 4005e6359a2f7..13f76adc8a5a7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java @@ -27,6 +27,7 @@ @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStoreRefreshListenerIT extends AbstractRemoteStoreMockRepositoryIntegTestCase { + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/7703") public void testRemoteRefreshRetryOnFailure() throws Exception { Path location = randomRepoPath().toAbsolutePath(); From 516685d5f8e59423552b668b3034789aa9d77961 Mon Sep 17 00:00:00 2001 From: Suraj Singh Date: Sat, 8 Jul 2023 16:36:01 -0700 Subject: [PATCH 03/29] [Segment Replication] Prevent store clean up post reader close and refactor 
(#8463) * [Segment Replication] Prevent store clean up on reader close action Signed-off-by: Suraj Singh * [Segment Replication] Self review Signed-off-by: Suraj Singh * Address review comment Signed-off-by: Suraj Singh * Address review comments & refactor Signed-off-by: Suraj Singh * Comment Signed-off-by: Suraj Singh * Fix unit test Signed-off-by: Suraj Singh * Unit test to verify temporary files are not deleted from commits Signed-off-by: Suraj Singh * Compilation error fix Signed-off-by: Suraj Singh * Javadoc Signed-off-by: Suraj Singh * Skip testScrollWithConcurrentIndexAndSearch with remote store Signed-off-by: Suraj Singh --------- Signed-off-by: Suraj Singh --- .../replication/SegmentReplicationIT.java | 55 +++++ .../index/engine/NRTReplicationEngine.java | 20 +- .../engine/NRTReplicationReaderManager.java | 2 + .../org/opensearch/index/store/Store.java | 117 ++++++----- .../indices/recovery/MultiFileWriter.java | 4 + .../replication/SegmentReplicationTarget.java | 16 +- .../engine/NRTReplicationEngineTests.java | 33 +++ .../SegmentReplicationIndexShardTests.java | 198 +++++++++++++++++- .../opensearch/index/store/StoreTests.java | 9 +- .../index/shard/IndexShardTestCase.java | 156 ++++++++++---- 10 files changed, 480 insertions(+), 130 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java index ac57c78d20b73..873c05843fb56 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationIT.java @@ -25,6 +25,7 @@ import org.opensearch.action.admin.indices.flush.FlushRequest; import org.opensearch.action.admin.indices.stats.IndicesStatsRequest; import org.opensearch.action.admin.indices.stats.IndicesStatsResponse; +import org.opensearch.action.index.IndexResponse; import org.opensearch.action.search.CreatePitAction; import org.opensearch.action.search.CreatePitRequest; import org.opensearch.action.search.CreatePitResponse; @@ -79,6 +80,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import static java.util.Arrays.asList; @@ -272,6 +274,59 @@ public void testIndexReopenClose() throws Exception { verifyStoreContent(); } + public void testScrollWithConcurrentIndexAndSearch() throws Exception { + assumeFalse("Skipping the test with Remote store as its flaky.", segmentReplicationWithRemoteEnabled()); + final String primary = internalCluster().startDataOnlyNode(); + final String replica = internalCluster().startDataOnlyNode(); + createIndex(INDEX_NAME); + ensureGreen(INDEX_NAME); + final List> pendingIndexResponses = new ArrayList<>(); + final List> pendingSearchResponse = new ArrayList<>(); + final int searchCount = randomIntBetween(10, 20); + final WriteRequest.RefreshPolicy refreshPolicy = randomFrom(WriteRequest.RefreshPolicy.values()); + + for (int i = 0; i < searchCount; i++) { + pendingIndexResponses.add( + client().prepareIndex(INDEX_NAME) + .setId(Integer.toString(i)) + .setRefreshPolicy(refreshPolicy) + .setSource("field", "value" + i) + .execute() + ); + flush(INDEX_NAME); + forceMerge(); + } + + final SearchResponse searchResponse = client().prepareSearch() + .setQuery(matchAllQuery()) + .setIndices(INDEX_NAME) + .setRequestCache(false) + 
.setScroll(TimeValue.timeValueDays(1)) + .setSize(10) + .get(); + + for (int i = searchCount; i < searchCount * 2; i++) { + pendingIndexResponses.add( + client().prepareIndex(INDEX_NAME) + .setId(Integer.toString(i)) + .setRefreshPolicy(refreshPolicy) + .setSource("field", "value" + i) + .execute() + ); + } + flush(INDEX_NAME); + forceMerge(); + client().prepareClearScroll().addScrollId(searchResponse.getScrollId()).get(); + + assertBusy(() -> { + client().admin().indices().prepareRefresh().execute().actionGet(); + assertTrue(pendingIndexResponses.stream().allMatch(ActionFuture::isDone)); + assertTrue(pendingSearchResponse.stream().allMatch(ActionFuture::isDone)); + }, 1, TimeUnit.MINUTES); + verifyStoreContent(); + waitForSearchableDocs(INDEX_NAME, 2 * searchCount, List.of(primary, replica)); + } + public void testMultipleShards() throws Exception { Settings indexSettings = Settings.builder() .put(super.indexSettings()) diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 50b5fbb8596a6..b5552ed552f09 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -123,21 +123,7 @@ private NRTReplicationReaderManager buildReaderManager() throws IOException { return new NRTReplicationReaderManager( OpenSearchDirectoryReader.wrap(getDirectoryReader(), shardId), store::incRefFileDeleter, - (files) -> { - store.decRefFileDeleter(files); - try { - store.cleanupAndPreserveLatestCommitPoint( - "On reader closed", - getLatestSegmentInfos(), - getLastCommittedSegmentInfos(), - false - ); - } catch (IOException e) { - // Log but do not rethrow - we can try cleaning up again after next replication cycle. - // If that were to fail, the shard will as well. - logger.error("Unable to clean store after reader closed", e); - } - } + store::decRefFileDeleter ); } @@ -147,9 +133,9 @@ public TranslogManager translogManager() { } public synchronized void updateSegments(final SegmentInfos infos) throws IOException { - // Update the current infos reference on the Engine's reader. - ensureOpen(); try (ReleasableLock lock = writeLock.acquire()) { + // Update the current infos reference on the Engine's reader. 
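+            // (checking ensureOpen() under the write lock, rather than before acquiring it, avoids racing with a
+            // concurrent engine close; see testSimultaneousEngineCloseAndCommit added in this patch)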
+ ensureOpen(); final long maxSeqNo = Long.parseLong(infos.userData.get(MAX_SEQ_NO)); final long incomingGeneration = infos.getGeneration(); readerManager.updateSegments(infos); diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java index 9ec484ebfd383..268ba1a436393 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationReaderManager.java @@ -72,6 +72,8 @@ protected OpenSearchDirectoryReader refreshIfNeeded(OpenSearchDirectoryReader re for (LeafReaderContext ctx : standardDirectoryReader.leaves()) { subs.add(ctx.reader()); } + // Segment_n here is ignored because it is either already committed on disk as part of previous commit point or + // does not yet exist on store (not yet committed) final Collection files = currentInfos.files(false); DirectoryReader innerReader = StandardDirectoryReader.open(referenceToRefresh.directory(), currentInfos, subs, null); final DirectoryReader softDeletesDirectoryReaderWrapper = new SoftDeletesDirectoryReaderWrapper( diff --git a/server/src/main/java/org/opensearch/index/store/Store.java b/server/src/main/java/org/opensearch/index/store/Store.java index 90832b4c77756..46e5e627ef415 100644 --- a/server/src/main/java/org/opensearch/index/store/Store.java +++ b/server/src/main/java/org/opensearch/index/store/Store.java @@ -50,6 +50,7 @@ import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.BufferedChecksum; +import org.apache.lucene.store.BufferedChecksumIndexInput; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; @@ -64,7 +65,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.Version; import org.opensearch.ExceptionsHelper; -import org.opensearch.common.Nullable; +import org.opensearch.common.CheckedConsumer; import org.opensearch.common.UUIDs; import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -789,69 +790,33 @@ public void cleanupAndVerify(String reason, MetadataSnapshot sourceMetadata) thr /** * Segment Replication method - * This method deletes every file in this store that is not referenced by the passed in SegmentInfos or - * part of the latest on-disk commit point. + * This method deletes files in store that are not referenced by latest on-disk commit point * - * This method is used for segment replication when the in memory SegmentInfos can be ahead of the on disk segment file. - * In this case files from both snapshots must be preserved. Verification has been done that all files are present on disk. * @param reason the reason for this cleanup operation logged for each deleted file - * @param infos {@link SegmentInfos} Files from this infos will be preserved on disk if present. - * @throws IllegalStateException if the latest snapshot in this store differs from the given one after the cleanup. 
- */ - public void cleanupAndPreserveLatestCommitPoint(String reason, SegmentInfos infos) throws IOException { - this.cleanupAndPreserveLatestCommitPoint(reason, infos, readLastCommittedSegmentsInfo(), true); - } - - /** - * Segment Replication method - * - * Similar to {@link Store#cleanupAndPreserveLatestCommitPoint(String, SegmentInfos)} with extra parameters for cleanup - * - * This method deletes every file in this store. Except - * 1. Files referenced by the passed in SegmentInfos, usually in-memory segment infos copied from primary - * 2. Files part of the passed in segment infos, typically the last committed segment info - * 3. Files incremented by active reader for pit/scroll queries - * 4. Temporary replication file if passed in deleteTempFiles is true. - * - * @param reason the reason for this cleanup operation logged for each deleted file - * @param infos {@link SegmentInfos} Files from this infos will be preserved on disk if present. - * @param lastCommittedSegmentInfos {@link SegmentInfos} Last committed segment infos - * @param deleteTempFiles Does this clean up delete temporary replication files + * @param fileToConsiderForCleanUp Files to consider for clean up. * - * @throws IllegalStateException if the latest snapshot in this store differs from the given one after the cleanup. + * @throws IOException Exception on locking. */ - public void cleanupAndPreserveLatestCommitPoint( - String reason, - SegmentInfos infos, - SegmentInfos lastCommittedSegmentInfos, - boolean deleteTempFiles - ) throws IOException { + public void cleanupAndPreserveLatestCommitPoint(Collection fileToConsiderForCleanUp, String reason) throws IOException { assert indexSettings.isSegRepEnabled(); // fetch a snapshot from the latest on disk Segments_N file. This can be behind // the passed in local in memory snapshot, so we want to ensure files it references are not removed. metadataLock.writeLock().lock(); try (Lock writeLock = directory.obtainLock(IndexWriter.WRITE_LOCK_NAME)) { - cleanupFiles(reason, lastCommittedSegmentInfos.files(true), infos.files(true), deleteTempFiles); + cleanupFiles(fileToConsiderForCleanUp, reason, this.readLastCommittedSegmentsInfo().files(true)); } finally { metadataLock.writeLock().unlock(); } } - private void cleanupFiles( - String reason, - Collection localSnapshot, - @Nullable Collection additionalFiles, - boolean deleteTempFiles - ) throws IOException { + private void cleanupFiles(Collection filesToConsiderForCleanup, String reason, Collection lastCommittedSegmentInfos) { assert metadataLock.isWriteLockedByCurrentThread(); - for (String existingFile : directory.listAll()) { - if (Store.isAutogenerated(existingFile) - || localSnapshot != null && localSnapshot.contains(existingFile) - || (additionalFiles != null && additionalFiles.contains(existingFile)) - // also ensure we are not deleting a file referenced by an active reader. + for (String existingFile : filesToConsiderForCleanup) { + if (Store.isAutogenerated(existingFile) || lastCommittedSegmentInfos != null && lastCommittedSegmentInfos.contains(existingFile) + // also ensure we are not deleting a file referenced by an active reader. 
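+                // (the ReplicaFileTracker ref-counts files held open by active readers, e.g. scroll/PIT searches)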
                 || replicaFileTracker != null && replicaFileTracker.canDelete(existingFile) == false
-                // prevent temporary file deletion during reader cleanup
-                || deleteTempFiles == false && existingFile.startsWith(REPLICATION_PREFIX)) {
+                // Prevent deletion of temporary replication files here; they are cleaned up by the MultiFileWriter
+                || existingFile.startsWith(REPLICATION_PREFIX)) {
                 // don't delete snapshot file, or the checksums file (note, this is extra protection since the Store won't delete
                 // checksum)
                 continue;
@@ -871,6 +836,53 @@ private void cleanupFiles(
         }
     }
 
+    /**
+     * Segment replication method
+     *
+     * This method takes segment info bytes and builds a {@link SegmentInfos} from them. It increments the reference count
+     * of the files referenced by the passed-in SegmentInfos bytes so that they are not deleted while still in use.
+     *
+     * @param tmpToFileName Map of temporary replication file to actual file name
+     * @param infosBytes byte[] of the SegmentInfos sent over by the primary, excluding the segments_N file
+     * @param segmentsGen segment generation number
+     * @param consumer consumer for the generated SegmentInfos
+     * @throws IOException Exception while reading store and building segment infos
+     */
+    public void buildInfosFromBytes(
+        Map<String, String> tmpToFileName,
+        byte[] infosBytes,
+        long segmentsGen,
+        CheckedConsumer<SegmentInfos, IOException> consumer
+    ) throws IOException {
+        metadataLock.writeLock().lock();
+        try {
+            final List<String> values = new ArrayList<>(tmpToFileName.values());
+            incRefFileDeleter(values);
+            try {
+                renameTempFilesSafe(tmpToFileName);
+                consumer.accept(buildSegmentInfos(infosBytes, segmentsGen));
+            } finally {
+                decRefFileDeleter(values);
+            }
+        } finally {
+            metadataLock.writeLock().unlock();
+        }
+    }
+
+    private SegmentInfos buildSegmentInfos(byte[] infosBytes, long segmentsGen) throws IOException {
+        try (final ChecksumIndexInput input = toIndexInput(infosBytes)) {
+            return SegmentInfos.readCommit(directory, input, segmentsGen);
+        }
+    }
+
+    /**
+     * This method formats byte[] containing the primary's SegmentInfos into lucene's {@link ChecksumIndexInput} that can be
+     * passed to SegmentInfos.readCommit
+     */
+    private ChecksumIndexInput toIndexInput(byte[] input) {
+        return new BufferedChecksumIndexInput(new ByteArrayIndexInput("Snapshot of SegmentInfos", input));
+    }
+
     // pkg private for testing
     final void verifyAfterCleanup(MetadataSnapshot sourceMetadata, MetadataSnapshot targetMetadata) {
         final RecoveryDiff recoveryDiff = targetMetadata.recoveryDiff(sourceMetadata);
@@ -945,7 +957,7 @@ public void commitSegmentInfos(SegmentInfos latestSegmentInfos, long maxSeqNo, l
             latestSegmentInfos.commit(directory());
             directory.sync(latestSegmentInfos.files(true));
             directory.syncMetaData();
-            cleanupAndPreserveLatestCommitPoint("After commit", latestSegmentInfos);
+            cleanupAndPreserveLatestCommitPoint(List.of(this.directory.listAll()), "After commit");
         } finally {
             metadataLock.writeLock().unlock();
         }
@@ -1961,6 +1973,13 @@ public void incRefFileDeleter(Collection<String> files) {
     public void decRefFileDeleter(Collection<String> files) {
         if (this.indexSettings.isSegRepEnabled()) {
             this.replicaFileTracker.decRef(files);
+            try {
+                this.cleanupAndPreserveLatestCommitPoint(files, "On reader close");
+            } catch (IOException e) {
+                // Log but do not rethrow - we can try cleaning up again after next replication cycle.
+                // If that were to fail, the shard will as well.
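+                // (cleanup also runs after every commit via commitSegmentInfos, so a failed attempt here is retried later)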
+ logger.error("Unable to clean store after reader closed", e); + } } } } diff --git a/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java b/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java index 4f9db27ffc9db..c27852b27960b 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java +++ b/server/src/main/java/org/opensearch/indices/recovery/MultiFileWriter.java @@ -97,6 +97,10 @@ String getTempNameForFile(String origFile) { return tempFilePrefix + origFile; } + public Map getTempFileNames() { + return tempFileNames; + } + public IndexOutput getOpenIndexOutput(String key) { ensureOpen.run(); return openIndexOutputs.get(key); diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java index 22c68ad46fea6..9d724d6cc9dcf 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java @@ -21,6 +21,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.ActionListener; import org.opensearch.action.StepListener; +import org.opensearch.common.CheckedConsumer; import org.opensearch.common.UUIDs; import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.lucene.Lucene; @@ -221,18 +222,15 @@ private void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse, state.setStage(SegmentReplicationState.Stage.FINALIZE_REPLICATION); Store store = null; try { - multiFileWriter.renameAllTempFiles(); store = store(); store.incRef(); - // Deserialize the new SegmentInfos object sent from the primary. - final ReplicationCheckpoint responseCheckpoint = checkpointInfoResponse.getCheckpoint(); - SegmentInfos infos = SegmentInfos.readCommit( - store.directory(), - toIndexInput(checkpointInfoResponse.getInfosBytes()), - responseCheckpoint.getSegmentsGen() + CheckedConsumer finalizeReplication = indexShard::finalizeReplication; + store.buildInfosFromBytes( + multiFileWriter.getTempFileNames(), + checkpointInfoResponse.getInfosBytes(), + checkpointInfoResponse.getCheckpoint().getSegmentsGen(), + finalizeReplication ); - cancellableThreads.checkForCancel(); - indexShard.finalizeReplication(infos); } catch (CorruptIndexException | IndexFormatTooNewException | IndexFormatTooOldException ex) { // this is a fatal exception at this stage. 
// this means we transferred files from the remote that have not be checksummed and they are diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index 3e1112ae3069b..e25e6ea206d84 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -160,6 +161,38 @@ public void testUpdateSegments_replicaReceivesSISWithLowerGen() throws IOExcepti } } + public void testSimultaneousEngineCloseAndCommit() throws IOException, InterruptedException { + final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); + try ( + final Store nrtEngineStore = createStore(INDEX_SETTINGS, newDirectory()); + final NRTReplicationEngine nrtEngine = buildNrtReplicaEngine(globalCheckpoint, nrtEngineStore) + ) { + CountDownLatch latch = new CountDownLatch(1); + Thread commitThread = new Thread(() -> { + try { + nrtEngine.updateSegments(store.readLastCommittedSegmentsInfo()); + latch.countDown(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + Thread closeThread = new Thread(() -> { + try { + latch.await(); + nrtEngine.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + commitThread.start(); + closeThread.start(); + commitThread.join(); + closeThread.join(); + } + } + public void testUpdateSegments_replicaCommitsFirstReceivedInfos() throws IOException { final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index 0c859c5f6a64a..b3876a8ea8fd0 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -13,13 +13,14 @@ import org.junit.Assert; import org.opensearch.ExceptionsHelper; import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.indices.flush.FlushRequest; +import org.opensearch.action.admin.indices.forcemerge.ForceMergeRequest; import org.opensearch.action.delete.DeleteRequest; import org.opensearch.action.index.IndexRequest; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.ShardRoutingHelper; -import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.collect.Tuple; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.settings.ClusterSettings; @@ -30,6 +31,7 @@ import org.opensearch.common.lease.Releasable; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.DocIdSeqNoAndSource; +import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.InternalEngine; import org.opensearch.index.engine.InternalEngineFactory; import org.opensearch.index.engine.NRTReplicationEngine; @@ -63,6 +65,8 @@ import 
java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; @@ -70,7 +74,10 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.Arrays.asList; import static org.hamcrest.Matchers.containsString; @@ -78,6 +85,7 @@ import static org.hamcrest.Matchers.hasToString; import static org.hamcrest.Matchers.instanceOf; import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -169,6 +177,186 @@ public void testIsSegmentReplicationAllowed_WrongEngineType() throws IOException closeShards(indexShard); } + /** + * This test mimics the segment replication failure due to CorruptIndexException exception which happens when + * reader close operation on replica shard deletes the segment files copied in current round of segment replication. + * It does this by blocking the finalizeReplication on replica shard and performing close operation on acquired + * searcher that triggers the reader close operation. + * @throws Exception + */ + public void testSegmentReplication_With_ReaderClosedConcurrently() throws Exception { + String mappings = "{ \"" + MapperService.SINGLE_MAPPING_NAME + "\": { \"properties\": { \"foo\": { \"type\": \"keyword\"} }}}"; + try (ReplicationGroup shards = createGroup(1, settings, mappings, new NRTReplicationEngineFactory())) { + shards.startAll(); + IndexShard primaryShard = shards.getPrimary(); + final IndexShard replicaShard = shards.getReplicas().get(0); + + // Step 1. Ingest numDocs documents & replicate to replica shard + final int numDocs = randomIntBetween(100, 200); + logger.info("--> Inserting documents {}", numDocs); + for (int i = 0; i < numDocs; i++) { + shards.index(new IndexRequest(index.getName()).id(String.valueOf(i)).source("{\"foo\": \"bar\"}", XContentType.JSON)); + } + assertEqualTranslogOperations(shards, primaryShard); + primaryShard.refresh("Test"); + primaryShard.flush(new FlushRequest().waitIfOngoing(true).force(true)); + replicateSegments(primaryShard, shards.getReplicas()); + + IndexShard spyShard = spy(replicaShard); + Engine.Searcher test = replicaShard.getEngine().acquireSearcher("testSegmentReplication_With_ReaderClosedConcurrently"); + shards.assertAllEqual(numDocs); + + // Step 2. Ingest numDocs documents again & replicate to replica shard + logger.info("--> Ingest {} docs again", numDocs); + for (int i = 0; i < numDocs; i++) { + shards.index(new IndexRequest(index.getName()).id(String.valueOf(i)).source("{\"foo\": \"bar\"}", XContentType.JSON)); + } + assertEqualTranslogOperations(shards, primaryShard); + primaryShard.flush(new FlushRequest().waitIfOngoing(true).force(true)); + replicateSegments(primaryShard, shards.getReplicas()); + + // Step 3. 
Perform force merge down to 1 segment on primary + primaryShard.forceMerge(new ForceMergeRequest().maxNumSegments(1).flush(true)); + logger.info("--> primary store after force merge {}", Arrays.toString(primaryShard.store().directory().listAll())); + // Perform close on searcher before IndexShard::finalizeReplication + doAnswer(n -> { + test.close(); + n.callRealMethod(); + return null; + }).when(spyShard).finalizeReplication(any()); + replicateSegments(primaryShard, List.of(spyShard)); + shards.assertAllEqual(numDocs); + } + } + + /** + * Similar to test above, this test shows the issue where an engine close operation during active segment replication + * can result in Lucene CorruptIndexException. + * @throws Exception + */ + public void testSegmentReplication_With_EngineClosedConcurrently() throws Exception { + String mappings = "{ \"" + MapperService.SINGLE_MAPPING_NAME + "\": { \"properties\": { \"foo\": { \"type\": \"keyword\"} }}}"; + try (ReplicationGroup shards = createGroup(1, settings, mappings, new NRTReplicationEngineFactory())) { + shards.startAll(); + IndexShard primaryShard = shards.getPrimary(); + final IndexShard replicaShard = shards.getReplicas().get(0); + + // Step 1. Ingest numDocs documents + final int numDocs = randomIntBetween(100, 200); + logger.info("--> Inserting documents {}", numDocs); + for (int i = 0; i < numDocs; i++) { + shards.index(new IndexRequest(index.getName()).id(String.valueOf(i)).source("{\"foo\": \"bar\"}", XContentType.JSON)); + } + assertEqualTranslogOperations(shards, primaryShard); + primaryShard.refresh("Test"); + primaryShard.flush(new FlushRequest().waitIfOngoing(true).force(true)); + replicateSegments(primaryShard, shards.getReplicas()); + shards.assertAllEqual(numDocs); + + // Step 2. Ingest numDocs documents again to create a new commit + logger.info("--> Ingest {} docs again", numDocs); + for (int i = 0; i < numDocs; i++) { + shards.index(new IndexRequest(index.getName()).id(String.valueOf(i)).source("{\"foo\": \"bar\"}", XContentType.JSON)); + } + assertEqualTranslogOperations(shards, primaryShard); + primaryShard.flush(new FlushRequest().waitIfOngoing(true).force(true)); + logger.info("--> primary store after final flush {}", Arrays.toString(primaryShard.store().directory().listAll())); + + // Step 3. Before replicating segments, block finalizeReplication and perform engine commit directly that + // cleans up recently copied over files + IndexShard spyShard = spy(replicaShard); + doAnswer(n -> { + NRTReplicationEngine engine = (NRTReplicationEngine) replicaShard.getEngine(); + // Using engine.close() prevents indexShard.finalizeReplication execution due to engine AlreadyClosedException, + // thus as workaround, use updateSegments which eventually calls commitSegmentInfos on latest segment infos. 
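+                // (feeding the engine's own current infos back into updateSegments exercises the commit-and-cleanup
+                // path without closing the engine)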
+ engine.updateSegments(engine.getSegmentInfosSnapshot().get()); + n.callRealMethod(); + return null; + }).when(spyShard).finalizeReplication(any()); + replicateSegments(primaryShard, List.of(spyShard)); + shards.assertAllEqual(numDocs); + } + } + + /** + * Verifies that commits on replica engine resulting from engine or reader close does not cleanup the temporary + * replication files from ongoing round of segment replication + * @throws Exception + */ + public void testTemporaryFilesNotCleanup() throws Exception { + String mappings = "{ \"" + MapperService.SINGLE_MAPPING_NAME + "\": { \"properties\": { \"foo\": { \"type\": \"keyword\"} }}}"; + try (ReplicationGroup shards = createGroup(1, settings, mappings, new NRTReplicationEngineFactory())) { + shards.startAll(); + IndexShard primaryShard = shards.getPrimary(); + final IndexShard replica = shards.getReplicas().get(0); + + // Step 1. Ingest numDocs documents, commit to create commit point on primary & replicate + final int numDocs = randomIntBetween(100, 200); + logger.info("--> Inserting documents {}", numDocs); + for (int i = 0; i < numDocs; i++) { + shards.index(new IndexRequest(index.getName()).id(String.valueOf(i)).source("{\"foo\": \"bar\"}", XContentType.JSON)); + } + assertEqualTranslogOperations(shards, primaryShard); + primaryShard.flush(new FlushRequest().waitIfOngoing(true).force(true)); + replicateSegments(primaryShard, shards.getReplicas()); + shards.assertAllEqual(numDocs); + + // Step 2. Ingest numDocs documents again to create a new commit on primary + logger.info("--> Ingest {} docs again", numDocs); + for (int i = 0; i < numDocs; i++) { + shards.index(new IndexRequest(index.getName()).id(String.valueOf(i)).source("{\"foo\": \"bar\"}", XContentType.JSON)); + } + assertEqualTranslogOperations(shards, primaryShard); + primaryShard.flush(new FlushRequest().waitIfOngoing(true).force(true)); + + // Step 3. Copy segment files to replica shard but prevent commit + final CountDownLatch countDownLatch = new CountDownLatch(1); + Map primaryMetadata; + try (final GatedCloseable segmentInfosSnapshot = primaryShard.getSegmentInfosSnapshot()) { + final SegmentInfos primarySegmentInfos = segmentInfosSnapshot.get(); + primaryMetadata = primaryShard.store().getSegmentMetadataMap(primarySegmentInfos); + } + final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class); + final IndicesService indicesService = mock(IndicesService.class); + when(indicesService.getShardOrNull(replica.shardId)).thenReturn(replica); + final SegmentReplicationTargetService targetService = new SegmentReplicationTargetService( + threadPool, + new RecoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), + mock(TransportService.class), + sourceFactory, + indicesService, + clusterService + ); + final Consumer runnablePostGetFiles = (indexShard) -> { + try { + Collection temporaryFiles = Stream.of(indexShard.store().directory().listAll()) + .filter(name -> name.startsWith(SegmentReplicationTarget.REPLICATION_PREFIX)) + .collect(Collectors.toList()); + + // Step 4. Perform a commit on replica shard. + NRTReplicationEngine engine = (NRTReplicationEngine) indexShard.getEngine(); + engine.updateSegments(engine.getSegmentInfosSnapshot().get()); + + // Step 5. Validate temporary files are not deleted from store. 
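+                    // (this exercises the REPLICATION_PREFIX guard added to Store#cleanupFiles in this change)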
+ Collection replicaStoreFiles = List.of(indexShard.store().directory().listAll()); + assertTrue(replicaStoreFiles.containsAll(temporaryFiles)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }; + SegmentReplicationSource segmentReplicationSource = getSegmentReplicationSource( + primaryShard, + (repId) -> targetService.get(repId), + runnablePostGetFiles + ); + when(sourceFactory.get(any())).thenReturn(segmentReplicationSource); + targetService.startReplication(replica, getTargetListener(primaryShard, replica, primaryMetadata, countDownLatch)); + countDownLatch.await(30, TimeUnit.SECONDS); + assertEquals("Replication failed", 0, countDownLatch.getCount()); + shards.assertAllEqual(numDocs); + } + } + public void testSegmentReplication_Index_Update_Delete() throws Exception { String mappings = "{ \"" + MapperService.SINGLE_MAPPING_NAME + "\": { \"properties\": { \"foo\": { \"type\": \"keyword\"} }}}"; try (ReplicationGroup shards = createGroup(2, settings, mappings, new NRTReplicationEngineFactory())) { @@ -296,13 +484,7 @@ public void testPublishCheckpointAfterRelocationHandOff() throws IOException { public void testRejectCheckpointOnShardRoutingPrimary() throws IOException { IndexShard primaryShard = newStartedShard(true); SegmentReplicationTargetService sut; - sut = prepareForReplication( - primaryShard, - null, - mock(TransportService.class), - mock(IndicesService.class), - mock(ClusterService.class) - ); + sut = prepareForReplication(primaryShard, null); SegmentReplicationTargetService spy = spy(sut); // Starting a new shard in PrimaryMode and shard routing primary. diff --git a/server/src/test/java/org/opensearch/index/store/StoreTests.java b/server/src/test/java/org/opensearch/index/store/StoreTests.java index b11e8554027b1..d9493e63fd34c 100644 --- a/server/src/test/java/org/opensearch/index/store/StoreTests.java +++ b/server/src/test/java/org/opensearch/index/store/StoreTests.java @@ -100,6 +100,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashMap; @@ -108,6 +109,8 @@ import java.util.Map; import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.Collections.unmodifiableMap; import static org.hamcrest.Matchers.anyOf; @@ -1191,8 +1194,12 @@ public void testCleanupAndPreserveLatestCommitPoint() throws IOException { } assertFalse(additionalSegments.isEmpty()); + Collection filesToConsiderForCleanUp = Stream.of(store.readLastCommittedSegmentsInfo().files(true), additionalSegments) + .flatMap(Collection::stream) + .collect(Collectors.toList()); + // clean up everything not in the latest commit point. 
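         // (the caller now supplies the candidate file set; anything referenced by the last commit must survive)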
-        store.cleanupAndPreserveLatestCommitPoint("test", store.readLastCommittedSegmentsInfo());
+        store.cleanupAndPreserveLatestCommitPoint(filesToConsiderForCleanUp, "test");
 
         // we want to ensure commitMetadata files are preserved after calling cleanup
         for (String existingFile : store.directory().listAll()) {
diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java
index f5af84bb9f128..984605007dad7 100644
--- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java
@@ -1330,15 +1330,22 @@ public static Engine.Warmer createTestWarmer(IndexSettings indexSettings) {
      * Segment Replication specific test method - Creates a {@link SegmentReplicationTargetService} to perform replications that has
      * been configured to return the given primaryShard's current segments.
      *
-     * @param primaryShard {@link IndexShard} - The primary shard to replicate from.
-     * @param target {@link IndexShard} - The target replica shard in segment replication.
+     * @param primaryShard {@link IndexShard} - The source primary shard in segment replication.
+     * @param target {@link IndexShard} - The target replica shard in segment replication.
+     * @param transportService {@link TransportService} - Transport service to be used on target
+     * @param indicesService {@link IndicesService} - The indices service to be used on target
+     * @param clusterService {@link ClusterService} - The cluster service to be used on target
+     * @param postGetFilesRunnable - Consumer which is executed after the file copy operation. This can be used to stub operations
+     *                             which are desired right after files are copied, e.g. to work with temp files
+     * @return Returns SegmentReplicationTargetService
      */
     public final SegmentReplicationTargetService prepareForReplication(
         IndexShard primaryShard,
         IndexShard target,
         TransportService transportService,
         IndicesService indicesService,
-        ClusterService clusterService
+        ClusterService clusterService,
+        Consumer<IndexShard> postGetFilesRunnable
     ) {
         final SegmentReplicationSourceFactory sourceFactory = mock(SegmentReplicationSourceFactory.class);
         final SegmentReplicationTargetService targetService = new SegmentReplicationTargetService(
@@ -1349,7 +1356,102 @@ public final SegmentReplicationTargetService prepareForReplication(
             indicesService,
             clusterService
         );
-        final SegmentReplicationSource replicationSource = new TestReplicationSource() {
+        final SegmentReplicationSource replicationSource = getSegmentReplicationSource(
+            primaryShard,
+            (repId) -> targetService.get(repId),
+            postGetFilesRunnable
+        );
+        when(sourceFactory.get(any())).thenReturn(replicationSource);
+        when(indicesService.getShardOrNull(any())).thenReturn(target);
+        return targetService;
+    }
+
+    /**
+     * Segment Replication specific test method - Creates a {@link SegmentReplicationTargetService} to perform replications that has
+     * been configured to return the given primaryShard's current segments.
+     *
+     * @param primaryShard {@link IndexShard} - The primary shard to replicate from.
+     * @param target {@link IndexShard} - The target replica shard.
+ * @return Returns SegmentReplicationTargetService + */ + public final SegmentReplicationTargetService prepareForReplication(IndexShard primaryShard, IndexShard target) { + return prepareForReplication( + primaryShard, + target, + mock(TransportService.class), + mock(IndicesService.class), + mock(ClusterService.class), + (indexShard) -> {} + ); + } + + public final SegmentReplicationTargetService prepareForReplication( + IndexShard primaryShard, + IndexShard target, + TransportService transportService, + IndicesService indicesService, + ClusterService clusterService + ) { + return prepareForReplication(primaryShard, target, transportService, indicesService, clusterService, (indexShard) -> {}); + } + + /** + * Get listener on started segment replication event which verifies replica shard store with primary's after completion + * @param primaryShard - source of segment replication + * @param replicaShard - target of segment replication + * @param primaryMetadata - primary shard metadata before start of segment replication + * @param latch - Latch which allows consumers of this utility to ensure segment replication completed successfully + * @return Returns SegmentReplicationTargetService.SegmentReplicationListener + */ + public SegmentReplicationTargetService.SegmentReplicationListener getTargetListener( + IndexShard primaryShard, + IndexShard replicaShard, + Map primaryMetadata, + CountDownLatch latch + ) { + return new SegmentReplicationTargetService.SegmentReplicationListener() { + @Override + public void onReplicationDone(SegmentReplicationState state) { + try (final GatedCloseable snapshot = replicaShard.getSegmentInfosSnapshot()) { + final SegmentInfos replicaInfos = snapshot.get(); + final Map replicaMetadata = replicaShard.store().getSegmentMetadataMap(replicaInfos); + final Store.RecoveryDiff recoveryDiff = Store.segmentReplicationDiff(primaryMetadata, replicaMetadata); + assertTrue(recoveryDiff.missing.isEmpty()); + assertTrue(recoveryDiff.different.isEmpty()); + assertEquals(recoveryDiff.identical.size(), primaryMetadata.size()); + primaryShard.updateVisibleCheckpointForShard( + replicaShard.routingEntry().allocationId().getId(), + primaryShard.getLatestReplicationCheckpoint() + ); + } catch (Exception e) { + throw ExceptionsHelper.convertToRuntime(e); + } finally { + latch.countDown(); + } + } + + @Override + public void onReplicationFailure(SegmentReplicationState state, ReplicationFailedException e, boolean sendShardFailure) { + logger.error("Unexpected replication failure in test", e); + Assert.fail("test replication should not fail: " + e); + } + }; + } + + /** + * Utility method which creates a segment replication source, which copies files from primary shard to target shard + * @param primaryShard Primary IndexShard - source of segment replication + * @param getTargetFunc - provides replication target from target service using replication id + * @param postGetFilesRunnable - Consumer which is executed after file copy operation. This can be used to stub operations + * which are desired right after files are copied. e.g. 
To work with temp files + * @return Return SegmentReplicationSource + */ + public SegmentReplicationSource getSegmentReplicationSource( + IndexShard primaryShard, + Function> getTargetFunc, + Consumer postGetFilesRunnable + ) { + return new TestReplicationSource() { @Override public void getCheckpointMetadata( long replicationId, @@ -1380,18 +1482,16 @@ public void getSegmentFiles( ActionListener listener ) { try ( - final ReplicationCollection.ReplicationRef replicationRef = targetService.get(replicationId) + final ReplicationCollection.ReplicationRef replicationRef = getTargetFunc.apply(replicationId) ) { writeFileChunks(replicationRef.get(), primaryShard, filesToFetch.toArray(new StoreFileMetadata[] {})); } catch (IOException e) { listener.onFailure(e); } + postGetFilesRunnable.accept(indexShard); listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); } }; - when(sourceFactory.get(any())).thenReturn(replicationSource); - when(indicesService.getShardOrNull(any())).thenReturn(target); - return targetService; } /** @@ -1412,46 +1512,10 @@ public final List replicateSegments(IndexShard primary } List ids = new ArrayList<>(); for (IndexShard replica : replicaShards) { - final SegmentReplicationTargetService targetService = prepareForReplication( - primaryShard, - replica, - mock(TransportService.class), - mock(IndicesService.class), - mock(ClusterService.class) - ); + final SegmentReplicationTargetService targetService = prepareForReplication(primaryShard, replica); final SegmentReplicationTarget target = targetService.startReplication( replica, - new SegmentReplicationTargetService.SegmentReplicationListener() { - @Override - public void onReplicationDone(SegmentReplicationState state) { - try (final GatedCloseable snapshot = replica.getSegmentInfosSnapshot()) { - final SegmentInfos replicaInfos = snapshot.get(); - final Map replicaMetadata = replica.store().getSegmentMetadataMap(replicaInfos); - final Store.RecoveryDiff recoveryDiff = Store.segmentReplicationDiff(primaryMetadata, replicaMetadata); - assertTrue(recoveryDiff.missing.isEmpty()); - assertTrue(recoveryDiff.different.isEmpty()); - assertEquals(recoveryDiff.identical.size(), primaryMetadata.size()); - primaryShard.updateVisibleCheckpointForShard( - replica.routingEntry().allocationId().getId(), - primaryShard.getLatestReplicationCheckpoint() - ); - } catch (Exception e) { - throw ExceptionsHelper.convertToRuntime(e); - } finally { - countDownLatch.countDown(); - } - } - - @Override - public void onReplicationFailure( - SegmentReplicationState state, - ReplicationFailedException e, - boolean sendShardFailure - ) { - logger.error("Unexpected replication failure in test", e); - Assert.fail("test replication should not fail: " + e); - } - } + getTargetListener(primaryShard, replica, primaryMetadata, countDownLatch) ); ids.add(target); } From fa3412131280f2e3943a08f06d82aaaccc063df4 Mon Sep 17 00:00:00 2001 From: Ashish Date: Sun, 9 Jul 2023 08:38:00 +0530 Subject: [PATCH 04/29] Fix flakyness in RemoteStoreRefreshListenerIT (#8547) --------- Signed-off-by: Ashish Singh --- .../remotestore/RemoteStoreRefreshListenerIT.java | 1 - .../index/shard/RemoteStoreRefreshListener.java | 14 ++++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java index 13f76adc8a5a7..4005e6359a2f7 100644 --- 
a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRefreshListenerIT.java @@ -27,7 +27,6 @@ @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStoreRefreshListenerIT extends AbstractRemoteStoreMockRepositoryIntegTestCase { - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/7703") public void testRemoteRefreshRetryOnFailure() throws Exception { Path location = randomRepoPath().toAbsolutePath(); diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index cd3e7aa3b11a9..aaba74cd54341 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -47,7 +47,6 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.ExecutionException; -import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; @@ -96,9 +95,9 @@ public final class RemoteStoreRefreshListener implements ReferenceManager.Refres private long primaryTerm; /** - * Semaphore that ensures there is only 1 retry scheduled at any time. + * This boolean is used to ensure that there is only 1 retry scheduled/running at any time. */ - private final Semaphore SCHEDULE_RETRY_PERMITS = new Semaphore(1); + private final AtomicBoolean retryScheduled = new AtomicBoolean(false); private volatile Iterator backoffDelayIterator; @@ -321,6 +320,9 @@ private void onSuccessfulSegmentsSync( private void cancelAndResetScheduledCancellableRetry() { if (scheduledCancellableRetry != null && scheduledCancellableRetry.getDelay(TimeUnit.NANOSECONDS) > 0) { scheduledCancellableRetry.cancel(); + // Since we are cancelling the retry attempt as an internal/external refresh happened already before the retry job could be + // started and the current run successfully uploaded the segments. + retryScheduled.set(false); } scheduledCancellableRetry = null; } @@ -333,14 +335,14 @@ private void resetBackOffDelayIterator() { } private void afterSegmentsSync(boolean isRetry, boolean shouldRetry) { - // If this was a retry attempt, then we release the semaphore at the end so that further retries can be scheduled + // If this was a retry attempt, then we set the retryScheduled to false so that the next retry (if needed) can be scheduled if (isRetry) { - SCHEDULE_RETRY_PERMITS.release(); + retryScheduled.set(false); } // If there are failures in uploading segments, then we should retry as search idle can lead to // refresh not occurring until write happens. 
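         // compareAndSet(false, true) admits at most one scheduled retry at a time, replacing the single-permit Semaphore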
- if (shouldRetry && indexShard.state() != IndexShardState.CLOSED && SCHEDULE_RETRY_PERMITS.tryAcquire()) { + if (shouldRetry && indexShard.state() != IndexShardState.CLOSED && retryScheduled.compareAndSet(false, true)) { scheduledCancellableRetry = indexShard.getThreadPool() .schedule(() -> this.syncSegments(true), backoffDelayIterator.next(), ThreadPool.Names.REMOTE_REFRESH); } From 9fd1ddb6a2c2df60a9b201a6b5e47041fa145ecd Mon Sep 17 00:00:00 2001 From: Ashish Date: Mon, 10 Jul 2023 10:31:15 +0530 Subject: [PATCH 05/29] Fix flakyness in RemoteStoreRefreshListenerTests (#8550) --------- Signed-off-by: Ashish Singh --- .../RemoteStoreRefreshListenerTests.java | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java index 688f29fa1f4bf..21a9393408529 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java @@ -21,9 +21,9 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.collect.Tuple; import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.common.lease.Releasable; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; -import org.opensearch.common.lease.Releasable; import org.opensearch.index.engine.InternalEngineFactory; import org.opensearch.index.remote.RemoteRefreshSegmentPressureService; import org.opensearch.index.remote.RemoteRefreshSegmentTracker; @@ -249,10 +249,7 @@ public void testRefreshSuccessOnFirstAttempt() throws Exception { assertBusy(() -> assertEquals(0, successLatch.getCount())); RemoteRefreshSegmentPressureService pressureService = tuple.v2(); RemoteRefreshSegmentTracker segmentTracker = pressureService.getRemoteRefreshSegmentTracker(indexShard.shardId()); - assertEquals(0, segmentTracker.getBytesLag()); - assertEquals(0, segmentTracker.getRefreshSeqNoLag()); - assertEquals(0, segmentTracker.getTimeMsLag()); - assertEquals(0, segmentTracker.getTotalUploadsFailed()); + assertNoLagAndTotalUploadsFailed(segmentTracker, 0); } public void testRefreshSuccessOnSecondAttempt() throws Exception { @@ -273,10 +270,7 @@ public void testRefreshSuccessOnSecondAttempt() throws Exception { assertBusy(() -> assertEquals(0, successLatch.getCount())); RemoteRefreshSegmentPressureService pressureService = tuple.v2(); RemoteRefreshSegmentTracker segmentTracker = pressureService.getRemoteRefreshSegmentTracker(indexShard.shardId()); - assertEquals(0, segmentTracker.getBytesLag()); - assertEquals(0, segmentTracker.getRefreshSeqNoLag()); - assertEquals(0, segmentTracker.getTimeMsLag()); - assertEquals(1, segmentTracker.getTotalUploadsFailed()); + assertNoLagAndTotalUploadsFailed(segmentTracker, 1); } /** @@ -304,7 +298,7 @@ public void testRefreshSuccessAfterFailureInFirstAttemptAfterSnapshotAndMetadata assertBusy(() -> assertEquals(0, reachedCheckpointPublishLatch.getCount())); } - public void testRefreshSuccessOnThirdAttemptAttempt() throws Exception { + public void testRefreshSuccessOnThirdAttempt() throws Exception { // This covers 3 cases - 1) isRetry=false, shouldRetry=true 2) isRetry=true, shouldRetry=false 3) isRetry=True, shouldRetry=true // Succeed on 3rd attempt int succeedOnAttempt = 3; @@ -322,11 +316,16 @@ public void 
testRefreshSuccessOnThirdAttemptAttempt() throws Exception { assertBusy(() -> assertEquals(0, successLatch.getCount())); RemoteRefreshSegmentPressureService pressureService = tuple.v2(); RemoteRefreshSegmentTracker segmentTracker = pressureService.getRemoteRefreshSegmentTracker(indexShard.shardId()); - assertEquals(0, segmentTracker.getBytesLag()); - assertEquals(0, segmentTracker.getRefreshSeqNoLag()); - assertEquals(0, segmentTracker.getTimeMsLag()); - assertEquals(2, segmentTracker.getTotalUploadsFailed()); + assertNoLagAndTotalUploadsFailed(segmentTracker, 2); + } + private void assertNoLagAndTotalUploadsFailed(RemoteRefreshSegmentTracker segmentTracker, long totalUploadsFailed) throws Exception { + assertBusy(() -> { + assertEquals(0, segmentTracker.getBytesLag()); + assertEquals(0, segmentTracker.getRefreshSeqNoLag()); + assertEquals(0, segmentTracker.getTimeMsLag()); + assertEquals(totalUploadsFailed, segmentTracker.getTotalUploadsFailed()); + }); } public void testTrackerData() throws Exception { From cb0d13b9cab950b39269eb28691e6075cd9cf1aa Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Mon, 10 Jul 2023 00:35:41 -0700 Subject: [PATCH 06/29] [Optimization] Cluster State Update Optimization (#7853) * Cluster State Update Optimization - Optimize Metadata build() to skip redundant computations of indicesLookup as part of ClusterState build Signed-off-by: Sandesh Kumar --------- Signed-off-by: Sandesh Kumar --- CHANGELOG.md | 2 +- .../opensearch/cluster/metadata/Metadata.java | 44 +++++++- .../cluster/metadata/MetadataTests.java | 103 ++++++++++++++++++ 3 files changed, 146 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0146af42ee3b..e7998d4022cb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -157,7 +157,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Update ZSTD default compression level ([#8471](https://github.com/opensearch-project/OpenSearch/pull/8471)) - [Search Pipelines] Pass pipeline creation context to processor factories ([#8164](https://github.com/opensearch-project/OpenSearch/pull/8164)) - Enabling compression levels for zstd and zstd_no_dict ([#8312](https://github.com/opensearch-project/OpenSearch/pull/8312)) - +- Optimize Metadata build() to skip redundant computations as part of ClusterState build ([#7853](https://github.com/opensearch-project/OpenSearch/pull/7853)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index dde9ebfb54a49..5d0e4f9aa7e3f 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -1121,12 +1121,14 @@ public static class Builder { private final Map indices; private final Map templates; private final Map customs; + private final Metadata previousMetadata; public Builder() { clusterUUID = UNKNOWN_CLUSTER_UUID; indices = new HashMap<>(); templates = new HashMap<>(); customs = new HashMap<>(); + previousMetadata = null; indexGraveyard(IndexGraveyard.builder().build()); // create new empty index graveyard to initialize } @@ -1141,6 +1143,7 @@ public Builder(Metadata metadata) { this.indices = new HashMap<>(metadata.indices); this.templates = new HashMap<>(metadata.templates); this.customs = new HashMap<>(metadata.customs); + this.previousMetadata = metadata; } public Builder put(IndexMetadata.Builder indexMetadataBuilder) { @@ -1425,6 +1428,44 @@ public Builder 
generateClusterUuidIfNeeded() { } public Metadata build() { + DataStreamMetadata dataStreamMetadata = (DataStreamMetadata) this.customs.get(DataStreamMetadata.TYPE); + DataStreamMetadata previousDataStreamMetadata = (previousMetadata != null) + ? (DataStreamMetadata) this.previousMetadata.customs.get(DataStreamMetadata.TYPE) + : null; + + boolean recomputeRequiredForIndicesLookups = (previousMetadata == null) + || (indices.equals(previousMetadata.indices) == false) + || (previousDataStreamMetadata != null && previousDataStreamMetadata.equals(dataStreamMetadata) == false) + || (dataStreamMetadata != null && dataStreamMetadata.equals(previousDataStreamMetadata) == false); + + return (recomputeRequiredForIndicesLookups == false) + ? buildMetadataWithPreviousIndicesLookups() + : buildMetadataWithRecomputedIndicesLookups(); + } + + protected Metadata buildMetadataWithPreviousIndicesLookups() { + return new Metadata( + clusterUUID, + clusterUUIDCommitted, + version, + coordinationMetadata, + transientSettings, + persistentSettings, + hashesOfConsistentSettings, + indices, + templates, + customs, + Arrays.copyOf(previousMetadata.allIndices, previousMetadata.allIndices.length), + Arrays.copyOf(previousMetadata.visibleIndices, previousMetadata.visibleIndices.length), + Arrays.copyOf(previousMetadata.allOpenIndices, previousMetadata.allOpenIndices.length), + Arrays.copyOf(previousMetadata.visibleOpenIndices, previousMetadata.visibleOpenIndices.length), + Arrays.copyOf(previousMetadata.allClosedIndices, previousMetadata.allClosedIndices.length), + Arrays.copyOf(previousMetadata.visibleClosedIndices, previousMetadata.visibleClosedIndices.length), + Collections.unmodifiableSortedMap(previousMetadata.indicesLookup) + ); + } + + protected Metadata buildMetadataWithRecomputedIndicesLookups() { // TODO: We should move these datastructures to IndexNameExpressionResolver, this will give the following benefits: // 1) The datastructures will be rebuilt only when needed. Now during serializing we rebuild these datastructures // while these datastructures aren't even used.
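In effect, the reworked build() above treats the indices lookups as a cached derivation: it recomputes them only when the inputs they are derived from (the indices map and the data-stream custom) differ from the previous Metadata instance captured by the Builder. Below is a minimal, self-contained sketch of that reuse-or-recompute pattern; the class, fields, and recomputeLookup() body are illustrative stand-ins, not the real Metadata API.

import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.SortedMap;
import java.util.TreeMap;

final class MetadataSketch {
    final Map<String, String> indices;             // stand-in for the indices map
    final Map<String, String> dataStreams;         // stand-in for the DataStreamMetadata custom
    final SortedMap<String, String> indicesLookup; // derived and expensive to rebuild

    private MetadataSketch(Map<String, String> indices, Map<String, String> dataStreams, SortedMap<String, String> indicesLookup) {
        this.indices = indices;
        this.dataStreams = dataStreams;
        this.indicesLookup = indicesLookup;
    }

    static MetadataSketch build(Map<String, String> indices, Map<String, String> dataStreams, MetadataSketch previous) {
        // Recompute only when an input changed relative to the previous instance; otherwise reuse.
        boolean recomputeRequired = previous == null
            || indices.equals(previous.indices) == false
            || Objects.equals(dataStreams, previous.dataStreams) == false;
        SortedMap<String, String> lookup = recomputeRequired ? recomputeLookup(indices, dataStreams) : previous.indicesLookup;
        return new MetadataSketch(indices, dataStreams, lookup);
    }

    private static SortedMap<String, String> recomputeLookup(Map<String, String> indices, Map<String, String> dataStreams) {
        // Placeholder for the real alias and data-stream resolution work.
        SortedMap<String, String> lookup = new TreeMap<>(indices);
        lookup.putAll(dataStreams);
        return Collections.unmodifiableSortedMap(lookup);
    }
}

The reuse branch is only safe because the Builder captures previousMetadata at construction time and compares effectively immutable snapshots, which is what the equals() checks in the patch rely on.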
@@ -1586,8 +1627,7 @@ private SortedMap buildIndicesLookup() { IndexAbstraction existing = indicesLookup.put(indexMetadata.getIndex().getName(), index); assert existing == null : "duplicate for " + indexMetadata.getIndex(); - for (final AliasMetadata aliasCursor : indexMetadata.getAliases().values()) { - AliasMetadata aliasMetadata = aliasCursor; + for (final AliasMetadata aliasMetadata : indexMetadata.getAliases().values()) { indicesLookup.compute(aliasMetadata.getAlias(), (aliasName, alias) -> { if (alias == null) { return new IndexAbstraction.Alias(aliasMetadata, indexMetadata); diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java index a744c181ee341..29904aa9e3ff8 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java @@ -58,6 +58,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -76,6 +77,10 @@ import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; public class MetadataTests extends OpenSearchTestCase { @@ -1364,6 +1369,62 @@ public void testValidateDataStreamsForNullDataStreamMetadata() { } } + public void testMetadataBuildInvocations() { + final Metadata previousMetadata = randomMetadata(); + Metadata builtMetadata; + Metadata.Builder spyBuilder; + + // previous Metadata state was not provided to Builder during assignment - indices lookups should get re-computed + spyBuilder = spy(Metadata.builder()); + builtMetadata = spyBuilder.build(); + verify(spyBuilder, times(1)).buildMetadataWithRecomputedIndicesLookups(); + verify(spyBuilder, times(0)).buildMetadataWithPreviousIndicesLookups(); + compareMetadata(Metadata.EMPTY_METADATA, builtMetadata, true, true, false); + + // no changes in builder method after initialization from previous Metadata - indices lookups should not be re-computed + spyBuilder = spy(Metadata.builder(previousMetadata)); + builtMetadata = spyBuilder.version(previousMetadata.version() + 1).build(); + verify(spyBuilder, times(0)).buildMetadataWithRecomputedIndicesLookups(); + verify(spyBuilder, times(1)).buildMetadataWithPreviousIndicesLookups(); + compareMetadata(previousMetadata, builtMetadata, true, true, true); + reset(spyBuilder); + + // adding new index - all indices lookups should get re-computed + spyBuilder = spy(Metadata.builder(previousMetadata)); + String index = "new_index_" + randomAlphaOfLength(3); + builtMetadata = spyBuilder.indices( + Collections.singletonMap( + index, + IndexMetadata.builder(index).settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1).build() + ) + ).build(); + verify(spyBuilder, times(1)).buildMetadataWithRecomputedIndicesLookups(); + verify(spyBuilder, times(0)).buildMetadataWithPreviousIndicesLookups(); + compareMetadata(previousMetadata, builtMetadata, false, true, false); + reset(spyBuilder); + + // adding new templates - indices lookups should not get recomputed + spyBuilder = spy(Metadata.builder(previousMetadata)); + builtMetadata = spyBuilder.put("component_template_new_" + randomAlphaOfLength(3), 
ComponentTemplateTests.randomInstance()) + .put("index_template_v2_new_" + randomAlphaOfLength(3), ComposableIndexTemplateTests.randomInstance()) + .build(); + verify(spyBuilder, times(0)).buildMetadataWithRecomputedIndicesLookups(); + verify(spyBuilder, times(1)).buildMetadataWithPreviousIndicesLookups(); + compareMetadata(previousMetadata, builtMetadata, true, false, false); + reset(spyBuilder); + + // adding new data stream - indices lookups should get re-computed + spyBuilder = spy(Metadata.builder(previousMetadata)); + DataStream dataStream = DataStreamTests.randomInstance(); + for (Index backingIndex : dataStream.getIndices()) { + spyBuilder.put(DataStreamTestHelper.getIndexMetadataBuilderForIndex(backingIndex)); + } + builtMetadata = spyBuilder.put(dataStream).version(previousMetadata.version() + 1).build(); + verify(spyBuilder, times(1)).buildMetadataWithRecomputedIndicesLookups(); + verify(spyBuilder, times(0)).buildMetadataWithPreviousIndicesLookups(); + compareMetadata(previousMetadata, builtMetadata, false, true, true); + } + public static Metadata randomMetadata() { Metadata.Builder md = Metadata.builder() .put(buildIndexMetadata("index", "alias", randomBoolean() ? null : randomBoolean()).build(), randomBoolean()) @@ -1435,4 +1496,46 @@ private static class CreateIndexResult { this.metadata = metadata; } } + + private static void compareMetadata( + final Metadata previousMetadata, + final Metadata newMetadata, + final boolean compareIndicesLookups, + final boolean compareTemplates, + final boolean checkVersionIncrement + ) { + assertEquals(previousMetadata.clusterUUID(), newMetadata.clusterUUID()); + assertEquals(previousMetadata.clusterUUIDCommitted(), newMetadata.clusterUUIDCommitted()); + assertEquals(previousMetadata.coordinationMetadata(), newMetadata.coordinationMetadata()); + assertEquals(previousMetadata.settings(), newMetadata.settings()); + assertEquals(previousMetadata.transientSettings(), newMetadata.transientSettings()); + assertEquals(previousMetadata.persistentSettings(), newMetadata.persistentSettings()); + assertEquals(previousMetadata.hashesOfConsistentSettings(), newMetadata.hashesOfConsistentSettings()); + + if (compareIndicesLookups == true) { + assertEquals(previousMetadata.indices(), newMetadata.indices()); + assertEquals(previousMetadata.getConcreteAllIndices(), newMetadata.getConcreteAllIndices()); + assertEquals(previousMetadata.getConcreteAllClosedIndices(), newMetadata.getConcreteAllClosedIndices()); + assertEquals(previousMetadata.getConcreteAllOpenIndices(), newMetadata.getConcreteAllOpenIndices()); + assertEquals(previousMetadata.getConcreteVisibleIndices(), newMetadata.getConcreteVisibleIndices()); + assertEquals(previousMetadata.getConcreteVisibleClosedIndices(), newMetadata.getConcreteVisibleClosedIndices()); + assertEquals(previousMetadata.getConcreteVisibleOpenIndices(), newMetadata.getConcreteVisibleOpenIndices()); + assertEquals(previousMetadata.getIndicesLookup(), newMetadata.getIndicesLookup()); + assertEquals(previousMetadata.getCustoms().get(DataStreamMetadata.TYPE), newMetadata.getCustoms().get(DataStreamMetadata.TYPE)); + } + + if (compareTemplates == true) { + assertEquals(previousMetadata.templates(), newMetadata.templates()); + assertEquals(previousMetadata.templatesV2(), newMetadata.templatesV2()); + assertEquals(previousMetadata.componentTemplates(), newMetadata.componentTemplates()); + } + + if (compareIndicesLookups == true && compareTemplates == true) { + assertEquals(previousMetadata.getCustoms(), newMetadata.getCustoms()); 
+ } + + if (checkVersionIncrement == true) { + assertEquals(previousMetadata.version() + 1, newMetadata.version()); + } + } } From 418ab51718cb68b7a57d4cf524b1dec9ed02b224 Mon Sep 17 00:00:00 2001 From: Sachin Kale Date: Mon, 10 Jul 2023 13:10:41 +0530 Subject: [PATCH 07/29] [Remote Segment Store] Fix flaky RemoteStoreIT.testStaleCommitDeletion tests (#8523) Signed-off-by: Sachin Kale --- .../org/opensearch/remotestore/RemoteStoreIT.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 85208a33cc9f5..f01e4969b1fe7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -8,6 +8,7 @@ package org.opensearch.remotestore; +import org.hamcrest.MatcherAssert; import org.junit.Before; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; @@ -35,6 +36,8 @@ import java.util.concurrent.TimeUnit; import static org.hamcrest.Matchers.comparesEqualTo; +import static org.hamcrest.Matchers.oneOf; +import static org.hamcrest.Matchers.is; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; @@ -285,7 +288,6 @@ public void testRemoteTranslogCleanup() throws Exception { verifyRemoteStoreCleanup(true); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8504") public void testStaleCommitDeletionWithInvokeFlush() throws Exception { internalCluster().startDataOnlyNodes(3); createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 10000l)); @@ -301,16 +303,15 @@ public void testStaleCommitDeletionWithInvokeFlush() throws Exception { assertBusy(() -> { int actualFileCount = getFileCount(indexPath); if (numberOfIterations <= RemoteStoreRefreshListener.LAST_N_METADATA_FILES_TO_KEEP) { - assertEquals(numberOfIterations, actualFileCount); + MatcherAssert.assertThat(actualFileCount, is(oneOf(numberOfIterations, numberOfIterations + 1))); } else { // As delete is async its possible that the file gets created before the deletion or after // deletion. - assertTrue(actualFileCount >= 10 || actualFileCount <= 11); + MatcherAssert.assertThat(actualFileCount, is(oneOf(10, 11))); } }, 30, TimeUnit.SECONDS); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8504") public void testStaleCommitDeletionWithoutInvokeFlush() throws Exception { internalCluster().startDataOnlyNodes(3); createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 10000l)); @@ -322,6 +323,8 @@ public void testStaleCommitDeletionWithoutInvokeFlush() throws Exception { .get() .getSetting(INDEX_NAME, IndexMetadata.SETTING_INDEX_UUID); Path indexPath = Path.of(String.valueOf(absolutePath), indexUUID, "/0/segments/metadata"); - assertEquals(numberOfIterations, getFileCount(indexPath)); + int actualFileCount = getFileCount(indexPath); + // We also allow (numberOfIterations + 1) as index creation also triggers refresh. 
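// A small, self-contained illustration (hypothetical helper; not part of the patch) of the
// assertion style used in this test: when an async side effect can add at most one extra file,
// asserting membership in {n, n + 1} with Hamcrest's oneOf keeps the test deterministic
// without sleeping.
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.oneOf;

class TolerantFileCountAssertionSketch {
    static void assertFileCount(int actual, int expected) {
        // Passes for expected or expected + 1, failing fast on anything else.
        assertThat(actual, is(oneOf(expected, expected + 1)));
    }
}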
+ MatcherAssert.assertThat(actualFileCount, is(oneOf(numberOfIterations, numberOfIterations + 1))); } } From 6d39aaa4db00dcb2b33fd1c3837026f337a7c344 Mon Sep 17 00:00:00 2001 From: Ashish Date: Mon, 10 Jul 2023 19:24:44 +0530 Subject: [PATCH 08/29] Extend existing IndexRecoveryIT for remote indexes (#8505) Signed-off-by: Ashish Singh --- .../indices/recovery/IndexRecoveryIT.java | 40 ++++++--- .../remotestore/RemoteIndexRecoveryIT.java | 88 +++++++++++++++++++ .../opensearch/index/engine/NoOpEngine.java | 14 +-- .../opensearch/test/InternalTestCluster.java | 16 ++-- 4 files changed, 137 insertions(+), 21 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java index d04c31c0d6e24..72b9b32236371 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java @@ -34,6 +34,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.IndexCommit; +import org.hamcrest.Matcher; import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; @@ -101,8 +102,8 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.NodeIndicesStats; import org.opensearch.indices.analysis.AnalysisModule; -import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.recovery.RecoveryState.Stage; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.node.NodeClosedException; import org.opensearch.node.RecoverySettingsChunkSizePlugin; import org.opensearch.plugins.AnalysisPlugin; @@ -577,21 +578,25 @@ public void testRerouteRecovery() throws Exception { .clear() .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.Recovery)) .get(); - assertThat(statsResponse1.getNodes(), hasSize(2)); - for (NodeStats nodeStats : statsResponse1.getNodes()) { + List dataNodeStats = statsResponse1.getNodes() + .stream() + .filter(nodeStats -> nodeStats.getNode().isDataNode()) + .collect(Collectors.toList()); + assertThat(dataNodeStats, hasSize(2)); + for (NodeStats nodeStats : dataNodeStats) { final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats(); if (nodeStats.getNode().getName().equals(nodeA)) { assertThat( "node A throttling should increase", recoveryStats.throttleTime().millis(), - greaterThan(finalNodeAThrottling) + getMatcherForThrottling(finalNodeAThrottling) ); } if (nodeStats.getNode().getName().equals(nodeB)) { assertThat( "node B throttling should increase", recoveryStats.throttleTime().millis(), - greaterThan(finalNodeBThrottling) + getMatcherForThrottling(finalNodeBThrottling) ); } } @@ -623,7 +628,7 @@ public void testRerouteRecovery() throws Exception { final RecoveryStats recoveryStats = nodeStats.getIndices().getRecoveryStats(); assertThat(recoveryStats.currentAsSource(), equalTo(0)); assertThat(recoveryStats.currentAsTarget(), equalTo(0)); - assertThat(nodeName + " throttling should be >0", recoveryStats.throttleTime().millis(), greaterThan(0L)); + assertThat(nodeName + " throttling should be >0", recoveryStats.throttleTime().millis(), getMatcherForThrottling(0)); }; // we have to use assertBusy as 
recovery counters are decremented only when the last reference to the RecoveryTarget // is decremented, which may happen after the recovery was done. @@ -644,7 +649,8 @@ public void testRerouteRecovery() throws Exception { logger.info("--> start node C"); String nodeC = internalCluster().startNode(); - assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes("3").get().isTimedOut()); + int nodeCount = internalCluster().getNodeNames().length; + assertFalse(client().admin().cluster().prepareHealth().setWaitForNodes(String.valueOf(nodeCount)).get().isTimedOut()); logger.info("--> slowing down recoveries"); slowDownRecovery(shardSize); @@ -678,7 +684,7 @@ public void testRerouteRecovery() throws Exception { assertOnGoingRecoveryState(nodeCRecoveryStates.get(0), 0, PeerRecoverySource.INSTANCE, false, nodeB, nodeC); validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex()); - if (randomBoolean()) { + if (randomBoolean() && shouldAssertOngoingRecoveryInRerouteRecovery()) { // shutdown node with relocation source of replica shard and check if recovery continues internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodeA)); ensureStableCluster(2); @@ -722,6 +728,14 @@ public void testRerouteRecovery() throws Exception { validateIndexRecoveryState(nodeCRecoveryStates.get(0).getIndex()); } + protected boolean shouldAssertOngoingRecoveryInRerouteRecovery() { + return false; + } + + protected Matcher getMatcherForThrottling(long value) { + return greaterThan(value); + } + public void testSnapshotRecovery() throws Exception { logger.info("--> start node A"); String nodeA = internalCluster().startNode(); @@ -824,7 +838,7 @@ private IndicesStatsResponse createAndPopulateIndex(String name, int nodeCount, ensureGreen(); logger.info("--> indexing sample data"); - final int numDocs = between(MIN_DOC_COUNT, MAX_DOC_COUNT); + final int numDocs = numDocs(); final IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; for (int i = 0; i < numDocs; i++) { @@ -846,6 +860,10 @@ private void validateIndexRecoveryState(ReplicationLuceneIndex indexState) { assertThat(indexState.recoveredBytesPercent(), lessThanOrEqualTo(100.0f)); } + protected int numDocs() { + return between(MIN_DOC_COUNT, MAX_DOC_COUNT); + } + public void testTransientErrorsDuringRecoveryAreRetried() throws Exception { final String indexName = "test"; final Settings nodeSettings = Settings.builder() @@ -1384,10 +1402,10 @@ public void testHistoryRetention() throws Exception { flush(indexName); } - String firstNodeToStop = randomFrom(internalCluster().getNodeNames()); + String firstNodeToStop = randomFrom(internalCluster().getDataNodeNames()); Settings firstNodeToStopDataPathSettings = internalCluster().dataPathSettings(firstNodeToStop); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(firstNodeToStop)); - String secondNodeToStop = randomFrom(internalCluster().getNodeNames()); + String secondNodeToStop = randomFrom(internalCluster().getDataNodeNames()); Settings secondNodeToStopDataPathSettings = internalCluster().dataPathSettings(secondNodeToStop); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(secondNodeToStop)); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java new file mode 100644 index 0000000000000..11c9993ac7874 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java @@ -0,0 
+1,88 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore; + +import org.hamcrest.Matcher; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.IndexModule; +import org.opensearch.index.IndexSettings; +import org.opensearch.indices.recovery.IndexRecoveryIT; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemoteIndexRecoveryIT extends IndexRecoveryIT { + + protected static final String REPOSITORY_NAME = "test-remote-store-repo"; + + protected Path absolutePath; + + @Override + protected Settings featureFlagSettings() { + return Settings.builder() + .put(super.featureFlagSettings()) + .put(FeatureFlags.REMOTE_STORE, "true") + .put(FeatureFlags.SEGMENT_REPLICATION_EXPERIMENTAL, "true") + .build(); + } + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + internalCluster().startClusterManagerOnlyNode(); + absolutePath = randomRepoPath().toAbsolutePath(); + assertAcked( + clusterAdmin().preparePutRepository(REPOSITORY_NAME).setType("fs").setSettings(Settings.builder().put("location", absolutePath)) + ); + } + + @Override + public Settings indexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), false) + .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) + .put(IndexMetadata.SETTING_REMOTE_STORE_REPOSITORY, REPOSITORY_NAME) + .put(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_ENABLED, true) + .put(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY, REPOSITORY_NAME) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s") + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .build(); + } + + @After + public void teardown() { + assertAcked(clusterAdmin().prepareDeleteRepository(REPOSITORY_NAME)); + } + + @Override + protected Matcher getMatcherForThrottling(long value) { + return Matchers.greaterThanOrEqualTo(value); + } + + @Override + protected int numDocs() { + return randomIntBetween(100, 200); + } + + @Override + protected boolean shouldAssertOngoingRecoveryInRerouteRecovery() { + return false; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/NoOpEngine.java b/server/src/main/java/org/opensearch/index/engine/NoOpEngine.java index 2b126e627bd3d..5c548df1cbb60 100644 --- a/server/src/main/java/org/opensearch/index/engine/NoOpEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NoOpEngine.java @@ -209,11 +209,15 @@ public void trimUnreferencedTranslogFiles() throws TranslogException { translog.trimUnreferencedReaders(); // refresh the translog stats translogStats = translog.stats(); - assert translog.currentFileGeneration() == translog.getMinFileGeneration() : "translog was not trimmed " - + " current gen " - + translog.currentFileGeneration() - + " != min gen " - + translog.getMinFileGeneration(); + // When remote 
translog is enabled, the min file generation is dependent on the (N-1) + // lastRefreshedCheckpoint SeqNo - refer RemoteStoreRefreshListener. This leads to older generations not + // being trimmed and leading to current generation being higher than the min file generation. + assert engineConfig.getIndexSettings().isRemoteTranslogStoreEnabled() + || translog.currentFileGeneration() == translog.getMinFileGeneration() : "translog was not trimmed " + + " current gen " + + translog.currentFileGeneration() + + " != min gen " + + translog.getMinFileGeneration(); } } } catch (final Exception e) { diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 49d8b64bc71cd..3f7bb71b27681 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -1524,11 +1524,13 @@ public void assertSeqNos() throws Exception { } assertThat(replicaShardRouting + " seq_no_stats mismatch", seqNoStats, equalTo(primarySeqNoStats)); // the local knowledge on the primary of the global checkpoint equals the global checkpoint on the shard - assertThat( - replicaShardRouting + " global checkpoint syncs mismatch", - seqNoStats.getGlobalCheckpoint(), - equalTo(syncGlobalCheckpoints.get(replicaShardRouting.allocationId().getId())) - ); + if (primaryShard.isRemoteTranslogEnabled() == false) { + assertThat( + replicaShardRouting + " global checkpoint syncs mismatch", + seqNoStats.getGlobalCheckpoint(), + equalTo(syncGlobalCheckpoints.get(replicaShardRouting.allocationId().getId())) + ); + } } } } @@ -2155,6 +2157,10 @@ synchronized Set allDataNodesButN(int count) { return set; } + public Set getDataNodeNames() { + return allDataNodesButN(0); + } + /** * Returns a set of nodes that have at least one shard of the given index. 
*/ From 5ad6d6df6c098a29b8521baf7909c1b31f2b8715 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Mon, 10 Jul 2023 12:03:56 -0400 Subject: [PATCH 09/29] Bump com.google.guava:guava from 30.1.1-jre to 32.1.1-jre (#8583) Signed-off-by: Andriy Redko --- CHANGELOG.md | 2 +- buildSrc/version.properties | 2 +- distribution/tools/keystore-cli/build.gradle | 2 +- distribution/tools/plugin-cli/build.gradle | 2 +- distribution/tools/upgrade-cli/build.gradle | 2 +- plugins/ingest-attachment/licenses/guava-32.0.1-jre.jar.sha1 | 1 - plugins/ingest-attachment/licenses/guava-32.1.1-jre.jar.sha1 | 1 + plugins/repository-azure/licenses/guava-32.0.1-jre.jar.sha1 | 1 - plugins/repository-azure/licenses/guava-32.1.1-jre.jar.sha1 | 1 + plugins/repository-gcs/licenses/guava-32.0.1-jre.jar.sha1 | 1 - plugins/repository-gcs/licenses/guava-32.1.1-jre.jar.sha1 | 1 + plugins/repository-hdfs/licenses/guava-32.0.1-jre.jar.sha1 | 1 - plugins/repository-hdfs/licenses/guava-32.1.1-jre.jar.sha1 | 1 + 13 files changed, 9 insertions(+), 9 deletions(-) delete mode 100644 plugins/ingest-attachment/licenses/guava-32.0.1-jre.jar.sha1 create mode 100644 plugins/ingest-attachment/licenses/guava-32.1.1-jre.jar.sha1 delete mode 100644 plugins/repository-azure/licenses/guava-32.0.1-jre.jar.sha1 create mode 100644 plugins/repository-azure/licenses/guava-32.1.1-jre.jar.sha1 delete mode 100644 plugins/repository-gcs/licenses/guava-32.0.1-jre.jar.sha1 create mode 100644 plugins/repository-gcs/licenses/guava-32.1.1-jre.jar.sha1 delete mode 100644 plugins/repository-hdfs/licenses/guava-32.0.1-jre.jar.sha1 create mode 100644 plugins/repository-hdfs/licenses/guava-32.1.1-jre.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index e7998d4022cb8..929431bba24d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -105,7 +105,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Dependencies - Bump `com.azure:azure-storage-common` from 12.21.0 to 12.21.1 (#7566, #7814) -- Bump `com.google.guava:guava` from 30.1.1-jre to 32.1.1-jre (#7565, #7811, #7807, #7808, #8402, #8400, #8401) +- Bump `com.google.guava:guava` from 30.1.1-jre to 32.1.1-jre (#7565, #7811, #7807, #7808, #8402, #8400, #8401, #8581) - Bump `net.minidev:json-smart` from 2.4.10 to 2.4.11 (#7660, #7812) - Bump `org.gradle.test-retry` from 1.5.2 to 1.5.3 (#7810) - Bump `com.diffplug.spotless` from 6.17.0 to 6.18.0 (#7896) diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 408b03e60cc5d..7a2ddc24aabcf 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -22,7 +22,7 @@ jettison = 1.5.4 woodstox = 6.4.0 kotlin = 1.7.10 antlr4 = 4.11.1 -guava = 32.0.1-jre +guava = 32.1.1-jre protobuf = 3.22.3 jakarta_annotation = 1.3.5 diff --git a/distribution/tools/keystore-cli/build.gradle b/distribution/tools/keystore-cli/build.gradle index fe57b342ae298..d819322fc77b7 100644 --- a/distribution/tools/keystore-cli/build.gradle +++ b/distribution/tools/keystore-cli/build.gradle @@ -35,7 +35,7 @@ dependencies { compileOnly project(":libs:opensearch-cli") testImplementation project(":test:framework") testImplementation 'com.google.jimfs:jimfs:1.2' - testRuntimeOnly('com.google.guava:guava:32.1.1-jre') { + testRuntimeOnly("com.google.guava:guava:${versions.guava}") { transitive = false } } diff --git a/distribution/tools/plugin-cli/build.gradle b/distribution/tools/plugin-cli/build.gradle index d39697e81914b..7a300c17c8189 100644 --- a/distribution/tools/plugin-cli/build.gradle +++ 
b/distribution/tools/plugin-cli/build.gradle @@ -41,7 +41,7 @@ dependencies { api "org.bouncycastle:bc-fips:1.0.2.3" testImplementation project(":test:framework") testImplementation 'com.google.jimfs:jimfs:1.2' - testRuntimeOnly('com.google.guava:guava:32.1.1-jre') { + testRuntimeOnly("com.google.guava:guava:${versions.guava}") { transitive = false } diff --git a/distribution/tools/upgrade-cli/build.gradle b/distribution/tools/upgrade-cli/build.gradle index d81d00440a864..c7bf0ff1d2810 100644 --- a/distribution/tools/upgrade-cli/build.gradle +++ b/distribution/tools/upgrade-cli/build.gradle @@ -21,7 +21,7 @@ dependencies { implementation "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" testImplementation project(":test:framework") testImplementation 'com.google.jimfs:jimfs:1.2' - testRuntimeOnly('com.google.guava:guava:32.1.1-jre') { + testRuntimeOnly("com.google.guava:guava:${versions.guava}") { transitive = false } } diff --git a/plugins/ingest-attachment/licenses/guava-32.0.1-jre.jar.sha1 b/plugins/ingest-attachment/licenses/guava-32.0.1-jre.jar.sha1 deleted file mode 100644 index 80dc9e9308a6c..0000000000000 --- a/plugins/ingest-attachment/licenses/guava-32.0.1-jre.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6e5d51a72d142f2d40a57dfb897188b36a95b489 \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/guava-32.1.1-jre.jar.sha1 b/plugins/ingest-attachment/licenses/guava-32.1.1-jre.jar.sha1 new file mode 100644 index 0000000000000..0d791b5d3f55b --- /dev/null +++ b/plugins/ingest-attachment/licenses/guava-32.1.1-jre.jar.sha1 @@ -0,0 +1 @@ +ad575652d84153075dd41ec6177ccb15251262b2 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/guava-32.0.1-jre.jar.sha1 b/plugins/repository-azure/licenses/guava-32.0.1-jre.jar.sha1 deleted file mode 100644 index 80dc9e9308a6c..0000000000000 --- a/plugins/repository-azure/licenses/guava-32.0.1-jre.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6e5d51a72d142f2d40a57dfb897188b36a95b489 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/guava-32.1.1-jre.jar.sha1 b/plugins/repository-azure/licenses/guava-32.1.1-jre.jar.sha1 new file mode 100644 index 0000000000000..0d791b5d3f55b --- /dev/null +++ b/plugins/repository-azure/licenses/guava-32.1.1-jre.jar.sha1 @@ -0,0 +1 @@ +ad575652d84153075dd41ec6177ccb15251262b2 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/guava-32.0.1-jre.jar.sha1 b/plugins/repository-gcs/licenses/guava-32.0.1-jre.jar.sha1 deleted file mode 100644 index 80dc9e9308a6c..0000000000000 --- a/plugins/repository-gcs/licenses/guava-32.0.1-jre.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6e5d51a72d142f2d40a57dfb897188b36a95b489 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/guava-32.1.1-jre.jar.sha1 b/plugins/repository-gcs/licenses/guava-32.1.1-jre.jar.sha1 new file mode 100644 index 0000000000000..0d791b5d3f55b --- /dev/null +++ b/plugins/repository-gcs/licenses/guava-32.1.1-jre.jar.sha1 @@ -0,0 +1 @@ +ad575652d84153075dd41ec6177ccb15251262b2 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/guava-32.0.1-jre.jar.sha1 b/plugins/repository-hdfs/licenses/guava-32.0.1-jre.jar.sha1 deleted file mode 100644 index 80dc9e9308a6c..0000000000000 --- a/plugins/repository-hdfs/licenses/guava-32.0.1-jre.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -6e5d51a72d142f2d40a57dfb897188b36a95b489 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/guava-32.1.1-jre.jar.sha1 
b/plugins/repository-hdfs/licenses/guava-32.1.1-jre.jar.sha1 new file mode 100644 index 0000000000000..0d791b5d3f55b --- /dev/null +++ b/plugins/repository-hdfs/licenses/guava-32.1.1-jre.jar.sha1 @@ -0,0 +1 @@ +ad575652d84153075dd41ec6177ccb15251262b2 \ No newline at end of file From 91bfa01606974b947455fbc289e21a1aad096fa8 Mon Sep 17 00:00:00 2001 From: Kunal Kotwani Date: Mon, 10 Jul 2023 09:43:19 -0700 Subject: [PATCH 10/29] Add safeguard limits for file cache during node level allocation (#8208) Signed-off-by: Kunal Kotwani --- CHANGELOG.md | 1 + .../cluster/ClusterInfoServiceIT.java | 28 +++ .../org/opensearch/cluster/ClusterInfo.java | 24 +- .../cluster/InternalClusterInfoService.java | 15 +- .../decider/DiskThresholdDecider.java | 52 +++++ .../store/remote/filecache/FileCache.java | 3 + .../opensearch/cluster/ClusterInfoTests.java | 24 +- .../allocation/DiskThresholdMonitorTests.java | 2 +- ...dexShardConstraintDeciderOverlapTests.java | 2 +- .../RemoteShardsBalancerBaseTestCase.java | 2 +- .../decider/DiskThresholdDeciderTests.java | 205 +++++++++++++++++- .../DiskThresholdDeciderUnitTests.java | 13 +- .../MockInternalClusterInfoService.java | 3 +- .../opensearch/test/OpenSearchTestCase.java | 8 + 14 files changed, 367 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 929431bba24d5..ecb92a4051738 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -158,6 +158,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Search Pipelines] Pass pipeline creation context to processor factories ([#8164](https://github.com/opensearch-project/OpenSearch/pull/8164)) - Enabling compression levels for zstd and zstd_no_dict ([#8312](https://github.com/opensearch-project/OpenSearch/pull/8312)) - Optimize Metadata build() to skip redundant computations as part of ClusterState build ([#7853](https://github.com/opensearch-project/OpenSearch/pull/7853)) +- Add safeguard limits for file cache during node level allocation ([#8208](https://github.com/opensearch-project/OpenSearch/pull/8208)) ### Deprecated diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterInfoServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterInfoServiceIT.java index 17e8526acfd74..508b8e21e42c1 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterInfoServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/ClusterInfoServiceIT.java @@ -50,6 +50,7 @@ import org.opensearch.core.common.Strings; import org.opensearch.index.IndexService; import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.store.remote.filecache.FileCacheStats; import org.opensearch.index.store.Store; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndexDescriptor; @@ -192,6 +193,11 @@ public void testClusterInfoServiceCollectsInformation() { logger.info("--> shard size: {}", size); assertThat("shard size is greater than 0", size, greaterThanOrEqualTo(0L)); } + + final Map nodeFileCacheStats = info.nodeFileCacheStats; + assertNotNull(nodeFileCacheStats); + assertThat("file cache is empty on non search nodes", nodeFileCacheStats.size(), Matchers.equalTo(0)); + ClusterService clusterService = internalTestCluster.getInstance(ClusterService.class, internalTestCluster.getClusterManagerName()); ClusterState state = clusterService.state(); for (ShardRouting shard : state.routingTable().allShards()) { @@ -209,6 +215,28 @@ public void 
testClusterInfoServiceCollectsInformation() { } } + public void testClusterInfoServiceCollectsFileCacheInformation() { + internalCluster().startNodes(1); + internalCluster().ensureAtLeastNumSearchAndDataNodes(2); + + InternalTestCluster internalTestCluster = internalCluster(); + // Get the cluster info service on the cluster-manager node + final InternalClusterInfoService infoService = (InternalClusterInfoService) internalTestCluster.getInstance( + ClusterInfoService.class, + internalTestCluster.getClusterManagerName() + ); + infoService.setUpdateFrequency(TimeValue.timeValueMillis(200)); + ClusterInfo info = infoService.refresh(); + assertNotNull("info should not be null", info); + final Map nodeFileCacheStats = info.nodeFileCacheStats; + assertNotNull(nodeFileCacheStats); + assertThat("file cache is enabled on both search nodes", nodeFileCacheStats.size(), Matchers.equalTo(2)); + + for (FileCacheStats fileCacheStats : nodeFileCacheStats.values()) { + assertThat("file cache is non empty", fileCacheStats.getTotal().getBytes(), greaterThan(0L)); + } + } + public void testClusterInfoServiceInformationClearOnError() { internalCluster().startNodes( 2, diff --git a/server/src/main/java/org/opensearch/cluster/ClusterInfo.java b/server/src/main/java/org/opensearch/cluster/ClusterInfo.java index 876a36c205975..ffa3d0d19fb71 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterInfo.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterInfo.java @@ -34,6 +34,7 @@ import com.carrotsearch.hppc.ObjectHashSet; import com.carrotsearch.hppc.cursors.ObjectCursor; +import org.opensearch.Version; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.common.io.stream.StreamOutput; @@ -42,6 +43,7 @@ import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.shard.ShardId; +import org.opensearch.index.store.remote.filecache.FileCacheStats; import java.io.IOException; import java.util.Collections; @@ -63,9 +65,10 @@ public class ClusterInfo implements ToXContentFragment, Writeable { public static final ClusterInfo EMPTY = new ClusterInfo(); final Map routingToDataPath; final Map reservedSpace; + final Map nodeFileCacheStats; protected ClusterInfo() { - this(Map.of(), Map.of(), Map.of(), Map.of(), Map.of()); + this(Map.of(), Map.of(), Map.of(), Map.of(), Map.of(), Map.of()); } /** @@ -83,13 +86,15 @@ public ClusterInfo( final Map mostAvailableSpaceUsage, final Map shardSizes, final Map routingToDataPath, - final Map reservedSpace + final Map reservedSpace, + final Map nodeFileCacheStats ) { this.leastAvailableSpaceUsage = leastAvailableSpaceUsage; this.shardSizes = shardSizes; this.mostAvailableSpaceUsage = mostAvailableSpaceUsage; this.routingToDataPath = routingToDataPath; this.reservedSpace = reservedSpace; + this.nodeFileCacheStats = nodeFileCacheStats; } public ClusterInfo(StreamInput in) throws IOException { @@ -105,6 +110,11 @@ public ClusterInfo(StreamInput in) throws IOException { this.shardSizes = Collections.unmodifiableMap(sizeMap); this.routingToDataPath = Collections.unmodifiableMap(routingMap); this.reservedSpace = Collections.unmodifiableMap(reservedSpaceMap); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.nodeFileCacheStats = in.readMap(StreamInput::readString, FileCacheStats::new); + } else { + this.nodeFileCacheStats = Map.of(); + } } @Override @@ -114,6 +124,9 @@ public void writeTo(StreamOutput out) throws 
IOException { out.writeMap(this.shardSizes, StreamOutput::writeString, (o, v) -> out.writeLong(v == null ? -1 : v)); out.writeMap(this.routingToDataPath, (o, k) -> k.writeTo(o), StreamOutput::writeString); out.writeMap(this.reservedSpace, (o, v) -> v.writeTo(o), (o, v) -> v.writeTo(o)); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeMap(this.nodeFileCacheStats, StreamOutput::writeString, (o, v) -> v.writeTo(o)); + } } public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { @@ -187,6 +200,13 @@ public Map getNodeMostAvailableDiskUsages() { return Collections.unmodifiableMap(this.mostAvailableSpaceUsage); } + /** + * Returns a node id to file cache stats mapping for the nodes that have search roles assigned to it. + */ + public Map getNodeFileCacheStats() { + return Collections.unmodifiableMap(this.nodeFileCacheStats); + } + /** * Returns the shard size for the given shard routing or null it that metric is not available. */ diff --git a/server/src/main/java/org/opensearch/cluster/InternalClusterInfoService.java b/server/src/main/java/org/opensearch/cluster/InternalClusterInfoService.java index 0acc7bece439f..9c12d6bb3e7ea 100644 --- a/server/src/main/java/org/opensearch/cluster/InternalClusterInfoService.java +++ b/server/src/main/java/org/opensearch/cluster/InternalClusterInfoService.java @@ -59,6 +59,7 @@ import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.index.store.StoreStats; +import org.opensearch.index.store.remote.filecache.FileCacheStats; import org.opensearch.monitor.fs.FsInfo; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.ReceiveTimeoutTransportException; @@ -72,6 +73,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; +import java.util.stream.Collectors; /** * InternalClusterInfoService provides the ClusterInfoService interface, @@ -110,6 +112,7 @@ public class InternalClusterInfoService implements ClusterInfoService, ClusterSt private volatile Map leastAvailableSpaceUsages; private volatile Map mostAvailableSpaceUsages; + private volatile Map nodeFileCacheStats; private volatile IndicesStatsSummary indicesStatsSummary; // null if this node is not currently the cluster-manager private final AtomicReference refreshAndRescheduleRunnable = new AtomicReference<>(); @@ -122,6 +125,7 @@ public class InternalClusterInfoService implements ClusterInfoService, ClusterSt public InternalClusterInfoService(Settings settings, ClusterService clusterService, ThreadPool threadPool, Client client) { this.leastAvailableSpaceUsages = Map.of(); this.mostAvailableSpaceUsages = Map.of(); + this.nodeFileCacheStats = Map.of(); this.indicesStatsSummary = IndicesStatsSummary.EMPTY; this.threadPool = threadPool; this.client = client; @@ -208,7 +212,8 @@ public ClusterInfo getClusterInfo() { mostAvailableSpaceUsages, indicesStatsSummary.shardSizes, indicesStatsSummary.shardRoutingToDataPath, - indicesStatsSummary.reservedSpace + indicesStatsSummary.reservedSpace, + nodeFileCacheStats ); } @@ -221,6 +226,7 @@ protected CountDownLatch updateNodeStats(final ActionListener(listener, latch)); return latch; @@ -264,6 +270,13 @@ public void onResponse(NodesStatsResponse nodesStatsResponse) { ); leastAvailableSpaceUsages = Collections.unmodifiableMap(leastAvailableUsagesBuilder); mostAvailableSpaceUsages = 
Collections.unmodifiableMap(mostAvailableUsagesBuilder); + + nodeFileCacheStats = Collections.unmodifiableMap( + nodesStatsResponse.getNodes() + .stream() + .filter(nodeStats -> nodeStats.getNode().isSearchNode()) + .collect(Collectors.toMap(nodeStats -> nodeStats.getNode().getId(), NodeStats::getFileCacheStats)) + ); } @Override diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDecider.java index ddd5e9274f08b..e216ca4511bff 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDecider.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDecider.java @@ -54,14 +54,21 @@ import org.opensearch.common.unit.ByteSizeValue; import org.opensearch.index.Index; import org.opensearch.index.shard.ShardId; +import org.opensearch.index.store.remote.filecache.FileCacheStats; import org.opensearch.snapshots.SnapshotShardSizeInfo; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; +import static org.opensearch.cluster.routing.RoutingPool.REMOTE_CAPABLE; +import static org.opensearch.cluster.routing.RoutingPool.getNodePool; +import static org.opensearch.cluster.routing.RoutingPool.getShardPool; import static org.opensearch.cluster.routing.allocation.DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING; import static org.opensearch.cluster.routing.allocation.DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING; +import static org.opensearch.index.store.remote.filecache.FileCache.DATA_TO_FILE_CACHE_SIZE_RATIO; /** * The {@link DiskThresholdDecider} checks that the node a shard is potentially @@ -167,6 +174,42 @@ public static long sizeOfRelocatingShards( @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { ClusterInfo clusterInfo = allocation.clusterInfo(); + + /* + The following block enables allocation for remote shards within safeguard limits of the filecache. + */ + if (REMOTE_CAPABLE.equals(getNodePool(node)) && REMOTE_CAPABLE.equals(getShardPool(shardRouting, allocation))) { + final List remoteShardsOnNode = StreamSupport.stream(node.spliterator(), false) + .filter(shard -> shard.primary() && REMOTE_CAPABLE.equals(getShardPool(shard, allocation))) + .collect(Collectors.toList()); + final long currentNodeRemoteShardSize = remoteShardsOnNode.stream() + .map(ShardRouting::getExpectedShardSize) + .mapToLong(Long::longValue) + .sum(); + + final long shardSize = getExpectedShardSize( + shardRouting, + 0L, + allocation.clusterInfo(), + allocation.snapshotShardSizeInfo(), + allocation.metadata(), + allocation.routingTable() + ); + + final FileCacheStats fileCacheStats = clusterInfo.getNodeFileCacheStats().getOrDefault(node.nodeId(), null); + final long nodeCacheSize = fileCacheStats != null ? 
fileCacheStats.getTotal().getBytes() : 0; + final long totalNodeRemoteShardSize = currentNodeRemoteShardSize + shardSize; + + if (totalNodeRemoteShardSize > DATA_TO_FILE_CACHE_SIZE_RATIO * nodeCacheSize) { + return allocation.decision( + Decision.NO, + NAME, + "file cache limit reached - remote shard size will exceed configured safeguard ratio" + ); + } + return Decision.YES; + } + Map usages = clusterInfo.getNodeMostAvailableDiskUsages(); final Decision decision = earlyTerminate(allocation, usages); if (decision != null) { @@ -422,6 +465,15 @@ public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAl if (shardRouting.currentNodeId().equals(node.nodeId()) == false) { throw new IllegalArgumentException("Shard [" + shardRouting + "] is not allocated on node: [" + node.nodeId() + "]"); } + + /* + The following block prevents movement for remote shards since they do not use the local storage as + the primary source of data storage. + */ + if (REMOTE_CAPABLE.equals(getNodePool(node)) && REMOTE_CAPABLE.equals(getShardPool(shardRouting, allocation))) { + return Decision.ALWAYS; + } + final ClusterInfo clusterInfo = allocation.clusterInfo(); final Map usages = clusterInfo.getNodeLeastAvailableDiskUsages(); final Decision decision = earlyTerminate(allocation, usages); diff --git a/server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java b/server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java index 0aa3740fb6ecb..3d23b4d22538c 100644 --- a/server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java +++ b/server/src/main/java/org/opensearch/index/store/remote/filecache/FileCache.java @@ -49,6 +49,9 @@ public class FileCache implements RefCountedCache { private final CircuitBreaker circuitBreaker; + // TODO: Convert the constant into an integer setting + public static final int DATA_TO_FILE_CACHE_SIZE_RATIO = 5; + public FileCache(SegmentedCache cache, CircuitBreaker circuitBreaker) { this.theCache = cache; this.circuitBreaker = circuitBreaker; diff --git a/server/src/test/java/org/opensearch/cluster/ClusterInfoTests.java b/server/src/test/java/org/opensearch/cluster/ClusterInfoTests.java index a32d6e35d0182..e1294da1e57bc 100644 --- a/server/src/test/java/org/opensearch/cluster/ClusterInfoTests.java +++ b/server/src/test/java/org/opensearch/cluster/ClusterInfoTests.java @@ -36,6 +36,7 @@ import org.opensearch.cluster.routing.TestShardRouting; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.index.shard.ShardId; +import org.opensearch.index.store.remote.filecache.FileCacheStats; import org.opensearch.test.OpenSearchTestCase; import java.util.HashMap; @@ -49,7 +50,8 @@ public void testSerialization() throws Exception { randomDiskUsage(), randomShardSizes(), randomRoutingToDataPath(), - randomReservedSpace() + randomReservedSpace(), + randomFileCacheStats() ); BytesStreamOutput output = new BytesStreamOutput(); clusterInfo.writeTo(output); @@ -60,6 +62,7 @@ public void testSerialization() throws Exception { assertEquals(clusterInfo.shardSizes, result.shardSizes); assertEquals(clusterInfo.routingToDataPath, result.routingToDataPath); assertEquals(clusterInfo.reservedSpace, result.reservedSpace); + assertEquals(clusterInfo.getNodeFileCacheStats().size(), result.getNodeFileCacheStats().size()); } private static Map randomDiskUsage() { @@ -79,6 +82,25 @@ private static Map randomDiskUsage() { return builder; } + private static Map randomFileCacheStats() { + int numEntries = 
randomIntBetween(0, 16); + final Map builder = new HashMap<>(numEntries); + for (int i = 0; i < numEntries; i++) { + String key = randomAlphaOfLength(16); + FileCacheStats fileCacheStats = new FileCacheStats( + randomLong(), + randomLong(), + randomLong(), + randomLong(), + randomLong(), + randomLong(), + randomLong() + ); + builder.put(key, fileCacheStats); + } + return builder; + } + private static Map randomShardSizes() { int numEntries = randomIntBetween(0, 128); final Map builder = new HashMap<>(numEntries); diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/DiskThresholdMonitorTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/DiskThresholdMonitorTests.java index 21d891bdbc317..3e21f6c19e150 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/DiskThresholdMonitorTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/DiskThresholdMonitorTests.java @@ -798,7 +798,7 @@ private static ClusterInfo clusterInfo( final Map diskUsages, final Map reservedSpace ) { - return new ClusterInfo(diskUsages, null, null, null, reservedSpace); + return new ClusterInfo(diskUsages, null, null, null, reservedSpace, Map.of()); } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/IndexShardConstraintDeciderOverlapTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/IndexShardConstraintDeciderOverlapTests.java index 7112af6b4efc0..15dcae65ce6e7 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/IndexShardConstraintDeciderOverlapTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/IndexShardConstraintDeciderOverlapTests.java @@ -176,7 +176,7 @@ public DevNullClusterInfo( final Map shardSizes, final Map reservedSpace ) { - super(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, null, reservedSpace); + super(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, null, reservedSpace, Map.of()); } @Override diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsBalancerBaseTestCase.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsBalancerBaseTestCase.java index 9d7d0ebc5b2b1..dbb08a999877d 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsBalancerBaseTestCase.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteShardsBalancerBaseTestCase.java @@ -239,7 +239,7 @@ public DevNullClusterInfo( final Map mostAvailableSpaceUsage, final Map shardSizes ) { - super(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, null, Map.of()); + super(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, null, Map.of(), Map.of()); } @Override diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java index c23d98c95fc3c..4ccf0a9bc3a20 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java @@ -70,6 +70,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.index.Index; import org.opensearch.index.shard.ShardId; +import org.opensearch.index.store.remote.filecache.FileCacheStats; import org.opensearch.repositories.IndexId; import 
org.opensearch.snapshots.EmptySnapshotsInfoService; import org.opensearch.snapshots.InternalSnapshotsInfoService.SnapshotShard; @@ -83,6 +84,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import static java.util.Collections.emptyMap; @@ -283,6 +285,190 @@ public void testDiskThreshold() { assertThat(clusterState.getRoutingNodes().node("node4").size(), equalTo(1)); } + public void testDiskThresholdForRemoteShards() { + Settings diskSettings = Settings.builder() + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), 0.7) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), 0.8) + .build(); + + Map usages = new HashMap<>(); + usages.put("node1", new DiskUsage("node1", "node1", "/dev/null", 100, 10)); // 90% used + usages.put("node2", new DiskUsage("node2", "node2", "/dev/null", 100, 35)); // 65% used + usages.put("node3", new DiskUsage("node3", "node3", "/dev/null", 100, 60)); // 40% used + + Map shardSizes = new HashMap<>(); + shardSizes.put("[test][0][p]", 10L); // 10 bytes + shardSizes.put("[test][0][r]", 10L); + + Map fileCacheStatsMap = new HashMap<>(); + fileCacheStatsMap.put("node1", new FileCacheStats(0, 0, 1000, 0, 0, 0, 0)); + fileCacheStatsMap.put("node2", new FileCacheStats(0, 0, 1000, 0, 0, 0, 0)); + fileCacheStatsMap.put("node3", new FileCacheStats(0, 0, 1000, 0, 0, 0, 0)); + final ClusterInfo clusterInfo = new DevNullClusterInfo(usages, usages, shardSizes, fileCacheStatsMap); + + ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + AllocationDeciders deciders = new AllocationDeciders( + new HashSet<>(Arrays.asList(new SameShardAllocationDecider(Settings.EMPTY, clusterSettings), makeDecider(diskSettings))) + ); + + ClusterInfoService cis = () -> { + logger.info("--> calling fake getClusterInfo"); + return clusterInfo; + }; + AllocationService strategy = new AllocationService( + deciders, + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + cis, + EmptySnapshotsInfoService.INSTANCE + ); + + Metadata metadata = Metadata.builder() + .put(IndexMetadata.builder("test").settings(remoteIndexSettings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(1)) + .build(); + + final RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build(); + + ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(initialRoutingTable) + .build(); + + Set defaultWithSearchRole = new HashSet<>(CLUSTER_MANAGER_DATA_ROLES); + defaultWithSearchRole.add(DiscoveryNodeRole.SEARCH_ROLE); + + logger.info("--> adding two nodes"); + clusterState = ClusterState.builder(clusterState) + .nodes(DiscoveryNodes.builder().add(newNode("node1", defaultWithSearchRole)).add(newNode("node2", defaultWithSearchRole))) + .build(); + clusterState = strategy.reroute(clusterState, "reroute"); + logShardStates(clusterState); + + // Primary shard should be initializing, replica should not + assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(2)); + + logger.info("--> start the shards (primaries)"); + clusterState = startInitializingShardsAndReroute(strategy, clusterState); + + 
logShardStates(clusterState); + // Assert that both the primary and the replica are able to start + assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2)); + + logger.info("--> adding node3"); + + clusterState = ClusterState.builder(clusterState).nodes(DiscoveryNodes.builder(clusterState.nodes()).add(newNode("node3"))).build(); + clusterState = strategy.reroute(clusterState, "reroute"); + + logShardStates(clusterState); + // Assert that nothing is initialized or relocated to node3, which does not hold the search role + assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2)); + assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.INITIALIZING).size(), equalTo(0)); + + logger.info("--> start the shards (replicas)"); + clusterState = startInitializingShardsAndReroute(strategy, clusterState); + + logShardStates(clusterState); + // Assert that both shards stay where they are, one on node1 and one on node2, with node3 left empty + assertThat(clusterState.getRoutingNodes().shardsWithState(ShardRoutingState.STARTED).size(), equalTo(2)); + assertThat(clusterState.getRoutingNodes().node("node1").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node2").size(), equalTo(1)); + assertThat(clusterState.getRoutingNodes().node("node3").size(), equalTo(0)); + } + + public void testFileCacheRemoteShardsDecisions() { + Settings diskSettings = Settings.builder() + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true) + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "60%") + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "70%") + .build(); + + // We have an index with 2 primary shards, each taking 40 bytes. Each node has 100 bytes available + final Map<String, DiskUsage> usages = new HashMap<>(); + usages.put("node1", new DiskUsage("node1", "n1", "/dev/null", 100, 20)); // 80% used + usages.put("node2", new DiskUsage("node2", "n2", "/dev/null", 100, 100)); // 0% used + + final Map<String, Long> shardSizes = new HashMap<>(); + shardSizes.put("[test][0][p]", 40L); + shardSizes.put("[test][1][p]", 40L); + shardSizes.put("[foo][0][p]", 10L); + + // The first node has a file cache size of 0; the second has 1000, greater than the shard sizes.
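+ // (FileCacheStats is filled in positionally throughout these tests; judging from how the decider consumes it, the third argument is read as the node's total file cache capacity in bytes. The remaining zeroed fields are not exercised here, so their exact meaning is an assumption.)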
+ Map<String, FileCacheStats> fileCacheStatsMap = new HashMap<>(); + fileCacheStatsMap.put("node1", new FileCacheStats(0, 0, 0, 0, 0, 0, 0)); + fileCacheStatsMap.put("node2", new FileCacheStats(0, 0, 1000, 0, 0, 0, 0)); + + final ClusterInfo clusterInfo = new DevNullClusterInfo(usages, usages, shardSizes, fileCacheStatsMap); + + Set<DiscoveryNodeRole> defaultWithSearchRole = new HashSet<>(CLUSTER_MANAGER_DATA_ROLES); + defaultWithSearchRole.add(DiscoveryNodeRole.SEARCH_ROLE); + + DiskThresholdDecider diskThresholdDecider = makeDecider(diskSettings); + Metadata metadata = Metadata.builder() + .put(IndexMetadata.builder("test").settings(remoteIndexSettings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(0)) + .build(); + + RoutingTable initialRoutingTable = RoutingTable.builder().addAsNew(metadata.index("test")).build(); + + DiscoveryNode discoveryNode1 = new DiscoveryNode( + "node1", + buildNewFakeTransportAddress(), + emptyMap(), + defaultWithSearchRole, + Version.CURRENT + ); + DiscoveryNode discoveryNode2 = new DiscoveryNode( + "node2", + buildNewFakeTransportAddress(), + emptyMap(), + defaultWithSearchRole, + Version.CURRENT + ); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(discoveryNode1).add(discoveryNode2).build(); + + ClusterState baseClusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(initialRoutingTable) + .nodes(discoveryNodes) + .build(); + + // Together the two 40-byte shards put node1 at 80% disk usage against a 70% high watermark; for remote shards, though, the decisions below are driven by the file cache rather than by disk usage + ShardRouting firstRouting = TestShardRouting.newShardRouting("test", 0, "node1", null, true, ShardRoutingState.STARTED); + ShardRouting secondRouting = TestShardRouting.newShardRouting("test", 1, "node1", null, true, ShardRoutingState.STARTED); + RoutingNode firstRoutingNode = new RoutingNode("node1", discoveryNode1, firstRouting, secondRouting); + RoutingNode secondRoutingNode = new RoutingNode("node2", discoveryNode2); + + RoutingTable.Builder builder = RoutingTable.builder() + .add( + IndexRoutingTable.builder(firstRouting.index()) + .addIndexShard(new IndexShardRoutingTable.Builder(firstRouting.shardId()).addShard(firstRouting).build()) + .addIndexShard(new IndexShardRoutingTable.Builder(secondRouting.shardId()).addShard(secondRouting).build()) + ); + ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); + RoutingAllocation routingAllocation = new RoutingAllocation( + null, + new RoutingNodes(clusterState), + clusterState, + clusterInfo, + null, + System.nanoTime() + ); + routingAllocation.debugDecision(true); + Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + + decision = diskThresholdDecider.canAllocate(firstRouting, firstRoutingNode, routingAllocation); + assertThat(decision.type(), equalTo(Decision.Type.NO)); + + assertThat( + ((Decision.Single) decision).getExplanation(), + containsString("file cache limit reached - remote shard size will exceed configured safeguard ratio") + ); + + decision = diskThresholdDecider.canAllocate(firstRouting, secondRoutingNode, routingAllocation); + assertThat(decision.type(), equalTo(Decision.Type.YES)); + } + public void testDiskThresholdWithAbsoluteSizes() { Settings diskSettings = Settings.builder() .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.getKey(), true) @@ -863,7 +1049,8 @@ public void testShardRelocationsTakenIntoAccount() {
Map.of( new ClusterInfo.NodeAndPath("node1", "/dev/null"), new ClusterInfo.ReservedSpace.Builder().add(new ShardId("", "", 0), between(51, 200)).build() - ) + ), + Map.of() ) ); clusterState = applyStartedShardsUntilNoChange(clusterState, strategy); @@ -1455,16 +1642,26 @@ static class DevNullClusterInfo extends ClusterInfo { final Map mostAvailableSpaceUsage, final Map shardSizes ) { - this(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, Map.of()); + this(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, Map.of(), Map.of()); + } + + DevNullClusterInfo( + final Map leastAvailableSpaceUsage, + final Map mostAvailableSpaceUsage, + final Map shardSizes, + final Map nodeFileCacheStats + ) { + this(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, Map.of(), nodeFileCacheStats); } DevNullClusterInfo( final Map leastAvailableSpaceUsage, final Map mostAvailableSpaceUsage, final Map shardSizes, - Map reservedSpace + Map reservedSpace, + final Map nodeFileCacheStats ) { - super(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, null, reservedSpace); + super(leastAvailableSpaceUsage, mostAvailableSpaceUsage, shardSizes, null, reservedSpace, nodeFileCacheStats); } @Override diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index caab381e65e84..62c52e93aad33 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -127,7 +127,7 @@ public void testCanAllocateUsesMaxAvailableSpace() { final Map shardSizes = new HashMap<>(); shardSizes.put("[test][0][p]", 10L); // 10 bytes - final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages, mostAvailableUsage, shardSizes, Map.of(), Map.of()); + final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages, mostAvailableUsage, shardSizes, Map.of(), Map.of(), Map.of()); RoutingAllocation allocation = new RoutingAllocation( new AllocationDeciders(Collections.singleton(decider)), clusterState.getRoutingNodes(), @@ -203,7 +203,7 @@ public void testCannotAllocateDueToLackOfDiskResources() { // way bigger than available space final long shardSize = randomIntBetween(110, 1000); shardSizes.put("[test][0][p]", shardSize); - ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages, mostAvailableUsage, shardSizes, Map.of(), Map.of()); + ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages, mostAvailableUsage, shardSizes, Map.of(), Map.of(), Map.of()); RoutingAllocation allocation = new RoutingAllocation( new AllocationDeciders(Collections.singleton(decider)), clusterState.getRoutingNodes(), @@ -320,7 +320,14 @@ public void testCanRemainUsesLeastAvailableSpace() { shardSizes.put("[test][1][p]", 10L); shardSizes.put("[test][2][p]", 10L); - final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages, mostAvailableUsage, shardSizes, shardRoutingMap, Map.of()); + final ClusterInfo clusterInfo = new ClusterInfo( + leastAvailableUsages, + mostAvailableUsage, + shardSizes, + shardRoutingMap, + Map.of(), + Map.of() + ); RoutingAllocation allocation = new RoutingAllocation( new AllocationDeciders(Collections.singleton(decider)), clusterState.getRoutingNodes(), diff --git 
a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 6634d1b4dbafc..6354cf18e8b62 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -132,7 +132,8 @@ class SizeFakingClusterInfo extends ClusterInfo { delegate.getNodeMostAvailableDiskUsages(), delegate.shardSizes, delegate.routingToDataPath, - delegate.reservedSpace + delegate.reservedSpace, + delegate.nodeFileCacheStats ); } diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java index 7722b59313b5f..ec397a2baa640 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java @@ -109,6 +109,7 @@ import org.opensearch.env.NodeEnvironment; import org.opensearch.env.TestEnvironment; import org.opensearch.index.Index; +import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.analysis.AnalyzerScope; @@ -1200,6 +1201,13 @@ public static Settings.Builder settings(Version version) { return builder; } + public static Settings.Builder remoteIndexSettings(Version version) { + Settings.Builder builder = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, version) + .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.REMOTE_SNAPSHOT.getSettingsKey()); + return builder; + } + /** * Returns size random values */ From 2b8b846f02c308ed82253acb8cf3ea62f652246a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 12:44:43 -0400 Subject: [PATCH 11/29] Bump com.netflix.nebula:gradle-info-plugin from 12.1.4 to 12.1.5 (#8568) * Bump com.netflix.nebula:gradle-info-plugin from 12.1.4 to 12.1.5 Bumps [com.netflix.nebula:gradle-info-plugin](https://github.com/nebula-plugins/gradle-info-plugin) from 12.1.4 to 12.1.5. - [Release notes](https://github.com/nebula-plugins/gradle-info-plugin/releases) - [Changelog](https://github.com/nebula-plugins/gradle-info-plugin/blob/main/CHANGELOG.md) - [Commits](https://github.com/nebula-plugins/gradle-info-plugin/compare/v12.1.4...v12.1.5) --- updated-dependencies: - dependency-name: com.netflix.nebula:gradle-info-plugin dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 4 ++-- buildSrc/build.gradle | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecb92a4051738..74a66a7880114 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -125,7 +125,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `io.projectreactor:reactor-core` from 3.4.18 to 3.5.6 in /plugins/repository-azure ([#8016](https://github.com/opensearch-project/OpenSearch/pull/8016)) - Bump `spock-core` from 2.1-groovy-3.0 to 2.3-groovy-3.0 ([#8122](https://github.com/opensearch-project/OpenSearch/pull/8122)) - Bump `com.networknt:json-schema-validator` from 1.0.83 to 1.0.84 (#8141) -- Bump `com.netflix.nebula:gradle-info-plugin` from 12.1.3 to 12.1.4 (#8139) +- Bump `com.netflix.nebula:gradle-info-plugin` from 12.1.3 to 12.1.5 (#8139, #8568) - Bump `commons-io:commons-io` from 2.12.0 to 2.13.0 in /plugins/discovery-azure-classic ([#8140](https://github.com/opensearch-project/OpenSearch/pull/8140)) - Bump `mockito` from 5.2.0 to 5.4.0 ([#8181](https://github.com/opensearch-project/OpenSearch/pull/8181)) - Bump `netty` from 4.1.93.Final to 4.1.94.Final ([#8191](https://github.com/opensearch-project/OpenSearch/pull/8191)) @@ -177,4 +177,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.8...2.x +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.8...2.x \ No newline at end of file diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index feb8da7c20984..852f9ef7f0474 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -114,7 +114,7 @@ dependencies { api 'org.apache.ant:ant:1.10.13' api 'com.netflix.nebula:gradle-extra-configurations-plugin:10.0.0' api 'com.netflix.nebula:nebula-publishing-plugin:20.3.0' - api 'com.netflix.nebula:gradle-info-plugin:12.1.4' + api 'com.netflix.nebula:gradle-info-plugin:12.1.5' api 'org.apache.rat:apache-rat:0.15' api 'commons-io:commons-io:2.13.0' api "net.java.dev.jna:jna:5.13.0" From 8b44a5dc624f2a12ad256b290e612e55069e9470 Mon Sep 17 00:00:00 2001 From: Stephen Crawford <65832608+scrawfor99@users.noreply.github.com> Date: Mon, 10 Jul 2023 12:50:14 -0400 Subject: [PATCH 12/29] Manually update google cloud core from 2.17.0 to 2.21.0 (#8586) * Update google cloud core Signed-off-by: Stephen Crawford * Specify pr number Signed-off-by: Stephen Crawford --------- Signed-off-by: Stephen Crawford --- CHANGELOG.md | 8 ++++---- plugins/repository-gcs/build.gradle | 2 +- .../licenses/google-cloud-core-http-2.17.0.jar.sha1 | 1 - .../licenses/google-cloud-core-http-2.21.0.jar.sha1 | 1 + 4 files changed, 6 insertions(+), 6 deletions(-) delete mode 100644 plugins/repository-gcs/licenses/google-cloud-core-http-2.17.0.jar.sha1 create mode 100644 plugins/repository-gcs/licenses/google-cloud-core-http-2.21.0.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 74a66a7880114..77d8099225b73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,10 +37,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - OpenJDK Update (April 2023 Patch releases) 
([#7344](https://github.com/opensearch-project/OpenSearch/pull/7344) - Bump `com.google.http-client:google-http-client:1.43.2` from 1.42.0 to 1.43.2 ([7928](https://github.com/opensearch-project/OpenSearch/pull/7928))) - Add Opentelemetry dependencies ([#7543](https://github.com/opensearch-project/OpenSearch/issues/7543)) -- Bump `org.bouncycastle:bcprov-jdk15on` to `org.bouncycastle:bcprov-jdk15to18` version 1.75 ([8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) -- Bump `org.bouncycastle:bcmail-jdk15on` to `org.bouncycastle:bcmail-jdk15to18` version 1.75 ([8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) -- Bump `org.bouncycastle:bcpkix-jdk15on` to `org.bouncycastle:bcpkix-jdk15to18` version 1.75 ([8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) - +- Bump `org.bouncycastle:bcprov-jdk15on` to `org.bouncycastle:bcprov-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) +- Bump `org.bouncycastle:bcmail-jdk15on` to `org.bouncycastle:bcmail-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) +- Bump `org.bouncycastle:bcpkix-jdk15on` to `org.bouncycastle:bcpkix-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) +- Bump `com.google.cloud:google-cloud-core-http` from 2.17.0 to 2.21.0 ([#8586](https://github.com/opensearch-project/OpenSearch/pull/8586)) ### Changed diff --git a/plugins/repository-gcs/build.gradle b/plugins/repository-gcs/build.gradle index 41c36dffea296..9bf1e8ac856b1 100644 --- a/plugins/repository-gcs/build.gradle +++ b/plugins/repository-gcs/build.gradle @@ -67,7 +67,7 @@ dependencies { api "com.google.auth:google-auth-library-oauth2-http:${versions.google_auth}" api 'com.google.cloud:google-cloud-core:2.5.10' - api 'com.google.cloud:google-cloud-core-http:2.17.0' + api 'com.google.cloud:google-cloud-core-http:2.21.0' api 'com.google.cloud:google-cloud-storage:1.113.1' api 'com.google.code.gson:gson:2.9.0' diff --git a/plugins/repository-gcs/licenses/google-cloud-core-http-2.17.0.jar.sha1 b/plugins/repository-gcs/licenses/google-cloud-core-http-2.17.0.jar.sha1 deleted file mode 100644 index eaf69a96b62b9..0000000000000 --- a/plugins/repository-gcs/licenses/google-cloud-core-http-2.17.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b9a2fc2235dadfa359967a3d67e8bb11eb62a6dd \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/google-cloud-core-http-2.21.0.jar.sha1 b/plugins/repository-gcs/licenses/google-cloud-core-http-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..2ef0a9bf9b33e --- /dev/null +++ b/plugins/repository-gcs/licenses/google-cloud-core-http-2.21.0.jar.sha1 @@ -0,0 +1 @@ +07da4710ccdbcfee253672c0b9e00e7370626c26 \ No newline at end of file From a15f0ed56d2787439963ac88fe836c11c720d66c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 10:07:59 -0700 Subject: [PATCH 13/29] Bump com.azure:azure-storage-blob from 12.22.2 to 12.22.3 in /plugins/repository-azure (#8572) * Bump com.azure:azure-storage-blob in /plugins/repository-azure Bumps [com.azure:azure-storage-blob](https://github.com/Azure/azure-sdk-for-java) from 12.22.2 to 12.22.3. 
- [Release notes](https://github.com/Azure/azure-sdk-for-java/releases) - [Commits](https://github.com/Azure/azure-sdk-for-java/compare/azure-storage-blob_12.22.2...azure-storage-blob_12.22.3) --- updated-dependencies: - dependency-name: com.azure:azure-storage-blob dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Updating SHAs Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + plugins/repository-azure/build.gradle | 2 +- .../licenses/azure-storage-blob-12.22.2.jar.sha1 | 1 - .../licenses/azure-storage-blob-12.22.3.jar.sha1 | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 plugins/repository-azure/licenses/azure-storage-blob-12.22.2.jar.sha1 create mode 100644 plugins/repository-azure/licenses/azure-storage-blob-12.22.3.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 77d8099225b73..fb58a54b6c9ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,6 +137,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Update Apache HttpCore/ HttpClient and Apache HttpCore5 / HttpClient5 dependencies ([#8434](https://github.com/opensearch-project/OpenSearch/pull/8434)) - Bump `org.apache.maven:maven-model` from 3.9.2 to 3.9.3 (#8403) - Bump `io.projectreactor.netty:reactor-netty` and `io.projectreactor.netty:reactor-netty-core` from 1.1.7 to 1.1.8 (#8405) +- Bump `com.azure:azure-storage-blob` from 12.22.2 to 12.22.3 (#8572) ### Changed - Replace jboss-annotations-api_1.2_spec with jakarta.annotation-api ([#7836](https://github.com/opensearch-project/OpenSearch/pull/7836)) diff --git a/plugins/repository-azure/build.gradle b/plugins/repository-azure/build.gradle index 4edb9e0b1913e..9ec1b4ee50569 100644 --- a/plugins/repository-azure/build.gradle +++ b/plugins/repository-azure/build.gradle @@ -55,7 +55,7 @@ dependencies { api "io.netty:netty-resolver-dns:${versions.netty}" api "io.netty:netty-transport-native-unix-common:${versions.netty}" implementation project(':modules:transport-netty4') - api 'com.azure:azure-storage-blob:12.22.2' + api 'com.azure:azure-storage-blob:12.22.3' api 'org.reactivestreams:reactive-streams:1.0.4' api 'io.projectreactor:reactor-core:3.5.6' api 'io.projectreactor.netty:reactor-netty:1.1.8' diff --git a/plugins/repository-azure/licenses/azure-storage-blob-12.22.2.jar.sha1 b/plugins/repository-azure/licenses/azure-storage-blob-12.22.2.jar.sha1 deleted file mode 100644 index a03bb750a0a96..0000000000000 --- a/plugins/repository-azure/licenses/azure-storage-blob-12.22.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1441a678a0d28ed3b22efc27fef4752f91502834 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/azure-storage-blob-12.22.3.jar.sha1 b/plugins/repository-azure/licenses/azure-storage-blob-12.22.3.jar.sha1 new file mode 100644 index 0000000000000..f6c3cc6e579fa --- /dev/null +++ b/plugins/repository-azure/licenses/azure-storage-blob-12.22.3.jar.sha1 @@ -0,0 +1 @@ +0df12462c2eac3beaf25d283f707a0560853228b \ No newline at end of file From 828730f23377e78dd1df0fa53939ed7f2486800e Mon Sep 17 00:00:00 2001 From: Shinsuke Sugaya Date: Tue, 11 Jul 2023 02:22:46 +0900 Subject: [PATCH 14/29] replace with getField (#7855) * replace with getField Signed-off-by: Shinsuke Sugaya * Update 
server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java Co-authored-by: Andriy Redko Signed-off-by: Shinsuke Sugaya * add import Signed-off-by: Shinsuke Sugaya * add PR to CHANGELOG Signed-off-by: Shinsuke Sugaya * move SortedSetDocValuesField addition for field type Signed-off-by: Shinsuke Sugaya * remove a test case for flat-object field Signed-off-by: Shinsuke Sugaya * remove getField Signed-off-by: Mingshi Liu * remove unused import Signed-off-by: Shinsuke Sugaya --------- Signed-off-by: Shinsuke Sugaya Signed-off-by: Shinsuke Sugaya Signed-off-by: Mingshi Liu Co-authored-by: Andriy Redko Co-authored-by: Mingshi Liu --- CHANGELOG.md | 1 + .../rest-api-spec/test/painless/30_search.yml | 97 ------------------- .../index/mapper/FlatObjectFieldMapper.java | 23 ++--- 3 files changed, 11 insertions(+), 110 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb58a54b6c9ab..7dc484744c606 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -174,6 +174,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix mapping char_filter when mapping a hashtag ([#7591](https://github.com/opensearch-project/OpenSearch/pull/7591)) - Fix NPE in multiterms aggregations involving empty buckets ([#7318](https://github.com/opensearch-project/OpenSearch/pull/7318)) - Precise system clock time in MasterService debug logs ([#7902](https://github.com/opensearch-project/OpenSearch/pull/7902)) +- Improve indexing performance for flat_object type ([#7855](https://github.com/opensearch-project/OpenSearch/pull/7855)) ### Security diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml index 4b3d5bd9e2980..a006fde630716 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/30_search.yml @@ -482,100 +482,3 @@ }] - match: { error.root_cause.0.type: "illegal_argument_exception" } - match: { error.root_cause.0.reason: "script score function must not produce negative scores, but got: [-9.0]"} - ---- - -"Flat-object fields from within the scripting": - - skip: - version: " - 2.6.99" - reason: "flat_object is introduced in 2.7.0" - - - do: - indices.create: - index: test - body: - mappings: - properties: - flat: - type : "flat_object" - - # This document has 6 distinct parts in its flat_object field paths: - # - flat.field_1 - # - flat.field_2 - # - flat.field_3 - # - flat.inner - # - flat.field_A - # - flat.field_B - - do: - index: - index: test - id: 1 - body: { - "flat": { - "field_1": "John Doe", - "field_2": 33, - "field_3": false, - "inner": { - "field_A": ["foo", "bar"], - "field_B": false - } - } - } - - - do: - index: - index: test - id: 2 - body: { - "flat": { - "field_1": "Joe Public", - "field_2": 45 - } - } - - - do: - indices.refresh: - index: test - - # It is possible to filter based on the number of distinct parts of flat_object field paths - - do: - search: - body: { - _source: true, - query: { - bool: { - filter: { - script: { - script: { - source: "doc['flat'].size() == 6", - lang: "painless" - } - } - } - } - } - } - - - length: { hits.hits: 1 } - - match: { hits.hits.0._source.flat.field_1: "John Doe" } - - - do: - search: - body: { - _source: true, - query: { - bool: { - filter: { - script: { - script: { - source: "doc['flat'].size() < 6", - lang: "painless" - } - } - } - } - } - 
} - - - length: { hits.hits: 1 } - - match: { hits.hits.0._source.flat.field_1: "Joe Public" } diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 36e0adbbf057f..f8206d138534d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -659,21 +659,18 @@ private void parseValueAddFields(ParseContext context, String value, String fiel } if (fieldType().hasDocValues()) { - if (context.doc().getField(fieldType().name()) == null || !context.doc().getFields(fieldType().name()).equals(field)) { - if (fieldName.equals(fieldType().name())) { - context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); - } - if (valueType.equals(VALUE_SUFFIX)) { - if (valueFieldMapper != null) { - context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_SUFFIX, binaryValue)); - } + if (fieldName.equals(fieldType().name())) { + context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); + } + if (valueType.equals(VALUE_SUFFIX)) { + if (valueFieldMapper != null) { + context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_SUFFIX, binaryValue)); } - if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { - if (valueAndPathFieldMapper != null) { - context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_AND_PATH_SUFFIX, binaryValue)); - } + } + if (valueType.equals(VALUE_AND_PATH_SUFFIX)) { + if (valueAndPathFieldMapper != null) { + context.doc().add(new SortedSetDocValuesField(fieldType().name() + VALUE_AND_PATH_SUFFIX, binaryValue)); } - } } From 0bb6eb809f6ea50b620849670c6575cb3219285e Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Mon, 10 Jul 2023 13:56:51 -0400 Subject: [PATCH 15/29] Update Gradle to 8.2.1 (#8580) Signed-off-by: Andriy Redko --- buildSrc/build.gradle | 25 ++++++++++++------------ gradle/wrapper/gradle-wrapper.properties | 4 ++-- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 852f9ef7f0474..b976085389da3 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -34,6 +34,7 @@ import org.gradle.util.GradleVersion plugins { id 'java-gradle-plugin' id 'groovy' + id 'java-test-fixtures' } group = 'org.opensearch.gradle' @@ -78,17 +79,9 @@ if (JavaVersion.current() < JavaVersion.VERSION_11) { } sourceSets { - test { - java { - srcDirs += ['src/testFixtures/java'] - } - } integTest { compileClasspath += sourceSets["main"].output + configurations["testRuntimeClasspath"] runtimeClasspath += output + compileClasspath - java { - srcDirs += ['src/testFixtures/java'] - } } } @@ -131,10 +124,10 @@ dependencies { api "com.fasterxml.jackson.core:jackson-databind:${props.getProperty('jackson_databind')}" api "org.ajoberstar.grgit:grgit-core:5.2.0" - testImplementation "junit:junit:${props.getProperty('junit')}" - testImplementation "com.carrotsearch.randomizedtesting:randomizedtesting-runner:${props.getProperty('randomizedrunner')}" - testRuntimeOnly gradleApi() - testRuntimeOnly gradleTestKit() + testFixturesApi "junit:junit:${props.getProperty('junit')}" + testFixturesApi "com.carrotsearch.randomizedtesting:randomizedtesting-runner:${props.getProperty('randomizedrunner')}" + testFixturesApi gradleApi() + testFixturesApi gradleTestKit() testImplementation 'com.github.tomakehurst:wiremock-jre8-standalone:2.35.0' 
testImplementation "org.mockito:mockito-core:${props.getProperty('mockito')}" integTestImplementation('org.spockframework:spock-core:2.3-groovy-3.0') { @@ -183,7 +176,7 @@ if (project != rootProject) { // build-tools is not ready for primetime with these... tasks.named("dependencyLicenses").configure { it.enabled = false } dependenciesInfo.enabled = false - disableTasks('forbiddenApisMain', 'forbiddenApisTest', 'forbiddenApisIntegTest') + disableTasks('forbiddenApisMain', 'forbiddenApisTest', 'forbiddenApisIntegTest', 'forbiddenApisTestFixtures') jarHell.enabled = false thirdPartyAudit.enabled = false if (org.opensearch.gradle.info.BuildParams.inFipsJvm) { @@ -250,6 +243,12 @@ if (project != rootProject) { } } + // disable fail-on-warnings for this specific task which trips Java 11 bug + // https://bugs.openjdk.java.net/browse/JDK-8209058 + tasks.named("compileTestFixturesJava").configure { + options.compilerArgs -= '-Werror' + } + tasks.register("integTest", Test) { inputs.dir(file("src/testKit")).withPropertyName("testkit dir").withPathSensitivity(PathSensitivity.RELATIVE) systemProperty 'test.version_under_test', version diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index f00d0c8442459..e10ceefe2a012 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -11,7 +11,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.2.1-all.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionSha256Sum=5022b0b25fe182b0e50867e77f484501dba44feeea88f5c1f13b6b4660463640 +distributionSha256Sum=7c3ad722e9b0ce8205b91560fd6ce8296ac3eadf065672242fd73c06b8eeb6ee From e3341452dfadfeb0a03832f285c7090b10d61fa4 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 14:02:42 -0400 Subject: [PATCH 16/29] Bump com.google.jimfs:jimfs from 1.2 to 1.3.0 in /distribution/tools/keystore-cli (#8577) * Bump com.google.jimfs:jimfs in /distribution/tools/keystore-cli Bumps [com.google.jimfs:jimfs](https://github.com/google/jimfs) from 1.2 to 1.3.0. - [Release notes](https://github.com/google/jimfs/releases) - [Commits](https://github.com/google/jimfs/compare/v1.2...v1.3.0) --- updated-dependencies: - dependency-name: com.google.jimfs:jimfs dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + distribution/tools/keystore-cli/build.gradle | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dc484744c606..f112a7ed008ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -138,6 +138,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.maven:maven-model` from 3.9.2 to 3.9.3 (#8403) - Bump `io.projectreactor.netty:reactor-netty` and `io.projectreactor.netty:reactor-netty-core` from 1.1.7 to 1.1.8 (#8405) - Bump `com.azure:azure-storage-blob` from 12.22.2 to 12.22.3 (#8572) +- Bump `com.google.jimfs:jimfs` from 1.2 to 1.3.0 (#8577) ### Changed - Replace jboss-annotations-api_1.2_spec with jakarta.annotation-api ([#7836](https://github.com/opensearch-project/OpenSearch/pull/7836)) diff --git a/distribution/tools/keystore-cli/build.gradle b/distribution/tools/keystore-cli/build.gradle index d819322fc77b7..5dcddf3ef127e 100644 --- a/distribution/tools/keystore-cli/build.gradle +++ b/distribution/tools/keystore-cli/build.gradle @@ -34,7 +34,7 @@ dependencies { compileOnly project(":server") compileOnly project(":libs:opensearch-cli") testImplementation project(":test:framework") - testImplementation 'com.google.jimfs:jimfs:1.2' + testImplementation 'com.google.jimfs:jimfs:1.3.0' testRuntimeOnly("com.google.guava:guava:${versions.guava}") { transitive = false } From 71f807329a92f05012924348de0b5d8884023bd0 Mon Sep 17 00:00:00 2001 From: Stephen Crawford <65832608+scrawfor99@users.noreply.github.com> Date: Mon, 10 Jul 2023 14:21:40 -0400 Subject: [PATCH 17/29] Bump schema validator from from 1.0.85 to 1.0.86 (#8573) * Bump schema validator Signed-off-by: Stephen Crawford * Update changelog Signed-off-by: Stephen Crawford * Manually trigger retry Signed-off-by: Stephen Crawford * Make trivial change to trigger rerun Signed-off-by: Stephen Crawford --------- Signed-off-by: Stephen Crawford Signed-off-by: Stephen Crawford <65832608+scrawfor99@users.noreply.github.com> --- CHANGELOG.md | 1 + buildSrc/build.gradle | 2 +- .../java/org/opensearch/identity/IdentityService.java | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f112a7ed008ee..b86e83e3181ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.bouncycastle:bcprov-jdk15on` to `org.bouncycastle:bcprov-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) - Bump `org.bouncycastle:bcmail-jdk15on` to `org.bouncycastle:bcmail-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) - Bump `org.bouncycastle:bcpkix-jdk15on` to `org.bouncycastle:bcpkix-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) +- Bump `com.networknt:json-schema-validator` from 1.0.85 to 1.0.86 ([#8573](https://github.com/opensearch-project/OpenSearch/pull/8573)) - Bump `com.google.cloud:google-cloud-core-http` from 2.17.0 to 2.21.0 ([#8586](https://github.com/opensearch-project/OpenSearch/pull/8586)) diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index b976085389da3..018b63816c3f1 100644 --- a/buildSrc/build.gradle +++ 
b/buildSrc/build.gradle @@ -118,7 +118,7 @@ dependencies { api 'com.avast.gradle:gradle-docker-compose-plugin:0.16.12' api "org.yaml:snakeyaml:${props.getProperty('snakeyaml')}" api 'org.apache.maven:maven-model:3.9.3' - api 'com.networknt:json-schema-validator:1.0.85' + api 'com.networknt:json-schema-validator:1.0.86' api 'org.jruby.jcodings:jcodings:1.0.58' api 'org.jruby.joni:joni:2.2.1' api "com.fasterxml.jackson.core:jackson-databind:${props.getProperty('jackson_databind')}" diff --git a/server/src/main/java/org/opensearch/identity/IdentityService.java b/server/src/main/java/org/opensearch/identity/IdentityService.java index ab1456cd860ac..1ce107f743efc 100644 --- a/server/src/main/java/org/opensearch/identity/IdentityService.java +++ b/server/src/main/java/org/opensearch/identity/IdentityService.java @@ -5,15 +5,15 @@ package org.opensearch.identity; +import java.util.List; +import java.util.stream.Collectors; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.OpenSearchException; -import org.opensearch.identity.noop.NoopIdentityPlugin; -import java.util.List; import org.opensearch.common.settings.Settings; +import org.opensearch.identity.noop.NoopIdentityPlugin; import org.opensearch.identity.tokens.TokenManager; import org.opensearch.plugins.IdentityPlugin; -import java.util.stream.Collectors; /** * Identity and access control for OpenSearch. @@ -44,7 +44,7 @@ public IdentityService(final Settings settings, final List ident } /** - * Gets the current subject + * Gets the current Subject */ public Subject getSubject() { return identityPlugin.getSubject(); From bcadd172f0df7d93bbc669ff3070880538f5ecfb Mon Sep 17 00:00:00 2001 From: Stephen Crawford <65832608+scrawfor99@users.noreply.github.com> Date: Mon, 10 Jul 2023 16:03:54 -0400 Subject: [PATCH 18/29] Move changelog entry (#8599) Signed-off-by: Stephen Crawford --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b86e83e3181ea..d303a2b4b50c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,7 +40,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.bouncycastle:bcprov-jdk15on` to `org.bouncycastle:bcprov-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) - Bump `org.bouncycastle:bcmail-jdk15on` to `org.bouncycastle:bcmail-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) - Bump `org.bouncycastle:bcpkix-jdk15on` to `org.bouncycastle:bcpkix-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) -- Bump `com.networknt:json-schema-validator` from 1.0.85 to 1.0.86 ([#8573](https://github.com/opensearch-project/OpenSearch/pull/8573)) - Bump `com.google.cloud:google-cloud-core-http` from 2.17.0 to 2.21.0 ([#8586](https://github.com/opensearch-project/OpenSearch/pull/8586)) @@ -140,6 +139,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `io.projectreactor.netty:reactor-netty` and `io.projectreactor.netty:reactor-netty-core` from 1.1.7 to 1.1.8 (#8405) - Bump `com.azure:azure-storage-blob` from 12.22.2 to 12.22.3 (#8572) - Bump `com.google.jimfs:jimfs` from 1.2 to 1.3.0 (#8577) +- Bump `com.networknt:json-schema-validator` from 1.0.85 to 1.0.86 ([#8573](https://github.com/opensearch-project/OpenSearch/pull/8573)) ### Changed - Replace jboss-annotations-api_1.2_spec with jakarta.annotation-api 
([#7836](https://github.com/opensearch-project/OpenSearch/pull/7836)) @@ -181,4 +181,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.8...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.8...2.x From 62b66e56b9eddda1313a71ca90f2e95f45cdec5d Mon Sep 17 00:00:00 2001 From: Nick Knize Date: Mon, 10 Jul 2023 15:35:23 -0500 Subject: [PATCH 19/29] add jdk.incubator.vector module support for JDK 20+ (#8601) * add jdk.incubator.vector module support for JDK 20+ Adds support for the incubating jdk vector package (PANAMA) when using jdk 20+ runtime. Signed-off-by: Nicholas Walter Knize * update changelog and fix typo Signed-off-by: Nicholas Walter Knize --------- Signed-off-by: Nicholas Walter Knize --- CHANGELOG.md | 1 + build.gradle | 3 +++ distribution/src/config/jvm.options | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d303a2b4b50c6..b43885a65a4d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -102,6 +102,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Support transport action names when registering NamedRoutes ([#7957](https://github.com/opensearch-project/OpenSearch/pull/7957)) - Create concept of persistent ThreadContext headers that are unstashable ([#8291]()https://github.com/opensearch-project/OpenSearch/pull/8291) - Enable Partial Flat Object ([#7997](https://github.com/opensearch-project/OpenSearch/pull/7997)) +- Add jdk.incubator.vector module support for JDK 20+ ([#8601](https://github.com/opensearch-project/OpenSearch/pull/8601)) ### Dependencies - Bump `com.azure:azure-storage-common` from 12.21.0 to 12.21.1 (#7566, #7814) diff --git a/build.gradle b/build.gradle index ca4c6c3635d57..6a14ab231894b 100644 --- a/build.gradle +++ b/build.gradle @@ -424,6 +424,9 @@ gradle.projectsEvaluated { if (BuildParams.runtimeJavaVersion > JavaVersion.VERSION_17) { task.jvmArgs += ["-Djava.security.manager=allow"] } + if (BuildParams.runtimeJavaVersion >= JavaVersion.VERSION_20) { + task.jvmArgs += ["--add-modules=jdk.incubator.vector"] + } } } diff --git a/distribution/src/config/jvm.options b/distribution/src/config/jvm.options index ef1035489c9fc..e15afc0f677c3 100644 --- a/distribution/src/config/jvm.options +++ b/distribution/src/config/jvm.options @@ -78,3 +78,7 @@ ${error.file} # Explicitly allow security manager (https://bugs.openjdk.java.net/browse/JDK-8270380) 18-:-Djava.security.manager=allow + +# JDK 20+ Incubating Vector Module for SIMD optimizations; +# disabling may reduce performance on vector optimized lucene +20:--add-modules=jdk.incubator.vector From 397069f578ff99efbdc0b2eb0d97c7cd2b8dba52 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jul 2023 14:10:13 -0700 Subject: [PATCH 20/29] Bump com.google.jimfs:jimfs in /distribution/tools/upgrade-cli (#8571) Bumps [com.google.jimfs:jimfs](https://github.com/google/jimfs) from 1.2 to 1.3.0. - [Release notes](https://github.com/google/jimfs/releases) - [Commits](https://github.com/google/jimfs/compare/v1.2...v1.3.0) --- updated-dependencies: - dependency-name: com.google.jimfs:jimfs dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Signed-off-by: owaiskazi19 Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- CHANGELOG.md | 2 +- distribution/tools/upgrade-cli/build.gradle | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b43885a65a4d6..dee395cbc8962 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -139,7 +139,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.maven:maven-model` from 3.9.2 to 3.9.3 (#8403) - Bump `io.projectreactor.netty:reactor-netty` and `io.projectreactor.netty:reactor-netty-core` from 1.1.7 to 1.1.8 (#8405) - Bump `com.azure:azure-storage-blob` from 12.22.2 to 12.22.3 (#8572) -- Bump `com.google.jimfs:jimfs` from 1.2 to 1.3.0 (#8577) +- Bump `com.google.jimfs:jimfs` from 1.2 to 1.3.0 (#8577, #8571) - Bump `com.networknt:json-schema-validator` from 1.0.85 to 1.0.86 ([#8573](https://github.com/opensearch-project/OpenSearch/pull/8573)) ### Changed diff --git a/distribution/tools/upgrade-cli/build.gradle b/distribution/tools/upgrade-cli/build.gradle index c7bf0ff1d2810..99824463f14f8 100644 --- a/distribution/tools/upgrade-cli/build.gradle +++ b/distribution/tools/upgrade-cli/build.gradle @@ -20,7 +20,7 @@ dependencies { implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" implementation "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" testImplementation project(":test:framework") - testImplementation 'com.google.jimfs:jimfs:1.2' + testImplementation 'com.google.jimfs:jimfs:1.3.0' testRuntimeOnly("com.google.guava:guava:${versions.guava}") { transitive = false } From a39f60f482c405a102e6eec83097e91acc90a889 Mon Sep 17 00:00:00 2001 From: Shivansh Arora <31575408+shiv0408@users.noreply.github.com> Date: Tue, 11 Jul 2023 04:54:33 +0530 Subject: [PATCH 21/29] Fix painless casting bug causing opensearch to crash (#8315) * Created a failing test to reproduce painless bug Signed-off-by: Shivansh Arora * Removed unused import Signed-off-by: Shivansh Arora * Throw exception when trying to cast def to void Signed-off-by: Shivansh Arora * Removed update context change Signed-off-by: Shivansh Arora * Created a different test Signed-off-by: Shivansh Arora --------- Signed-off-by: Shivansh Arora --- CHANGELOG.md | 1 + .../main/java/org/opensearch/painless/AnalyzerCaster.java | 2 +- .../test/java/org/opensearch/painless/FactoryTests.java | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dee395cbc8962..724a75e4e0405 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -79,6 +79,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Replaces ZipInputStream with ZipFile to fix Zip Slip vulnerability ([#7230](https://github.com/opensearch-project/OpenSearch/pull/7230)) - Add missing validation/parsing of SearchBackpressureMode of SearchBackpressureSettings ([#7541](https://github.com/opensearch-project/OpenSearch/pull/7541)) - Adds log4j configuration for telemetry LogSpanExporter ([#8393](https://github.com/opensearch-project/OpenSearch/pull/8393)) +- Fix painless casting bug, which crashes the OpenSearch process ([#8315](https://github.com/opensearch-project/OpenSearch/pull/8315)) ### Security diff --git a/modules/lang-painless/src/main/java/org/opensearch/painless/AnalyzerCaster.java b/modules/lang-painless/src/main/java/org/opensearch/painless/AnalyzerCaster.java index e375ff14db67e..d830ef2ab6290 100644 
--- a/modules/lang-painless/src/main/java/org/opensearch/painless/AnalyzerCaster.java +++ b/modules/lang-painless/src/main/java/org/opensearch/painless/AnalyzerCaster.java @@ -412,7 +412,7 @@ public static PainlessCast getLegalCast(Location location, Class actual, Clas } } - if (actual == def.class + if ((actual == def.class && expected != void.class) || (actual != void.class && expected == def.class) || expected.isAssignableFrom(actual) || (actual.isAssignableFrom(expected) && explicit)) { diff --git a/modules/lang-painless/src/test/java/org/opensearch/painless/FactoryTests.java b/modules/lang-painless/src/test/java/org/opensearch/painless/FactoryTests.java index 95b18cf1c5250..b4e322e12bc45 100644 --- a/modules/lang-painless/src/test/java/org/opensearch/painless/FactoryTests.java +++ b/modules/lang-painless/src/test/java/org/opensearch/painless/FactoryTests.java @@ -360,6 +360,14 @@ public void testVoidReturn() { assertEquals(iae.getMessage(), "not a statement: result not used from addition operation [+]"); } + public void testDefToVoidReturnThrowsException() { + ClassCastException exception = expectScriptThrows( + ClassCastException.class, + () -> getEngine().compile("def_return_in_void", "def x=1;return x;", VoidReturnTestScript.CONTEXT, Collections.emptyMap()) + ); + assertEquals(exception.getMessage(), "Cannot cast from [def] to [void]."); + } + public abstract static class FactoryTestConverterScript { private final Map params; From 3c1bca4e28db20b525c010b5d03af850cd899217 Mon Sep 17 00:00:00 2001 From: Vikas Bansal <43470111+vikasvb90@users.noreply.github.com> Date: Tue, 11 Jul 2023 10:14:16 +0530 Subject: [PATCH 22/29] Disabled translog fsync in remote store path (#8288) Signed-off-by: Vikas Bansal --- .../opensearch/index/translog/Checkpoint.java | 10 ++- .../index/translog/TranslogHeader.java | 6 +- .../index/translog/TranslogWriter.java | 29 ++++-- .../translog/TruncateTranslogAction.java | 2 +- .../index/translog/LocalTranslogTests.java | 85 ++++++++++++++++++ .../index/translog/RemoteFSTranslogTests.java | 90 +++++++++++++++++++ .../index/translog/TranslogHeaderTests.java | 4 +- 7 files changed, 208 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/translog/Checkpoint.java b/server/src/main/java/org/opensearch/index/translog/Checkpoint.java index 8df574ed8374f..56de7e5daf55f 100644 --- a/server/src/main/java/org/opensearch/index/translog/Checkpoint.java +++ b/server/src/main/java/org/opensearch/index/translog/Checkpoint.java @@ -223,12 +223,14 @@ public static void write(ChannelFactory factory, Path checkpointFile, Checkpoint } } - public static void write(FileChannel fileChannel, Path checkpointFile, Checkpoint checkpoint) throws IOException { + public static void write(FileChannel fileChannel, Path checkpointFile, Checkpoint checkpoint, boolean fsync) throws IOException { byte[] bytes = createCheckpointBytes(checkpointFile, checkpoint); Channels.writeToChannel(bytes, fileChannel, 0); - // no need to force metadata, file size stays the same and we did the full fsync - // when we first created the file, so the directory entry doesn't change as well - fileChannel.force(false); + if (fsync == true) { + // no need to force metadata, file size stays the same and we did the full fsync + // when we first created the file, so the directory entry doesn't change as well + fileChannel.force(false); + } } private static byte[] createCheckpointBytes(Path checkpointFile, Checkpoint checkpoint) throws IOException { diff --git 
a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java index 0819d009c9992..af6ebcf7b7c66 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java @@ -213,7 +213,7 @@ private static void tryReportOldVersionError(final Path path, final FileChannel /** * Writes this header with the latest format into the file channel */ - void write(final FileChannel channel) throws IOException { + void write(final FileChannel channel, boolean fsync) throws IOException { // This output is intentionally not closed because closing it will close the FileChannel. @SuppressWarnings({ "IOResourceOpenedButNotSafelyClosed", "resource" }) final BufferedChecksumStreamOutput out = new BufferedChecksumStreamOutput( @@ -229,7 +229,9 @@ void write(final FileChannel channel) throws IOException { // Checksum header out.writeInt((int) out.getChecksum()); out.flush(); - channel.force(true); + if (fsync == true) { + channel.force(true); + } assert channel.position() == headerSizeInBytes : "Header is not fully written; header size [" + headerSizeInBytes + "], channel position [" diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java index 423e9dd960ed7..e19aece60adc0 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java @@ -110,6 +110,8 @@ public class TranslogWriter extends BaseTranslogReader implements Closeable { private final Map> seenSequenceNumbers; + private final Boolean remoteTranslogEnabled; + private TranslogWriter( final ShardId shardId, final Checkpoint initialCheckpoint, @@ -123,7 +125,8 @@ private TranslogWriter( TranslogHeader header, final TragicExceptionHolder tragedy, final LongConsumer persistedSequenceNumberConsumer, - final BigArrays bigArrays + final BigArrays bigArrays, + Boolean remoteTranslogEnabled ) throws IOException { super(initialCheckpoint.generation, channel, path, header); assert initialCheckpoint.offset == channel.position() : "initial checkpoint offset [" @@ -148,6 +151,7 @@ private TranslogWriter( this.bigArrays = bigArrays; this.seenSequenceNumbers = Assertions.ENABLED ? 
new HashMap<>() : null; this.tragedy = tragedy; + this.remoteTranslogEnabled = remoteTranslogEnabled; } public static TranslogWriter create( @@ -174,14 +178,14 @@ public static TranslogWriter create( try { checkpointChannel = channelFactory.open(checkpointFile, StandardOpenOption.WRITE); final TranslogHeader header = new TranslogHeader(translogUUID, primaryTerm); - header.write(channel); + header.write(channel, !Boolean.TRUE.equals(remoteTranslogEnabled)); final Checkpoint checkpoint = Checkpoint.emptyTranslogCheckpoint( header.sizeInBytes(), fileGeneration, initialGlobalCheckpoint, initialMinTranslogGen ); - writeCheckpoint(checkpointChannel, checkpointFile, checkpoint); + writeCheckpoint(checkpointChannel, checkpointFile, checkpoint, remoteTranslogEnabled); final LongSupplier writerGlobalCheckpointSupplier; if (Assertions.ENABLED) { writerGlobalCheckpointSupplier = () -> { @@ -209,7 +213,8 @@ public static TranslogWriter create( header, tragedy, persistedSequenceNumberConsumer, - bigArrays + bigArrays, + remoteTranslogEnabled ); } catch (Exception exception) { // if we fail to bake the file-generation into the checkpoint we stick with the file and once we recover and that @@ -508,8 +513,10 @@ final boolean syncUpTo(long offset) throws IOException { // now do the actual fsync outside of the synchronized block such that // we can continue writing to the buffer etc. try { - channel.force(false); - writeCheckpoint(checkpointChannel, checkpointPath, checkpointToSync); + if (!Boolean.TRUE.equals(remoteTranslogEnabled)) { + channel.force(false); + } + writeCheckpoint(checkpointChannel, checkpointPath, checkpointToSync, remoteTranslogEnabled); } catch (final Exception ex) { closeWithTragicEvent(ex); throw ex; @@ -603,9 +610,13 @@ protected void readBytes(ByteBuffer targetBuffer, long position) throws IOExcept Channels.readFromFileChannelWithEofException(channel, position, targetBuffer); } - private static void writeCheckpoint(final FileChannel fileChannel, final Path checkpointFile, final Checkpoint checkpoint) - throws IOException { - Checkpoint.write(fileChannel, checkpointFile, checkpoint); + private static void writeCheckpoint( + final FileChannel fileChannel, + final Path checkpointFile, + final Checkpoint checkpoint, + final Boolean remoteTranslogEnabled + ) throws IOException { + Checkpoint.write(fileChannel, checkpointFile, checkpoint, !Boolean.TRUE.equals(remoteTranslogEnabled)); } /** diff --git a/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java b/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java index ef948cfade815..4a082b4a19844 100644 --- a/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java +++ b/server/src/main/java/org/opensearch/index/translog/TruncateTranslogAction.java @@ -258,7 +258,7 @@ private static void writeEmptyCheckpoint(Path filename, int translogLength, long private static int writeEmptyTranslog(Path filename, String translogUUID) throws IOException { try (FileChannel fc = FileChannel.open(filename, StandardOpenOption.WRITE, StandardOpenOption.CREATE_NEW)) { TranslogHeader header = new TranslogHeader(translogUUID, SequenceNumbers.UNASSIGNED_PRIMARY_TERM); - header.write(fc); + header.write(fc, true); return header.sizeInBytes(); } } diff --git a/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java index b2827b8bc2953..a7d7b3a51cebb 100644 --- 
a/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/LocalTranslogTests.java @@ -1541,6 +1541,91 @@ ChannelFactory getChannelFactory() { } } + public void testTranslogWriterFsyncedWithLocalTranslog() throws IOException { + Path tempDir = createTempDir(); + final TranslogConfig temp = getTranslogConfig(tempDir); + final TranslogConfig config = new TranslogConfig( + temp.getShardId(), + temp.getTranslogPath(), + temp.getIndexSettings(), + temp.getBigArrays(), + new ByteSizeValue(1, ByteSizeUnit.KB) + ); + + final Set<Long> persistedSeqNos = new HashSet<>(); + final AtomicInteger translogFsyncCalls = new AtomicInteger(); + final AtomicInteger checkpointFsyncCalls = new AtomicInteger(); + + final ChannelFactory channelFactory = (file, openOption) -> { + FileChannel delegate = FileChannel.open(file, openOption); + boolean success = false; + try { + // don't do partial writes for checkpoints; we rely on the fact that the bytes are written as an atomic operation + final boolean isCkpFile = file.getFileName().toString().endsWith(".ckp"); + + final FileChannel channel; + if (isCkpFile) { + channel = new FilterFileChannel(delegate) { + @Override + public void force(boolean metaData) throws IOException { + checkpointFsyncCalls.incrementAndGet(); + } + }; + } else { + channel = new FilterFileChannel(delegate) { + + @Override + public void force(boolean metaData) throws IOException { + translogFsyncCalls.incrementAndGet(); + } + }; + } + success = true; + return channel; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(delegate); + } + } + }; + + String translogUUID = Translog.createEmptyTranslog( + config.getTranslogPath(), + SequenceNumbers.NO_OPS_PERFORMED, + shardId, + channelFactory, + primaryTerm.get() + ); + + try ( + Translog translog = new LocalTranslog( + config, + translogUUID, + new DefaultTranslogDeletionPolicy(-1, -1, 0), + () -> SequenceNumbers.NO_OPS_PERFORMED, + primaryTerm::get, + persistedSeqNos::add + ) { + @Override + ChannelFactory getChannelFactory() { + return channelFactory; + } + } + ) { + TranslogWriter writer = translog.getCurrent(); + byte[] bytes = new byte[256]; + writer.add(ReleasableBytesReference.wrap(new BytesArray(bytes)), 1); + writer.add(ReleasableBytesReference.wrap(new BytesArray(bytes)), 2); + writer.add(ReleasableBytesReference.wrap(new BytesArray(bytes)), 3); + writer.add(ReleasableBytesReference.wrap(new BytesArray(bytes)), 4); + writer.sync(); + assertEquals(4, checkpointFsyncCalls.get()); + assertEquals(3, translogFsyncCalls.get()); + // Sequence numbers are marked as persisted after sync + assertThat(persistedSeqNos, contains(1L, 2L, 3L, 4L)); + } + } + public void testTranslogWriterDoesNotBlockAddsOnWrite() throws IOException, InterruptedException { Path tempDir = createTempDir(); final TranslogConfig config = getTranslogConfig(tempDir); diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFSTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFSTranslogTests.java index 30c04c731d1f8..04dc90cd9087d 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFSTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFSTranslogTests.java @@ -182,6 +182,7 @@ private TranslogConfig getTranslogConfig(final Path path) { // only randomize between no age retention and a long one, so failures will have a chance of reproducing
.put(IndexSettings.INDEX_TRANSLOG_RETENTION_AGE_SETTING.getKey(), randomBoolean() ? "-1ms" : "1h") .put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), randomIntBetween(-1, 2048) + "b") + .put(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_ENABLED, true) .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) .build(); return getTranslogConfig(path, settings); @@ -1261,6 +1262,95 @@ ChannelFactory getChannelFactory() { } } + public void testTranslogWriterFsyncDisabledInRemoteFsTranslog() throws IOException { + Path tempDir = createTempDir(); + final TranslogConfig temp = getTranslogConfig(tempDir); + final TranslogConfig config = new TranslogConfig( + temp.getShardId(), + temp.getTranslogPath(), + temp.getIndexSettings(), + temp.getBigArrays(), + new ByteSizeValue(1, ByteSizeUnit.KB) + ); + + final Set persistedSeqNos = new HashSet<>(); + final AtomicInteger translogFsyncCalls = new AtomicInteger(); + final AtomicInteger checkpointFsyncCalls = new AtomicInteger(); + + final ChannelFactory channelFactory = (file, openOption) -> { + FileChannel delegate = FileChannel.open(file, openOption); + boolean success = false; + try { + // don't do partial writes for checkpoints we rely on the fact that the bytes are written as an atomic operation + final boolean isCkpFile = file.getFileName().toString().endsWith(".ckp"); + + final FileChannel channel; + if (isCkpFile) { + channel = new FilterFileChannel(delegate) { + @Override + public void force(boolean metaData) throws IOException { + checkpointFsyncCalls.incrementAndGet(); + } + }; + } else { + channel = new FilterFileChannel(delegate) { + + @Override + public void force(boolean metaData) throws IOException { + translogFsyncCalls.incrementAndGet(); + } + }; + } + success = true; + return channel; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(delegate); + } + } + }; + + String translogUUID = Translog.createEmptyTranslog( + config.getTranslogPath(), + SequenceNumbers.NO_OPS_PERFORMED, + shardId, + channelFactory, + primaryTerm.get() + ); + + try ( + Translog translog = new RemoteFsTranslog( + config, + translogUUID, + new DefaultTranslogDeletionPolicy(-1, -1, 0), + () -> SequenceNumbers.NO_OPS_PERFORMED, + primaryTerm::get, + persistedSeqNos::add, + repository, + threadPool, + () -> Boolean.TRUE + ) { + @Override + ChannelFactory getChannelFactory() { + return channelFactory; + } + } + ) { + TranslogWriter writer = translog.getCurrent(); + byte[] bytes = new byte[256]; + writer.add(ReleasableBytesReference.wrap(new BytesArray(bytes)), 1); + writer.add(ReleasableBytesReference.wrap(new BytesArray(bytes)), 2); + writer.add(ReleasableBytesReference.wrap(new BytesArray(bytes)), 3); + writer.add(ReleasableBytesReference.wrap(new BytesArray(bytes)), 4); + writer.sync(); + // Fsync is still enabled during empty translog creation. 
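The 2/1 counts asserted just below follow from the gating added to TranslogWriter earlier in this series: with a remote translog, sync() skips channel.force(false) and writeCheckpoint() passes a flag that appears to disable the checkpoint fsync as well, so the only force() calls these counters ever see are the ones issued while the empty translog is bootstrapped, which is exactly what the comment above says. A minimal sketch of the null-safe gate; the shouldFsync helper name is illustrative, not part of the patch:

    // Sketch: the tri-state Boolean idiom used throughout this patch.
    // remoteTranslogEnabled is a boxed Boolean on purpose: null means
    // "not configured" and must behave like FALSE, i.e. keep fsyncing.
    static boolean shouldFsync(Boolean remoteTranslogEnabled) {
        return !Boolean.TRUE.equals(remoteTranslogEnabled);
    }

Only an explicit Boolean.TRUE disables durability; both null and FALSE preserve the local fsync behavior exercised by testTranslogWriterFsyncedWithLocalTranslog above, where the same four adds produce 4 checkpoint and 3 translog fsyncs.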
+ assertEquals(2, checkpointFsyncCalls.get()); + assertEquals(1, translogFsyncCalls.get()); + // Sequence numbers are marked as persisted after sync + assertThat(persistedSeqNos, contains(1L, 2L, 3L, 4L)); + } + } + public void testCloseIntoReader() throws IOException { try (TranslogWriter writer = translog.createWriter(translog.currentFileGeneration() + 1)) { final int numOps = randomIntBetween(8, 128); diff --git a/server/src/test/java/org/opensearch/index/translog/TranslogHeaderTests.java b/server/src/test/java/org/opensearch/index/translog/TranslogHeaderTests.java index 3569d596a5569..10b20ab207927 100644 --- a/server/src/test/java/org/opensearch/index/translog/TranslogHeaderTests.java +++ b/server/src/test/java/org/opensearch/index/translog/TranslogHeaderTests.java @@ -63,7 +63,7 @@ public void testCurrentHeaderVersion() throws Exception { final long generation = randomNonNegativeLong(); final Path translogFile = createTempDir().resolve(Translog.getFilename(generation)); try (FileChannel channel = FileChannel.open(translogFile, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) { - outHeader.write(channel); + outHeader.write(channel, true); assertThat(outHeader.sizeInBytes(), equalTo((int) channel.position())); } try (FileChannel channel = FileChannel.open(translogFile, StandardOpenOption.READ)) { @@ -165,7 +165,7 @@ public void testCorruptTranslogHeader() throws Exception { final Path translogLocation = createTempDir(); final Path translogFile = translogLocation.resolve(Translog.getFilename(generation)); try (FileChannel channel = FileChannel.open(translogFile, StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) { - outHeader.write(channel); + outHeader.write(channel, true); assertThat(outHeader.sizeInBytes(), equalTo((int) channel.position())); } TestTranslog.corruptFile(logger, random(), translogFile, false); From a5752cb28d8705800b1118c89ecfda660b9543ce Mon Sep 17 00:00:00 2001 From: Bansi Kasundra <66969140+kasundra07@users.noreply.github.com> Date: Mon, 10 Jul 2023 21:59:05 -0700 Subject: [PATCH 23/29] =?UTF-8?q?[Snapshot=20Interop]=20Add=20Changes=20in?= =?UTF-8?q?=20Snapshot=20Status=20Flow=20for=20remote=20sto=E2=80=A6=20(#7?= =?UTF-8?q?495)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bansi Kasundra --- .../blobstore/url/URLBlobContainer.java | 16 +- .../opensearch/snapshots/CloneSnapshotIT.java | 23 +-- .../snapshots/SnapshotStatusApisIT.java | 171 ++++++++++++++++++ .../BlobStoreIndexShardSnapshot.java | 16 +- .../blobstore/IndexShardSnapshot.java | 21 +++ .../RemoteStoreShardShallowCopySnapshot.java | 16 +- .../RemoteSnapshotDirectoryFactory.java | 6 +- .../blobstore/BlobStoreRepository.java | 63 +++---- .../AbstractSnapshotIntegTestCase.java | 7 + 9 files changed, 283 insertions(+), 56 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/snapshots/blobstore/IndexShardSnapshot.java diff --git a/modules/repository-url/src/main/java/org/opensearch/common/blobstore/url/URLBlobContainer.java b/modules/repository-url/src/main/java/org/opensearch/common/blobstore/url/URLBlobContainer.java index 1f966be98bdf7..b13a4d5a39a5b 100644 --- a/modules/repository-url/src/main/java/org/opensearch/common/blobstore/url/URLBlobContainer.java +++ b/modules/repository-url/src/main/java/org/opensearch/common/blobstore/url/URLBlobContainer.java @@ -83,12 +83,20 @@ public URL url() { } /** - * This operation is not supported by URLBlobContainer + * Tests whether a blob with the given blob name exists 
in the container. + * + * @param blobName + * The name of the blob whose existence is to be determined. + * @return {@code true} if a blob exists in the {@link BlobContainer} with the given name, and {@code false} otherwise. */ @Override - public boolean blobExists(String blobName) { - assert false : "should never be called for a read-only url repo"; - throw new UnsupportedOperationException("URL repository doesn't support this operation"); + public boolean blobExists(String blobName) throws IOException { + try { + readBlob(blobName); + return true; + } catch (FileNotFoundException e) { + return false; + } } /** diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java index ce92a15026b70..3de982f89ac80 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotIT.java @@ -43,6 +43,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.IndexNotFoundException; +import org.opensearch.index.snapshots.blobstore.IndexShardSnapshot; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.RepositoryData; import org.opensearch.snapshots.mockstore.MockRepository; @@ -832,18 +833,14 @@ private static BlobStoreIndexShardSnapshot readShardSnapshot( RepositoryShardId repositoryShardId, SnapshotId snapshotId ) { - return PlainActionFuture.get( - f -> repository.threadPool() - .generic() - .execute( - ActionRunnable.supply( - f, - () -> repository.loadShardSnapshot( - repository.shardContainer(repositoryShardId.index(), repositoryShardId.shardId()), - snapshotId - ) - ) - ) - ); + return PlainActionFuture.get(f -> repository.threadPool().generic().execute(ActionRunnable.supply(f, () -> { + IndexShardSnapshot indexShardSnapshot = repository.loadShardSnapshot( + repository.shardContainer(repositoryShardId.index(), repositoryShardId.shardId()), + snapshotId + ); + assert indexShardSnapshot instanceof BlobStoreIndexShardSnapshot + : "indexShardSnapshot should be an instance of BlobStoreIndexShardSnapshot"; + return (BlobStoreIndexShardSnapshot) indexShardSnapshot; + }))); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java index 72c64b56c19c1..c22dd90cc930b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SnapshotStatusApisIT.java @@ -48,6 +48,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.ByteSizeUnit; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.common.Strings; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -76,6 +77,7 @@ protected Settings nodeSettings(int nodeOrdinal) { return Settings.builder() .put(super.nodeSettings(nodeOrdinal)) .put(ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING.getKey(), 0) // We have tests that check by-timestamp order + .put(FeatureFlags.REMOTE_STORE, "true") .build(); } @@ -110,6 +112,61 @@ public void testStatusApiConsistency() { assertEquals(snStatus.getStats().getTime(), snapshotInfo.endTime() - 
snapshotInfo.startTime()); } + public void testStatusAPICallForShallowCopySnapshot() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used for the test"); + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + + final String snapshotRepoName = "snapshot-repo-name"; + createRepository(snapshotRepoName, "fs", snapshotRepoSettingsForShallowCopy()); + + final Path remoteStoreRepoPath = randomRepoPath(); + final String remoteStoreRepoName = "remote-store-repo-name"; + createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath); + + final String indexName = "index-1"; + createIndex(indexName); + ensureGreen(); + logger.info("--> indexing some data"); + for (int i = 0; i < 100; i++) { + index(indexName, "_doc", Integer.toString(i), "foo", "bar" + i); + } + refresh(); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(remoteStoreRepoName); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + ensureGreen(); + + logger.info("--> indexing some data"); + for (int i = 0; i < 100; i++) { + index(remoteStoreEnabledIndexName, "_doc", Integer.toString(i), "foo", "bar" + i); + } + refresh(); + + final String snapshot = "snapshot"; + createFullSnapshot(snapshotRepoName, snapshot); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == 1); + + final SnapshotStatus snapshotStatus = getSnapshotStatus(snapshotRepoName, snapshot); + assertThat(snapshotStatus.getState(), is(SnapshotsInProgress.State.SUCCESS)); + + final SnapshotIndexShardStatus snapshotShardState = stateFirstShard(snapshotStatus, indexName); + assertThat(snapshotShardState.getStage(), is(SnapshotIndexShardStage.DONE)); + assertThat(snapshotShardState.getStats().getTotalFileCount(), greaterThan(0)); + assertThat(snapshotShardState.getStats().getTotalSize(), greaterThan(0L)); + assertThat(snapshotShardState.getStats().getIncrementalFileCount(), greaterThan(0)); + assertThat(snapshotShardState.getStats().getIncrementalSize(), greaterThan(0L)); + + // Validating that the incremental file count and incremental file size is zero for shallow copy + final SnapshotIndexShardStatus shallowSnapshotShardState = stateFirstShard(snapshotStatus, remoteStoreEnabledIndexName); + assertThat(shallowSnapshotShardState.getStage(), is(SnapshotIndexShardStage.DONE)); + assertThat(shallowSnapshotShardState.getStats().getTotalFileCount(), greaterThan(0)); + assertThat(shallowSnapshotShardState.getStats().getTotalSize(), greaterThan(0L)); + assertThat(shallowSnapshotShardState.getStats().getIncrementalFileCount(), is(0)); + assertThat(shallowSnapshotShardState.getStats().getIncrementalSize(), is(0L)); + } + public void testStatusAPICallInProgressSnapshot() throws Exception { createRepository("test-repo", "mock", Settings.builder().put("location", randomRepoPath()).put("block_on_data", true)); @@ -188,6 +245,63 @@ public void testExceptionOnMissingShardLevelSnapBlob() throws IOException { ); } + public void testStatusAPIStatsForBackToBackShallowSnapshot() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used for the test"); + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + + final String snapshotRepoName = "snapshot-repo-name"; + createRepository(snapshotRepoName, "fs", snapshotRepoSettingsForShallowCopy()); + + final Path remoteStoreRepoPath = 
randomRepoPath(); + final String remoteStoreRepoName = "remote-store-repo-name"; + createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath); + + final String indexName = "index-1"; + createIndex(indexName); + ensureGreen(); + logger.info("--> indexing some data"); + for (int i = 0; i < 100; i++) { + index(indexName, "_doc", Integer.toString(i), "foo", "bar" + i); + } + refresh(); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(remoteStoreRepoName); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + ensureGreen(); + + logger.info("--> indexing some data"); + for (int i = 0; i < 100; i++) { + index(remoteStoreEnabledIndexName, "_doc", Integer.toString(i), "foo", "bar" + i); + } + refresh(); + + createFullSnapshot(snapshotRepoName, "test-snap-1"); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == 1); + + SnapshotStatus snapshotStatus = getSnapshotStatus(snapshotRepoName, "test-snap-1"); + assertThat(snapshotStatus.getState(), is(SnapshotsInProgress.State.SUCCESS)); + + SnapshotIndexShardStatus shallowSnapshotShardState = stateFirstShard(snapshotStatus, remoteStoreEnabledIndexName); + assertThat(shallowSnapshotShardState.getStage(), is(SnapshotIndexShardStage.DONE)); + final int totalFileCount = shallowSnapshotShardState.getStats().getTotalFileCount(); + final long totalSize = shallowSnapshotShardState.getStats().getTotalSize(); + final int incrementalFileCount = shallowSnapshotShardState.getStats().getIncrementalFileCount(); + final long incrementalSize = shallowSnapshotShardState.getStats().getIncrementalSize(); + + createFullSnapshot(snapshotRepoName, "test-snap-2"); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == 2); + + snapshotStatus = getSnapshotStatus(snapshotRepoName, "test-snap-2"); + assertThat(snapshotStatus.getState(), is(SnapshotsInProgress.State.SUCCESS)); + shallowSnapshotShardState = stateFirstShard(snapshotStatus, remoteStoreEnabledIndexName); + assertThat(shallowSnapshotShardState.getStats().getTotalFileCount(), equalTo(totalFileCount)); + assertThat(shallowSnapshotShardState.getStats().getTotalSize(), equalTo(totalSize)); + assertThat(shallowSnapshotShardState.getStats().getIncrementalFileCount(), equalTo(incrementalFileCount)); + assertThat(shallowSnapshotShardState.getStats().getIncrementalSize(), equalTo(incrementalSize)); + } + public void testGetSnapshotsWithoutIndices() throws Exception { createRepository("test-repo", "fs"); @@ -326,6 +440,63 @@ public void testSnapshotStatusOnFailedSnapshot() throws Exception { assertEquals(SnapshotsInProgress.State.FAILED, snapshotsStatusResponse.getSnapshots().get(0).getState()); } + public void testStatusAPICallInProgressShallowSnapshot() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used for the test"); + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + + final String snapshotRepoName = "snapshot-repo-name"; + createRepository(snapshotRepoName, "mock", snapshotRepoSettingsForShallowCopy().put("block_on_data", true)); + + final Path remoteStoreRepoPath = randomRepoPath(); + final String remoteStoreRepoName = "remote-store-repo-name"; + createRepository(remoteStoreRepoName, "mock", remoteStoreRepoPath); + + final String indexName = "index-1"; + createIndex(indexName); + ensureGreen(); + logger.info("--> indexing some data"); 
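This test registered its mock repository with block_on_data above; the remainder of it, below, waits for the data node to hit that block, asserts the snapshot status is still STARTED, then unblocks and joins the future. The same block/assert/unblock shape reduced to plain JDK primitives, purely as an illustration (the latch and executor are not OpenSearch test infrastructure):

    // Sketch using java.util.concurrent only:
    CountDownLatch block = new CountDownLatch(1);
    ExecutorService pool = Executors.newSingleThreadExecutor();
    Future<String> snapshotFuture = pool.submit(() -> {
        block.await();                        // stands in for block_on_data
        return "SUCCESS";
    });
    assertFalse(snapshotFuture.isDone());     // snapshot still "in progress"
    block.countDown();                        // stands in for unblockAllDataNodes
    assertEquals("SUCCESS", snapshotFuture.get());
    pool.shutdown();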
+ for (int i = 0; i < 100; i++) { + index(indexName, "_doc", Integer.toString(i), "foo", "bar" + i); + } + refresh(); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(remoteStoreRepoName); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + ensureGreen(); + + logger.info("--> indexing some data"); + for (int i = 0; i < 100; i++) { + index(remoteStoreEnabledIndexName, "_doc", Integer.toString(i), "foo", "bar" + i); + } + refresh(); + + logger.info("--> snapshot"); + ActionFuture createSnapshotResponseActionFuture = startFullSnapshot(snapshotRepoName, "test-snap"); + + logger.info("--> wait for data nodes to get blocked"); + waitForBlockOnAnyDataNode(snapshotRepoName, TimeValue.timeValueMinutes(1)); + awaitNumberOfSnapshotsInProgress(1); + assertEquals( + SnapshotsInProgress.State.STARTED, + client().admin() + .cluster() + .prepareSnapshotStatus(snapshotRepoName) + .setSnapshots("test-snap") + .get() + .getSnapshots() + .get(0) + .getState() + ); + + logger.info("--> unblock all data nodes"); + unblockAllDataNodes(snapshotRepoName); + + logger.info("--> wait for snapshot to finish"); + createSnapshotResponseActionFuture.actionGet(); + } + public void testGetSnapshotsRequest() throws Exception { final String repositoryName = "test-repo"; final String indexName = "test-idx"; diff --git a/server/src/main/java/org/opensearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java b/server/src/main/java/org/opensearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java index 9d7dfa92a48eb..d295027a857cd 100644 --- a/server/src/main/java/org/opensearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java +++ b/server/src/main/java/org/opensearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java @@ -44,6 +44,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.common.xcontent.XContentParserUtils; +import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import org.opensearch.index.store.StoreFileMetadata; import java.io.IOException; @@ -57,7 +58,7 @@ * * @opensearch.internal */ -public class BlobStoreIndexShardSnapshot implements ToXContentFragment { +public class BlobStoreIndexShardSnapshot implements ToXContentFragment, IndexShardSnapshot { /** * Information about snapshotted file @@ -592,4 +593,17 @@ public static BlobStoreIndexShardSnapshot fromXContent(XContentParser parser) th incrementalSize ); } + + @Override + public IndexShardSnapshotStatus getIndexShardSnapshotStatus() { + return IndexShardSnapshotStatus.newDone( + startTime, + time, + incrementalFileCount, + totalFileCount(), + incrementalSize, + totalSize(), + null + ); // Not adding a real generation here as it doesn't matter to callers + } } diff --git a/server/src/main/java/org/opensearch/index/snapshots/blobstore/IndexShardSnapshot.java b/server/src/main/java/org/opensearch/index/snapshots/blobstore/IndexShardSnapshot.java new file mode 100644 index 0000000000000..e79b9069ef16a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/snapshots/blobstore/IndexShardSnapshot.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.snapshots.blobstore; + +import org.opensearch.index.snapshots.IndexShardSnapshotStatus; + +/** + * Base interface for shard snapshot status + * + * @opensearch.internal + */ +@FunctionalInterface +public interface IndexShardSnapshot { + IndexShardSnapshotStatus getIndexShardSnapshotStatus(); +} diff --git a/server/src/main/java/org/opensearch/index/snapshots/blobstore/RemoteStoreShardShallowCopySnapshot.java b/server/src/main/java/org/opensearch/index/snapshots/blobstore/RemoteStoreShardShallowCopySnapshot.java index 8cb9fd3cd3c63..eefc1469a06a0 100644 --- a/server/src/main/java/org/opensearch/index/snapshots/blobstore/RemoteStoreShardShallowCopySnapshot.java +++ b/server/src/main/java/org/opensearch/index/snapshots/blobstore/RemoteStoreShardShallowCopySnapshot.java @@ -13,6 +13,7 @@ import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.snapshots.IndexShardSnapshotStatus; import java.io.IOException; import java.util.ArrayList; @@ -23,7 +24,7 @@ * * @opensearch.internal */ -public class RemoteStoreShardShallowCopySnapshot implements ToXContentFragment { +public class RemoteStoreShardShallowCopySnapshot implements ToXContentFragment, IndexShardSnapshot { private final String snapshot; private final String version; @@ -433,4 +434,17 @@ public RemoteStoreShardShallowCopySnapshot asClone(String targetSnapshotName, lo fileNames ); } + + @Override + public IndexShardSnapshotStatus getIndexShardSnapshotStatus() { + return IndexShardSnapshotStatus.newDone( + startTime, + time, + incrementalFileCount(), + totalFileCount, + incrementalSize(), + totalSize, + null + ); // Not adding a real generation here as it doesn't matter to callers + } } diff --git a/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java index 2c7e66b9a121d..3238ffe45e0a6 100644 --- a/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/remote/directory/RemoteSnapshotDirectoryFactory.java @@ -22,6 +22,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.shard.ShardPath; import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot; +import org.opensearch.index.snapshots.blobstore.IndexShardSnapshot; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.index.store.remote.utils.TransferManager; import org.opensearch.plugins.IndexStorePlugin; @@ -89,7 +90,10 @@ private Future createRemoteSnapshotDirectoryFromSnapsho // index restore is invoked return threadPool.executor(ThreadPool.Names.SNAPSHOT).submit(() -> { final BlobContainer blobContainer = blobStoreRepository.blobStore().blobContainer(blobPath); - final BlobStoreIndexShardSnapshot snapshot = blobStoreRepository.loadShardSnapshot(blobContainer, snapshotId); + final IndexShardSnapshot indexShardSnapshot = blobStoreRepository.loadShardSnapshot(blobContainer, snapshotId); + assert indexShardSnapshot instanceof BlobStoreIndexShardSnapshot + : "indexShardSnapshot should be an instance of BlobStoreIndexShardSnapshot"; + final BlobStoreIndexShardSnapshot snapshot = (BlobStoreIndexShardSnapshot) indexShardSnapshot; TransferManager transferManager = new TransferManager(blobContainer, 
remoteStoreFileCache); return new RemoteSnapshotDirectory(snapshot, localStoreDir, transferManager); }); diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index be5fbf2ab6a51..3e77a7e796375 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -109,6 +109,7 @@ import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot; import org.opensearch.index.snapshots.blobstore.RemoteStoreShardShallowCopySnapshot; import org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshots; +import org.opensearch.index.snapshots.blobstore.IndexShardSnapshot; import org.opensearch.index.snapshots.blobstore.RateLimitingInputStream; import org.opensearch.index.snapshots.blobstore.SlicedInputStream; import org.opensearch.index.snapshots.blobstore.SnapshotFiles; @@ -566,7 +567,10 @@ public void cloneShardSnapshot( // We don't need to check if there exists a shallow snapshot with the same name as we have the check before starting the clone // operation ensuring that the snapshot name is available by checking the repository data. Also, the new clone snapshot would // have a different UUID and hence a new unique snap-N file will be created. - final BlobStoreIndexShardSnapshot sourceMeta = loadShardSnapshot(shardContainer, source); + IndexShardSnapshot indexShardSnapshot = loadShardSnapshot(shardContainer, source); + assert indexShardSnapshot instanceof BlobStoreIndexShardSnapshot + : "indexShardSnapshot should be an instance of BlobStoreIndexShardSnapshot"; + final BlobStoreIndexShardSnapshot sourceMeta = (BlobStoreIndexShardSnapshot) indexShardSnapshot; logger.trace("[{}] [{}] writing shard snapshot file for clone", shardId, target); INDEX_SHARD_SNAPSHOT_FORMAT.write( sourceMeta.asClone(target.getName(), startTime, threadPool.absoluteTimeInMillis() - startTime), @@ -606,7 +610,10 @@ public void cloneRemoteStoreIndexShardSnapshot( // We don't need to check if there exists a shallow/full copy snapshot with the same name as we have the check before starting // the clone operation ensuring that the snapshot name is available by checking the repository data. Also, the new clone shallow // snapshot would have a different UUID and hence a new unique shallow-snap-N file will be created. 
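The hunks around this point all follow one pattern: loadShardSnapshot now returns the new IndexShardSnapshot interface, resolving the concrete on-disk format by probing which blob actually exists (snap-<uuid> for full copies, shallow-snap-<uuid> for shallow copies), and each call site that genuinely needs a concrete type downcasts behind an assert. Callers that only need stats never cast at all; they stay polymorphic through getIndexShardSnapshotStatus(). Caller-side shape, condensed from the surrounding hunks:

    // Load-then-downcast, as used by the clone and restore paths:
    IndexShardSnapshot indexShardSnapshot = loadShardSnapshot(shardContainer, source);
    assert indexShardSnapshot instanceof RemoteStoreShardShallowCopySnapshot
        : "expected a shallow-copy shard snapshot";
    RemoteStoreShardShallowCopySnapshot shallow =
        (RemoteStoreShardShallowCopySnapshot) indexShardSnapshot;

    // Status-only callers need no cast:
    IndexShardSnapshotStatus status = indexShardSnapshot.getIndexShardSnapshotStatus();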
- RemoteStoreShardShallowCopySnapshot remStoreBasedShardMetadata = loadShallowCopyShardSnapshot(shardContainer, source); + IndexShardSnapshot indexShardSnapshot = loadShardSnapshot(shardContainer, source); + assert indexShardSnapshot instanceof RemoteStoreShardShallowCopySnapshot + : "indexShardSnapshot should be an instance of RemoteStoreShardShallowCopySnapshot"; + RemoteStoreShardShallowCopySnapshot remStoreBasedShardMetadata = (RemoteStoreShardShallowCopySnapshot) indexShardSnapshot; String indexUUID = remStoreBasedShardMetadata.getIndexUUID(); String remoteStoreRepository = remStoreBasedShardMetadata.getRemoteStoreRepository(); RemoteStoreMetadataLockManager remoteStoreMetadataLockManger = remoteStoreLockManagerFactory.newLockManager( @@ -2697,7 +2704,10 @@ public void restoreShard( final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT); final BlobContainer container = shardContainer(indexId, snapshotShardId); executor.execute(ActionRunnable.wrap(restoreListener, l -> { - final BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot(container, snapshotId); + IndexShardSnapshot indexShardSnapshot = loadShardSnapshot(container, snapshotId); + assert indexShardSnapshot instanceof BlobStoreIndexShardSnapshot + : "indexShardSnapshot should be an instance of BlobStoreIndexShardSnapshot"; + final BlobStoreIndexShardSnapshot snapshot = (BlobStoreIndexShardSnapshot) indexShardSnapshot; final SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles(), null); new FileRestoreContext(metadata.name(), shardId, snapshotId, recoveryState) { @Override @@ -2846,21 +2856,16 @@ public RemoteStoreShardShallowCopySnapshot getRemoteStoreShallowCopyShardMetadat ShardId snapshotShardId ) { final BlobContainer container = shardContainer(indexId, snapshotShardId); - return loadShallowCopyShardSnapshot(container, snapshotId); + IndexShardSnapshot indexShardSnapshot = loadShardSnapshot(container, snapshotId); + assert indexShardSnapshot instanceof RemoteStoreShardShallowCopySnapshot + : "indexShardSnapshot should be an instance of RemoteStoreShardShallowCopySnapshot"; + return (RemoteStoreShardShallowCopySnapshot) indexShardSnapshot; } @Override public IndexShardSnapshotStatus getShardSnapshotStatus(SnapshotId snapshotId, IndexId indexId, ShardId shardId) { - BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot(shardContainer(indexId, shardId), snapshotId); - return IndexShardSnapshotStatus.newDone( - snapshot.startTime(), - snapshot.time(), - snapshot.incrementalFileCount(), - snapshot.totalFileCount(), - snapshot.incrementalSize(), - snapshot.totalSize(), - null - ); // Not adding a real generation here as it doesn't matter to callers + IndexShardSnapshot snapshot = loadShardSnapshot(shardContainer(indexId, shardId), snapshotId); + return snapshot.getIndexShardSnapshotStatus(); } @Override @@ -3019,32 +3024,18 @@ private static List unusedBlobs( .collect(Collectors.toList()); } - /** - * Loads information about remote store enabled shard snapshot for remote store interop enabled snapshots - */ - public RemoteStoreShardShallowCopySnapshot loadShallowCopyShardSnapshot(BlobContainer shardContainer, SnapshotId snapshotId) { - try { - return REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read(shardContainer, snapshotId.getUUID(), namedXContentRegistry); - } catch (NoSuchFileException ex) { - throw new SnapshotMissingException(metadata.name(), snapshotId, ex); - } catch (IOException ex) { - throw new SnapshotException( - metadata.name(), - snapshotId, - "failed 
to read shard snapshot file for [" + shardContainer.path() + ']', - ex - ); - } - } - /** * Loads information about shard snapshot */ - public BlobStoreIndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, SnapshotId snapshotId) { + public IndexShardSnapshot loadShardSnapshot(BlobContainer shardContainer, SnapshotId snapshotId) { try { - return INDEX_SHARD_SNAPSHOT_FORMAT.read(shardContainer, snapshotId.getUUID(), namedXContentRegistry); - } catch (NoSuchFileException ex) { - throw new SnapshotMissingException(metadata.name(), snapshotId, ex); + if (shardContainer.blobExists(INDEX_SHARD_SNAPSHOT_FORMAT.blobName(snapshotId.getUUID()))) { + return INDEX_SHARD_SNAPSHOT_FORMAT.read(shardContainer, snapshotId.getUUID(), namedXContentRegistry); + } else if (shardContainer.blobExists(REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.blobName(snapshotId.getUUID()))) { + return REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read(shardContainer, snapshotId.getUUID(), namedXContentRegistry); + } else { + throw new SnapshotMissingException(metadata.name(), snapshotId.getName()); + } } catch (IOException ex) { throw new SnapshotException( metadata.name(), diff --git a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java index 83051c7fed4e4..de5a569bebaf9 100644 --- a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -425,6 +425,13 @@ protected Settings.Builder randomRepositorySettings() { return settings; } + protected Settings.Builder snapshotRepoSettingsForShallowCopy() { + final Settings.Builder settings = Settings.builder(); + settings.put("location", randomRepoPath()); + settings.put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), Boolean.TRUE); + return settings; + } + protected static Settings.Builder indexSettingsNoReplicas(int shards) { return Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, shards).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0); } From 11f46994a6a0e4fa510fb878891446ccb007fbf4 Mon Sep 17 00:00:00 2001 From: Stephen Crawford <65832608+scrawfor99@users.noreply.github.com> Date: Tue, 11 Jul 2023 02:34:00 -0400 Subject: [PATCH 24/29] Manually bump net.minidev:json-smart from 2.4.11 to 2.5.0 (#8576) * Bump schema validator Signed-off-by: Stephen Crawford * update Json smart Signed-off-by: Stephen Crawford * Update changelog Signed-off-by: Stephen Crawford * Manually retry Signed-off-by: Stephen Crawford * Remove unneeded comment to trigger rerun Signed-off-by: Stephen Crawford * MOve line Signed-off-by: Stephen Crawford * Update changelog Signed-off-by: Stephen Crawford --------- Signed-off-by: Stephen Crawford Signed-off-by: Stephen Crawford <65832608+scrawfor99@users.noreply.github.com> --- CHANGELOG.md | 4 ++-- test/fixtures/hdfs-fixture/build.gradle | 2 +- .../src/main/java/hdfs/MiniHDFS.java | 24 +++++++++---------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 724a75e4e0405..5130dd00845b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,8 +40,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.bouncycastle:bcprov-jdk15on` to `org.bouncycastle:bcprov-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) - Bump 
`org.bouncycastle:bcmail-jdk15on` to `org.bouncycastle:bcmail-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) - Bump `org.bouncycastle:bcpkix-jdk15on` to `org.bouncycastle:bcpkix-jdk15to18` version 1.75 ([#8247](https://github.com/opensearch-project/OpenSearch/pull/8247)) -- Bump `com.google.cloud:google-cloud-core-http` from 2.17.0 to 2.21.0 ([#8586](https://github.com/opensearch-project/OpenSearch/pull/8586)) - ### Changed - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) @@ -140,8 +138,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.maven:maven-model` from 3.9.2 to 3.9.3 (#8403) - Bump `io.projectreactor.netty:reactor-netty` and `io.projectreactor.netty:reactor-netty-core` from 1.1.7 to 1.1.8 (#8405) - Bump `com.azure:azure-storage-blob` from 12.22.2 to 12.22.3 (#8572) +- Bump `net.minidev:json-smart` from 2.4.11 to 2.5.0 ([#8576](https://github.com/opensearch-project/OpenSearch/pull/8576)) - Bump `com.google.jimfs:jimfs` from 1.2 to 1.3.0 (#8577, #8571) - Bump `com.networknt:json-schema-validator` from 1.0.85 to 1.0.86 ([#8573](https://github.com/opensearch-project/OpenSearch/pull/8573)) +- Bump `com.google.cloud:google-cloud-core-http` from 2.17.0 to 2.21.0 ([#8586](https://github.com/opensearch-project/OpenSearch/pull/8586)) ### Changed - Replace jboss-annotations-api_1.2_spec with jakarta.annotation-api ([#7836](https://github.com/opensearch-project/OpenSearch/pull/7836)) diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle index b1a87fe6c3112..b6dc8820a85c5 100644 --- a/test/fixtures/hdfs-fixture/build.gradle +++ b/test/fixtures/hdfs-fixture/build.gradle @@ -55,7 +55,7 @@ dependencies { api "com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:${versions.jackson}" api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" api "com.fasterxml.woodstox:woodstox-core:${versions.woodstox}" - api 'net.minidev:json-smart:2.4.11' + api 'net.minidev:json-smart:2.5.0' api "org.mockito:mockito-core:${versions.mockito}" api "com.google.protobuf:protobuf-java:${versions.protobuf}" api "org.jetbrains.kotlin:kotlin-stdlib:${versions.kotlin}" diff --git a/test/fixtures/hdfs-fixture/src/main/java/hdfs/MiniHDFS.java b/test/fixtures/hdfs-fixture/src/main/java/hdfs/MiniHDFS.java index 5e111364b955e..99c62d1d0b707 100644 --- a/test/fixtures/hdfs-fixture/src/main/java/hdfs/MiniHDFS.java +++ b/test/fixtures/hdfs-fixture/src/main/java/hdfs/MiniHDFS.java @@ -32,6 +32,17 @@ package hdfs; +import java.io.File; +import java.lang.management.ManagementFactory; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; @@ -46,18 +57,6 @@ import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.security.UserGroupInformation; -import java.io.File; -import java.lang.management.ManagementFactory; -import java.net.URL; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import 
java.nio.file.StandardCopyOption; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - /** * MiniHDFS test fixture. There is a CLI tool, but here we can * easily properly setup logging, avoid parsing JSON, etc. @@ -74,7 +73,6 @@ public static void main(String[] args) throws Exception { } boolean secure = args.length == 3; - // configure Paths Path baseDir = Paths.get(args[0]); // hadoop-home/, so logs will not complain if (System.getenv("HADOOP_HOME") == null) { From c677f355589ead8b5d9458ef75e82749dba44d76 Mon Sep 17 00:00:00 2001 From: Harish Bhakuni Date: Tue, 11 Jul 2023 00:45:22 -0700 Subject: [PATCH 25/29] [Snapshot Interop] Keep API parameters behind remote store experimental flag. (#8594) Signed-off-by: Harish Bhakuni --- .../restore/RestoreSnapshotRequest.java | 69 +++++++++++++------ .../repositories/RepositoriesService.java | 7 ++ .../opensearch/snapshots/RestoreService.java | 6 ++ 3 files changed, 61 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java index 9ab66d726854e..cddb338310c4f 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/snapshots/restore/RestoreSnapshotRequest.java @@ -41,6 +41,7 @@ import org.opensearch.common.io.stream.StreamOutput; import org.opensearch.common.logging.DeprecationLogger; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.ToXContentObject; import org.opensearch.core.xcontent.XContentBuilder; @@ -150,7 +151,7 @@ public RestoreSnapshotRequest(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_7_0)) { storageType = in.readEnum(StorageType.class); } - if (in.getVersion().onOrAfter(Version.V_2_9_0)) { + if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE) && in.getVersion().onOrAfter(Version.V_2_9_0)) { sourceRemoteStoreRepository = in.readOptionalString(); } } @@ -174,7 +175,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_7_0)) { out.writeEnum(storageType); } - if (out.getVersion().onOrAfter(Version.V_2_9_0)) { + if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE) && out.getVersion().onOrAfter(Version.V_2_9_0)) { out.writeOptionalString(sourceRemoteStoreRepository); } } @@ -614,6 +615,11 @@ public RestoreSnapshotRequest source(Map source) { } } else if (name.equals("source_remote_store_repository")) { + if (!FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) { + throw new IllegalArgumentException( + "Unsupported parameter " + name + ". 
Please enable remote store feature flag for this experimental feature" + ); + } if (entry.getValue() instanceof String) { setSourceRemoteStoreRepository((String) entry.getValue()); } else { @@ -664,7 +670,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (storageType != null) { storageType.toXContent(builder); } - if (sourceRemoteStoreRepository != null) { + if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE) && sourceRemoteStoreRepository != null) { builder.field("source_remote_store_repository", sourceRemoteStoreRepository); } builder.endObject(); @@ -681,7 +687,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RestoreSnapshotRequest that = (RestoreSnapshotRequest) o; - return waitForCompletion == that.waitForCompletion + boolean equals = waitForCompletion == that.waitForCompletion && includeGlobalState == that.includeGlobalState && partial == that.partial && includeAliases == that.includeAliases @@ -694,27 +700,48 @@ public boolean equals(Object o) { && Objects.equals(indexSettings, that.indexSettings) && Arrays.equals(ignoreIndexSettings, that.ignoreIndexSettings) && Objects.equals(snapshotUuid, that.snapshotUuid) - && Objects.equals(storageType, that.storageType) - && Objects.equals(sourceRemoteStoreRepository, that.sourceRemoteStoreRepository); + && Objects.equals(storageType, that.storageType); + if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) { + equals = Objects.equals(sourceRemoteStoreRepository, that.sourceRemoteStoreRepository); + } + return equals; } @Override public int hashCode() { - int result = Objects.hash( - snapshot, - repository, - indicesOptions, - renamePattern, - renameReplacement, - waitForCompletion, - includeGlobalState, - partial, - includeAliases, - indexSettings, - snapshotUuid, - storageType, - sourceRemoteStoreRepository - ); + int result; + if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) { + result = Objects.hash( + snapshot, + repository, + indicesOptions, + renamePattern, + renameReplacement, + waitForCompletion, + includeGlobalState, + partial, + includeAliases, + indexSettings, + snapshotUuid, + storageType, + sourceRemoteStoreRepository + ); + } else { + result = Objects.hash( + snapshot, + repository, + indicesOptions, + renamePattern, + renameReplacement, + waitForCompletion, + includeGlobalState, + partial, + includeAliases, + indexSettings, + snapshotUuid, + storageType + ); + } result = 31 * result + Arrays.hashCode(indices); result = 31 * result + Arrays.hashCode(ignoreIndexSettings); return result; diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java index 9c56d172f2ea1..e7f7a1d9c0554 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java @@ -63,6 +63,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.common.util.io.IOUtils; import org.opensearch.repositories.blobstore.MeteredBlobStoreRepository; @@ -627,6 +628,12 @@ public static void validateRepositoryMetadataSettings( + minVersionInCluster ); } + if (REMOTE_STORE_INDEX_SHALLOW_COPY.get(repositoryMetadataSettings) && 
!FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) { + throw new RepositoryException( + repositoryName, + "setting " + REMOTE_STORE_INDEX_SHALLOW_COPY.getKey() + " cannot be enabled, as remote store feature is not enabled." + ); + } } private static void ensureRepositoryNotInUse(ClusterState clusterState, String repository) { diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index ebd0e59599c21..7df24e5357555 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -453,6 +453,12 @@ public ClusterState execute(ClusterState currentState) { final boolean isRemoteStoreShallowCopy = Boolean.TRUE.equals( snapshotInfo.isRemoteStoreIndexShallowCopyEnabled() ) && metadata.index(index).getSettings().getAsBoolean(SETTING_REMOTE_STORE_ENABLED, false); + if (isSearchableSnapshot && isRemoteStoreShallowCopy) { + throw new SnapshotRestoreException( + snapshot, + "Shallow copy snapshot cannot be restored as searchable snapshot." + ); + } if (isRemoteStoreShallowCopy && !currentState.getNodes().getMinNodeVersion().onOrAfter(Version.V_2_9_0)) { throw new SnapshotRestoreException( snapshot, From a430e18e82f7671e01c37ae5a549fc36dd673b1a Mon Sep 17 00:00:00 2001 From: Bansi Kasundra <66969140+kasundra07@users.noreply.github.com> Date: Tue, 11 Jul 2023 01:02:32 -0700 Subject: [PATCH 26/29] Add Changes in Snapshot Delete Flow for remote store interoperability. (#7497) Signed-off-by: Bansi Kasundra --- .../snapshots/DeleteSnapshotIT.java | 305 ++++++++++++++++++ .../TransportCleanupRepositoryAction.java | 5 + .../opensearch/repositories/Repository.java | 19 ++ .../blobstore/BlobStoreRepository.java | 273 ++++++++++++---- .../snapshots/SnapshotsService.java | 24 +- 5 files changed, 569 insertions(+), 57 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java new file mode 100644 index 0000000000000..2688449294f3d --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DeleteSnapshotIT.java @@ -0,0 +1,305 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
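One detail worth double-checking in the RestoreSnapshotRequest.equals change above: inside the feature-flag branch the accumulated result is assigned rather than and-ed, so with REMOTE_STORE enabled two requests that differ in any other field would still compare equal whenever their sourceRemoteStoreRepository values match. The presumably intended form:

    // equals should narrow, not replace, the accumulated comparison:
    if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE)) {
        equals = equals && Objects.equals(sourceRemoteStoreRepository, that.sourceRemoteStoreRepository);
    }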
+ */ + +package org.opensearch.snapshots; + +import org.opensearch.action.ActionFuture; +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.client.Client; +import org.opensearch.common.UUIDs; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.test.FeatureFlagSetter; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.is; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class DeleteSnapshotIT extends AbstractSnapshotIntegTestCase { + + public void testDeleteSnapshot() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + FeatureFlagSetter.set(FeatureFlags.REMOTE_STORE); + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + + final String snapshotRepoName = "snapshot-repo-name"; + final Path snapshotRepoPath = randomRepoPath(); + createRepository(snapshotRepoName, "fs", snapshotRepoPath); + + final Path remoteStoreRepoPath = randomRepoPath(); + final String remoteStoreRepoName = "remote-store-repo-name"; + createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath); + + final String indexName = "index-1"; + createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(remoteStoreRepoName); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + indexRandomDocs(remoteStoreEnabledIndexName, randomIntBetween(5, 10)); + + final String snapshot = "snapshot"; + createFullSnapshot(snapshotRepoName, snapshot); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == 0); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == 1); + + assertAcked(startDeleteSnapshot(snapshotRepoName, snapshot).get()); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == 0); + } + + public void testDeleteShallowCopySnapshot() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + FeatureFlagSetter.set(FeatureFlags.REMOTE_STORE); + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + + final String snapshotRepoName = "snapshot-repo-name"; + createRepository(snapshotRepoName, "fs", snapshotRepoSettingsForShallowCopy()); + + final Path remoteStoreRepoPath = randomRepoPath(); + final String remoteStoreRepoName = "remote-store-repo-name"; + createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath); + + final String indexName = "index-1"; + createIndexWithRandomDocs(indexName, randomIntBetween(5, 10)); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(remoteStoreRepoName); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + indexRandomDocs(remoteStoreEnabledIndexName, randomIntBetween(5, 10)); + + final String shallowSnapshot = "shallow-snapshot"; + 
createFullSnapshot(snapshotRepoName, shallowSnapshot); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == 1); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == 1); + + assertAcked(startDeleteSnapshot(snapshotRepoName, shallowSnapshot).get()); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == 0); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == 0); + } + + // Deleting multiple shallow copy snapshots as part of single delete call with repo having only shallow copy snapshots. + public void testDeleteMultipleShallowCopySnapshotsCase1() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + FeatureFlagSetter.set(FeatureFlags.REMOTE_STORE); + + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + final Client clusterManagerClient = internalCluster().clusterManagerClient(); + ensureStableCluster(2); + + final String snapshotRepoName = "snapshot-repo-name"; + final Path snapshotRepoPath = randomRepoPath(); + createRepository(snapshotRepoName, "mock", snapshotRepoSettingsForShallowCopy(snapshotRepoPath)); + final String testIndex = "index-test"; + createIndexWithContent(testIndex); + + final Path remoteStoreRepoPath = randomRepoPath(); + final String remoteStoreRepoName = "remote-store-repo-name"; + createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(remoteStoreRepoName); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + indexRandomDocs(remoteStoreEnabledIndexName, randomIntBetween(5, 10)); + + // Creating some shallow copy snapshots + int totalShallowCopySnapshotsCount = randomIntBetween(4, 10); + List shallowCopySnapshots = createNSnapshots(snapshotRepoName, totalShallowCopySnapshotsCount); + List snapshotsToBeDeleted = shallowCopySnapshots.subList(0, randomIntBetween(2, totalShallowCopySnapshotsCount)); + int tobeDeletedSnapshotsCount = snapshotsToBeDeleted.size(); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == totalShallowCopySnapshotsCount); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == totalShallowCopySnapshotsCount); + // Deleting subset of shallow copy snapshots + assertAcked( + clusterManagerClient.admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, snapshotsToBeDeleted.toArray(new String[0])) + .get() + ); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == totalShallowCopySnapshotsCount - tobeDeletedSnapshotsCount); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == totalShallowCopySnapshotsCount + - tobeDeletedSnapshotsCount); + } + + // Deleting multiple shallow copy snapshots as part of single delete call with both partial and full copy snapshot present in the repo + // And then deleting multiple full copy snapshots as part of single delete call with both partial and shallow copy snapshots present in + // the repo + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8610") + public void testDeleteMultipleShallowCopySnapshotsCase2() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + FeatureFlagSetter.set(FeatureFlags.REMOTE_STORE); + + 
internalCluster().startClusterManagerOnlyNode(); + final String dataNode = internalCluster().startDataOnlyNode(); + ensureStableCluster(2); + final String clusterManagerNode = internalCluster().getClusterManagerName(); + + final String snapshotRepoName = "snapshot-repo-name"; + final Path snapshotRepoPath = randomRepoPath(); + createRepository(snapshotRepoName, "mock", snapshotRepoSettingsForShallowCopy(snapshotRepoPath)); + final String testIndex = "index-test"; + createIndexWithContent(testIndex); + + final Path remoteStoreRepoPath = randomRepoPath(); + final String remoteStoreRepoName = "remote-store-repo-name"; + createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(remoteStoreRepoName); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + indexRandomDocs(remoteStoreEnabledIndexName, randomIntBetween(5, 10)); + + // Creating a partial shallow copy snapshot + final String snapshot = "snapshot"; + blockNodeWithIndex(snapshotRepoName, testIndex); + blockDataNode(snapshotRepoName, dataNode); + + final Client clusterManagerClient = internalCluster().clusterManagerClient(); + final ActionFuture snapshotFuture = clusterManagerClient.admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshot) + .setWaitForCompletion(true) + .execute(); + + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(dataNode, snapshotRepoName, TimeValue.timeValueSeconds(30L)); + internalCluster().restartNode(dataNode); + assertThat(snapshotFuture.get().getSnapshotInfo().state(), is(SnapshotState.PARTIAL)); + + unblockAllDataNodes(snapshotRepoName); + + ensureStableCluster(2, clusterManagerNode); + + // Creating some shallow copy snapshots + int totalShallowCopySnapshotsCount = randomIntBetween(4, 10); + List shallowCopySnapshots = createNSnapshots(snapshotRepoName, totalShallowCopySnapshotsCount); + List shallowCopySnapshotsToBeDeleted = shallowCopySnapshots.subList(0, randomIntBetween(2, totalShallowCopySnapshotsCount)); + int tobeDeletedShallowCopySnapshotsCount = shallowCopySnapshotsToBeDeleted.size(); + totalShallowCopySnapshotsCount += 1; // Adding partial shallow snapshot here + // Updating the snapshot repository flag to disable shallow snapshots + createRepository(snapshotRepoName, "mock", snapshotRepoPath); + // Creating some full copy snapshots + int totalFullCopySnapshotsCount = randomIntBetween(4, 10); + List fullCopySnapshots = createNSnapshots(snapshotRepoName, totalFullCopySnapshotsCount); + List fullCopySnapshotsToBeDeleted = fullCopySnapshots.subList(0, randomIntBetween(2, totalFullCopySnapshotsCount)); + int tobeDeletedFullCopySnapshotsCount = fullCopySnapshotsToBeDeleted.size(); + + int totalSnapshotsCount = totalFullCopySnapshotsCount + totalShallowCopySnapshotsCount; + + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == totalShallowCopySnapshotsCount); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == totalSnapshotsCount); + // Deleting subset of shallow copy snapshots + assertAcked( + clusterManagerClient.admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, shallowCopySnapshotsToBeDeleted.toArray(new String[0])) + .get() + ); + totalSnapshotsCount -= tobeDeletedShallowCopySnapshotsCount; + totalShallowCopySnapshotsCount -= tobeDeletedShallowCopySnapshotsCount; + assert 
(getRepositoryData(snapshotRepoName).getSnapshotIds().size() == totalSnapshotsCount); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == totalShallowCopySnapshotsCount); + + // Deleting subset of full copy snapshots + assertAcked( + clusterManagerClient.admin() + .cluster() + .prepareDeleteSnapshot(snapshotRepoName, fullCopySnapshotsToBeDeleted.toArray(new String[0])) + .get() + ); + totalSnapshotsCount -= tobeDeletedFullCopySnapshotsCount; + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == totalSnapshotsCount); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == totalShallowCopySnapshotsCount); + } + + // Deleting subset of shallow and full copy snapshots as part of single delete call and then deleting all snapshots in the repo. + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8610") + public void testDeleteMultipleShallowCopySnapshotsCase3() throws Exception { + disableRepoConsistencyCheck("Remote store repository is being used in the test"); + FeatureFlagSetter.set(FeatureFlags.REMOTE_STORE); + + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNode(); + final Client clusterManagerClient = internalCluster().clusterManagerClient(); + ensureStableCluster(2); + + final String snapshotRepoName = "snapshot-repo-name"; + final Path snapshotRepoPath = randomRepoPath(); + createRepository(snapshotRepoName, "mock", snapshotRepoSettingsForShallowCopy(snapshotRepoPath)); + final String testIndex = "index-test"; + createIndexWithContent(testIndex); + + final Path remoteStoreRepoPath = randomRepoPath(); + final String remoteStoreRepoName = "remote-store-repo-name"; + createRepository(remoteStoreRepoName, "fs", remoteStoreRepoPath); + + final String remoteStoreEnabledIndexName = "remote-index-1"; + final Settings remoteStoreEnabledIndexSettings = getRemoteStoreBackedIndexSettings(remoteStoreRepoName); + createIndex(remoteStoreEnabledIndexName, remoteStoreEnabledIndexSettings); + indexRandomDocs(remoteStoreEnabledIndexName, randomIntBetween(5, 10)); + + // Creating some shallow copy snapshots + int totalShallowCopySnapshotsCount = randomIntBetween(4, 10); + List shallowCopySnapshots = createNSnapshots(snapshotRepoName, totalShallowCopySnapshotsCount); + List shallowCopySnapshotsToBeDeleted = shallowCopySnapshots.subList(0, randomIntBetween(2, totalShallowCopySnapshotsCount)); + int tobeDeletedShallowCopySnapshotsCount = shallowCopySnapshotsToBeDeleted.size(); + // Updating the snapshot repository flag to disable shallow snapshots + createRepository(snapshotRepoName, "mock", snapshotRepoPath); + // Creating some full copy snapshots + int totalFullCopySnapshotsCount = randomIntBetween(4, 10); + List fullCopySnapshots = createNSnapshots(snapshotRepoName, totalFullCopySnapshotsCount); + List fullCopySnapshotsToBeDeleted = fullCopySnapshots.subList(0, randomIntBetween(2, totalFullCopySnapshotsCount)); + int tobeDeletedFullCopySnapshotsCount = fullCopySnapshotsToBeDeleted.size(); + + int totalSnapshotsCount = totalFullCopySnapshotsCount + totalShallowCopySnapshotsCount; + + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == totalShallowCopySnapshotsCount); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == totalSnapshotsCount); + // Deleting subset of shallow copy snapshots and full copy snapshots + assertAcked( + clusterManagerClient.admin() + .cluster() + 
.prepareDeleteSnapshot( + snapshotRepoName, + Stream.concat(shallowCopySnapshotsToBeDeleted.stream(), fullCopySnapshotsToBeDeleted.stream()).toArray(String[]::new) + ) + .get() + ); + totalSnapshotsCount -= (tobeDeletedShallowCopySnapshotsCount + tobeDeletedFullCopySnapshotsCount); + totalShallowCopySnapshotsCount -= tobeDeletedShallowCopySnapshotsCount; + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == totalSnapshotsCount); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == totalShallowCopySnapshotsCount); + + // Deleting all the remaining snapshots + assertAcked(clusterManagerClient.admin().cluster().prepareDeleteSnapshot(snapshotRepoName, "*").get()); + assert (getRepositoryData(snapshotRepoName).getSnapshotIds().size() == 0); + assert (getLockFilesInRemoteStore(remoteStoreEnabledIndexName, remoteStoreRepoName).length == 0); + } + + private List createNSnapshots(String repoName, int count) { + final List snapshotNames = new ArrayList<>(count); + final String prefix = "snap-" + UUIDs.randomBase64UUID(random()).toLowerCase(Locale.ROOT) + "-"; + for (int i = 0; i < count; i++) { + final String name = prefix + i; + createFullSnapshot(repoName, name); + snapshotNames.add(name); + } + logger.info("--> created {} in [{}]", snapshotNames, repoName); + return snapshotNames; + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java index 07b918e427784..ec02173ff2807 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/repositories/cleanup/TransportCleanupRepositoryAction.java @@ -52,6 +52,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.inject.Inject; import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.repositories.RepositoryCleanupResult; @@ -93,6 +94,8 @@ public final class TransportCleanupRepositoryAction extends TransportClusterMana private final SnapshotsService snapshotsService; + private final RemoteStoreLockManagerFactory remoteStoreLockManagerFactory; + @Override protected String executor() { return ThreadPool.Names.SAME; @@ -119,6 +122,7 @@ public TransportCleanupRepositoryAction( ); this.repositoriesService = repositoriesService; this.snapshotsService = snapshotsService; + this.remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory(() -> repositoriesService); // We add a state applier that will remove any dangling repository cleanup actions on cluster-manager failover. // This is safe to do since cleanups will increment the repository state id before executing any operations to prevent concurrent // operations from corrupting the repository. This is the same safety mechanism used by snapshot deletes. 
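The factory wired in above is what the cleanup path uses to resolve one lock manager per remote-store shard and to release the lock that a shallow snapshot holds on it. A minimal sketch of that release step, using only names that appear later in this series (the argument values are placeholders; in practice they are read from the shallow-snap shard metadata):

    // Placeholder values; the real ones come from the RemoteStoreShardShallowCopySnapshot metadata.
    RemoteStoreLockManagerFactory factory = new RemoteStoreLockManagerFactory(() -> repositoriesService);
    RemoteStoreMetadataLockManager lockManager = factory.newLockManager(remoteStoreRepoForIndex, indexUUID, shardId);
    lockManager.release(FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID).build());

Releasing before deleting the shallow-snap blob (rather than after) is deliberate: if the release fails, the blob survives and a later delete operation can retry the release.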
@@ -267,6 +271,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS l -> blobStoreRepository.cleanup( repositoryStateId, snapshotsService.minCompatibleVersion(newState.nodes().getMinNodeVersion(), repositoryData, null), + remoteStoreLockManagerFactory, ActionListener.wrap(result -> after(null, result), e -> after(e, null)) ) ) diff --git a/server/src/main/java/org/opensearch/repositories/Repository.java b/server/src/main/java/org/opensearch/repositories/Repository.java index 793b3d317e1bd..4367b4c65851e 100644 --- a/server/src/main/java/org/opensearch/repositories/Repository.java +++ b/server/src/main/java/org/opensearch/repositories/Repository.java @@ -169,6 +169,25 @@ void deleteSnapshots( ActionListener listener ); + /** + * Deletes snapshots and releases respective lock files from remote store repository. + * + * @param snapshotIds snapshot ids + * @param repositoryStateId the unique id identifying the state of the repository when the snapshot deletion began + * @param repositoryMetaVersion version of the updated repository metadata to write + * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files + * @param listener completion listener + */ + default void deleteSnapshotsAndReleaseLockFiles( + Collection snapshotIds, + long repositoryStateId, + Version repositoryMetaVersion, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + ActionListener listener + ) { + throw new UnsupportedOperationException(); + } + /** * Returns snapshot throttle time in nanoseconds */ diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index 3e77a7e796375..398d78392ae17 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -797,11 +797,11 @@ public RepositoryStats stats() { return new RepositoryStats(store.stats()); } - @Override - public void deleteSnapshots( + public void deleteSnapshotsAndReleaseLockFiles( Collection snapshotIds, long repositoryStateId, Version repositoryMetaVersion, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener listener ) { if (isReadOnly()) { @@ -822,6 +822,7 @@ protected void doRun() throws Exception { rootBlobs, repositoryData, repositoryMetaVersion, + remoteStoreLockManagerFactory, listener ); } @@ -834,6 +835,22 @@ public void onFailure(Exception e) { } } + @Override + public void deleteSnapshots( + Collection snapshotIds, + long repositoryStateId, + Version repositoryMetaVersion, + ActionListener listener + ) { + deleteSnapshotsAndReleaseLockFiles( + snapshotIds, + repositoryStateId, + repositoryMetaVersion, + null, // Passing null since no remote store lock files need to be cleaned up. + listener + ); + } + /** * Loads {@link RepositoryData} ensuring that it is consistent with the given {@code rootBlobs} as well of the assumed generation. 
* @@ -883,16 +900,18 @@ private RepositoryData safeRepositoryData(long repositoryStateId, Map snapshotIds, @@ -901,11 +920,18 @@ private void doDeleteShardSnapshots( Map rootBlobs, RepositoryData repositoryData, Version repoMetaVersion, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener listener ) { // First write the new shard state metadata (with the removed snapshot) and compute deletion targets final StepListener> writeShardMetaDataAndComputeDeletesStep = new StepListener<>(); - writeUpdatedShardMetaDataAndComputeDeletes(snapshotIds, repositoryData, true, writeShardMetaDataAndComputeDeletesStep); + writeUpdatedShardMetaDataAndComputeDeletes( + snapshotIds, + repositoryData, + true, + remoteStoreLockManagerFactory, + writeShardMetaDataAndComputeDeletesStep + ); // Once we have put the new shard-level metadata into place, we can update the repository metadata as follows: // 1. Remove the snapshots from the list of existing snapshots // 2. Update the index shard generations of all updated shard folders @@ -935,11 +961,19 @@ private void doDeleteShardSnapshots( ActionListener.wrap(() -> listener.onResponse(updatedRepoData)), 2 ); - cleanupUnlinkedRootAndIndicesBlobs(snapshotIds, foundIndices, rootBlobs, updatedRepoData, afterCleanupsListener); + cleanupUnlinkedRootAndIndicesBlobs( + snapshotIds, + foundIndices, + rootBlobs, + updatedRepoData, + remoteStoreLockManagerFactory, + afterCleanupsListener + ); asyncCleanupUnlinkedShardLevelBlobs( repositoryData, snapshotIds, writeShardMetaDataAndComputeDeletesStep.result(), + remoteStoreLockManagerFactory, afterCleanupsListener ); }, listener::onFailure); @@ -950,15 +984,24 @@ private void cleanupUnlinkedRootAndIndicesBlobs( Map foundIndices, Map rootBlobs, RepositoryData updatedRepoData, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener listener ) { - cleanupStaleBlobs(deletedSnapshots, foundIndices, rootBlobs, updatedRepoData, ActionListener.map(listener, ignored -> null)); + cleanupStaleBlobs( + deletedSnapshots, + foundIndices, + rootBlobs, + updatedRepoData, + remoteStoreLockManagerFactory, + ActionListener.map(listener, ignored -> null) + ); } private void asyncCleanupUnlinkedShardLevelBlobs( RepositoryData oldRepositoryData, Collection snapshotIds, Collection deleteResults, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener listener ) { final List filesToDelete = resolveFilesToDelete(oldRepositoryData, snapshotIds, deleteResults); @@ -982,7 +1025,7 @@ private void asyncCleanupUnlinkedShardLevelBlobs( // Start as many workers as fit into the snapshot pool at once at the most final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), staleFilesToDeleteInBatch.size()); for (int i = 0; i < workers; ++i) { - executeStaleShardDelete(staleFilesToDeleteInBatch, groupedListener); + executeStaleShardDelete(staleFilesToDeleteInBatch, remoteStoreLockManagerFactory, groupedListener); } } catch (Exception e) { @@ -995,12 +1038,49 @@ private void asyncCleanupUnlinkedShardLevelBlobs( } } - private void executeStaleShardDelete(BlockingQueue> staleFilesToDeleteInBatch, GroupedActionListener listener) - throws InterruptedException { + // When remoteStoreLockManagerFactory is non-null, while deleting the files, lock files are also released before deletion of respective + // shallow-snap-UUID files. And if it is null, we just delete the stale shard blobs. 
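As a worked example of the blob-name handling in executeStaleShardDelete below, assuming SHALLOW_SNAPSHOT_PREFIX is the literal "shallow-snap-" (the comments in this change refer to "shallow-snap-UUID" files) and that paths follow the segment layout implied by the split("/") indices used in the patch:

    // Hypothetical blob path: <root>/<indexId>/<shardId>/shallow-snap-<snapshotUUID>.dat
    String fileToDelete = "indices/wFq3kO7RQ5-example/0/shallow-snap-abc123.dat";
    String[] parts = fileToDelete.split("/");
    String indexId = parts[1];   // "wFq3kO7RQ5-example"
    String shardId = parts[2];   // "0"
    String blobName = parts[3];  // "shallow-snap-abc123.dat"
    String snapshotUUID = blobName.substring("shallow-snap-".length(), blobName.length() - ".dat".length()); // "abc123"

The extracted UUID is then used as the acquirer id when releasing the corresponding remote-store lock, as shown in the method that follows.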
+ private void executeStaleShardDelete( + BlockingQueue> staleFilesToDeleteInBatch, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + GroupedActionListener listener + ) throws InterruptedException { List filesToDelete = staleFilesToDeleteInBatch.poll(0L, TimeUnit.MILLISECONDS); if (filesToDelete != null) { threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> { try { + if (remoteStoreLockManagerFactory != null) { + for (String fileToDelete : filesToDelete) { + if (fileToDelete.contains(SHALLOW_SNAPSHOT_PREFIX)) { + String[] fileToDeletePath = fileToDelete.split("/"); + String indexId = fileToDeletePath[1]; + String shardId = fileToDeletePath[2]; + String shallowSnapBlob = fileToDeletePath[3]; + String snapshotUUID = shallowSnapBlob.substring( + SHALLOW_SNAPSHOT_PREFIX.length(), + shallowSnapBlob.length() - ".dat".length() + ); + BlobContainer shardContainer = blobStore().blobContainer(indicesPath().add(indexId).add(shardId)); + RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot = + REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( + shardContainer, + snapshotUUID, + namedXContentRegistry + ); + String indexUUID = remoteStoreShardShallowCopySnapshot.getIndexUUID(); + String remoteStoreRepoForIndex = remoteStoreShardShallowCopySnapshot.getRemoteStoreRepository(); + // Releasing lock file before deleting the shallow-snap-UUID file because in case of any failure while + // releasing the lock file, we would still have the shallow-snap-UUID file and that would be used during + // next delete operation for releasing this lock file + RemoteStoreMetadataLockManager remoteStoreMetadataLockManager = remoteStoreLockManagerFactory + .newLockManager(remoteStoreRepoForIndex, indexUUID, shardId); + remoteStoreMetadataLockManager.release( + FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID).build() + ); + } + } + } + // Deleting the shard blobs deleteFromContainer(blobContainer(), filesToDelete); l.onResponse(null); } catch (Exception e) { @@ -1014,7 +1094,7 @@ private void executeStaleShardDelete(BlockingQueue> staleFilesToDel ); l.onFailure(e); } - executeStaleShardDelete(staleFilesToDeleteInBatch, listener); + executeStaleShardDelete(staleFilesToDeleteInBatch, remoteStoreLockManagerFactory, listener); })); } } @@ -1024,6 +1104,7 @@ private void writeUpdatedShardMetaDataAndComputeDeletes( Collection snapshotIds, RepositoryData oldRepositoryData, boolean useUUIDs, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener> onAllShardsCompleted ) { @@ -1096,17 +1177,30 @@ protected void doRun() throws Exception { final Set blobs = shardContainer.listBlobs().keySet(); final BlobStoreIndexShardSnapshots blobStoreIndexShardSnapshots; final long newGen; - if (useUUIDs) { - newGen = -1L; - blobStoreIndexShardSnapshots = buildBlobStoreIndexShardSnapshots( - blobs, - shardContainer, - oldRepositoryData.shardGenerations().getShardGen(indexId, finalShardId) - ).v1(); + + // Index-N file would be present if snapshots other than shallow snapshots are present for this shard + if (blobs.stream() + .filter(blob -> blob.startsWith(SNAPSHOT_INDEX_PREFIX)) + .collect(Collectors.toSet()) + .size() > 0) { + if (useUUIDs) { + newGen = -1L; + blobStoreIndexShardSnapshots = buildBlobStoreIndexShardSnapshots( + blobs, + shardContainer, + oldRepositoryData.shardGenerations().getShardGen(indexId, finalShardId) + ).v1(); + } else { + Tuple tuple = buildBlobStoreIndexShardSnapshots( + blobs, + shardContainer + ); + newGen = 
tuple.v2() + 1; + blobStoreIndexShardSnapshots = tuple.v1(); + } } else { - Tuple tuple = buildBlobStoreIndexShardSnapshots(blobs, shardContainer); - newGen = tuple.v2() + 1; - blobStoreIndexShardSnapshots = tuple.v1(); + newGen = -1L; + blobStoreIndexShardSnapshots = BlobStoreIndexShardSnapshots.EMPTY; } allShardsListener.onResponse( deleteFromShardSnapshotMeta( @@ -1117,7 +1211,8 @@ protected void doRun() throws Exception { shardContainer, blobs, blobStoreIndexShardSnapshots, - newGen + newGen, + remoteStoreLockManagerFactory ) ); } @@ -1168,20 +1263,23 @@ private List resolveFilesToDelete( /** * Cleans up stale blobs directly under the repository root as well as all indices paths that aren't referenced by any existing * snapshots. This method is only to be called directly after a new {@link RepositoryData} was written to the repository and with - * parameters {@code foundIndices}, {@code rootBlobs} + * parameters {@code foundIndices}, {@code rootBlobs}. If remoteStoreLockManagerFactory is not null, remote store lock files are + * released when deleting the respective shallow-snap-UUID blobs. * - * @param deletedSnapshots if this method is called as part of a delete operation, the snapshot ids just deleted or empty if called as - * part of a repository cleanup - * @param foundIndices all indices blob containers found in the repository before {@code newRepoData} was written - * @param rootBlobs all blobs found directly under the repository root - * @param newRepoData new repository data that was just written - * @param listener listener to invoke with the combined {@link DeleteResult} of all blobs removed in this operation + * @param deletedSnapshots if this method is called as part of a delete operation, the snapshot ids just deleted or empty if called as + * part of a repository cleanup + * @param foundIndices all indices blob containers found in the repository before {@code newRepoData} was written + * @param rootBlobs all blobs found directly under the repository root + * @param newRepoData new repository data that was just written + * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files. + * @param listener listener to invoke with the combined {@link DeleteResult} of all blobs removed in this operation */ private void cleanupStaleBlobs( Collection deletedSnapshots, Map foundIndices, Map rootBlobs, RepositoryData newRepoData, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, ActionListener listener ) { final GroupedActionListener groupedListener = new GroupedActionListener<>(ActionListener.wrap(deleteResults -> { @@ -1207,24 +1305,31 @@ private void cleanupStaleBlobs( if (foundIndices.keySet().equals(survivingIndexIds)) { groupedListener.onResponse(DeleteResult.ZERO); } else { - cleanupStaleIndices(foundIndices, survivingIndexIds, groupedListener); + cleanupStaleIndices(foundIndices, survivingIndexIds, remoteStoreLockManagerFactory, groupedListener); } } /** * Runs cleanup actions on the repository. Increments the repository state id by one before executing any modifications on the - * repository. + * repository. If remoteStoreLockManagerFactory is not null, remote store lock files are released when deleting the respective + * shallow-snap-UUID blobs. * TODO: Add shard level cleanups * TODO: Add unreferenced index metadata cleanup *
<ul>
     *     <li>Deleting stale indices {@link #cleanupStaleIndices}</li>
     *     <li>Deleting unreferenced root level blobs {@link #cleanupStaleRootFiles}</li>
     * </ul>
- * @param repositoryStateId Current repository state id - * @param repositoryMetaVersion version of the updated repository metadata to write - * @param listener Listener to complete when done + * @param repositoryStateId Current repository state id + * @param repositoryMetaVersion version of the updated repository metadata to write + * @param remoteStoreLockManagerFactory RemoteStoreLockManagerFactory to be used for cleaning up remote store lock files. + * @param listener Listener to complete when done */ - public void cleanup(long repositoryStateId, Version repositoryMetaVersion, ActionListener listener) { + public void cleanup( + long repositoryStateId, + Version repositoryMetaVersion, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, + ActionListener listener + ) { try { if (isReadOnly()) { throw new RepositoryException(metadata.name(), "cannot run cleanup on readonly repository"); @@ -1254,6 +1359,7 @@ public void cleanup(long repositoryStateId, Version repositoryMetaVersion, Actio foundIndices, rootBlobs, repositoryData, + remoteStoreLockManagerFactory, ActionListener.map(listener, RepositoryCleanupResult::new) ), listener::onFailure @@ -1345,6 +1451,7 @@ private List cleanupStaleRootFiles( private void cleanupStaleIndices( Map foundIndices, Set survivingIndexIds, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, GroupedActionListener listener ) { final GroupedActionListener groupedListener = new GroupedActionListener<>(ActionListener.wrap(deleteResults -> { @@ -1369,7 +1476,7 @@ private void cleanupStaleIndices( foundIndices.size() - survivingIndexIds.size() ); for (int i = 0; i < workers; ++i) { - executeOneStaleIndexDelete(staleIndicesToDelete, groupedListener); + executeOneStaleIndexDelete(staleIndicesToDelete, remoteStoreLockManagerFactory, groupedListener); } } catch (Exception e) { // TODO: We shouldn't be blanket catching and suppressing all exceptions here and instead handle them safely upstream. @@ -1382,6 +1489,7 @@ private void cleanupStaleIndices( private void executeOneStaleIndexDelete( BlockingQueue> staleIndicesToDelete, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory, GroupedActionListener listener ) throws InterruptedException { Map.Entry indexEntry = staleIndicesToDelete.poll(0L, TimeUnit.MILLISECONDS); @@ -1391,6 +1499,37 @@ private void executeOneStaleIndexDelete( DeleteResult deleteResult = DeleteResult.ZERO; try { logger.debug("[{}] Found stale index [{}]. 
Cleaning it up", metadata.name(), indexSnId); + if (remoteStoreLockManagerFactory != null) { + Map shardBlobs = indexEntry.getValue().children(); + if (!shardBlobs.isEmpty()) { + for (Map.Entry shardBlob : shardBlobs.entrySet()) { + Map shardLevelBlobs = shardBlob.getValue().listBlobs(); + for (Map.Entry shardLevelBlob : shardLevelBlobs.entrySet()) { + String blob = shardLevelBlob.getKey(); + String snapshotUUID = blob.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length()); + if (blob.startsWith(SHALLOW_SNAPSHOT_PREFIX) && blob.endsWith(".dat")) { + RemoteStoreShardShallowCopySnapshot remoteStoreShardShallowCopySnapshot = + REMOTE_STORE_SHARD_SHALLOW_COPY_SNAPSHOT_FORMAT.read( + shardBlob.getValue(), + snapshotUUID, + namedXContentRegistry + ); + String indexUUID = remoteStoreShardShallowCopySnapshot.getIndexUUID(); + String remoteStoreRepoForIndex = remoteStoreShardShallowCopySnapshot.getRemoteStoreRepository(); + // Releasing lock files before deleting the shallow-snap-UUID file because in case of any failure + // while releasing the lock file, we would still have the corresponding shallow-snap-UUID file + // and that would be used during next delete operation for releasing this stale lock file + RemoteStoreMetadataLockManager remoteStoreMetadataLockManager = remoteStoreLockManagerFactory + .newLockManager(remoteStoreRepoForIndex, indexUUID, shardBlob.getKey()); + remoteStoreMetadataLockManager.release( + FileLockInfo.getLockInfoBuilder().withAcquirerId(snapshotUUID).build() + ); + } + } + } + } + } + // Deleting the index folder deleteResult = indexEntry.getValue().delete(); logger.debug("[{}] Cleaned up stale index [{}]", metadata.name(), indexSnId); } catch (IOException e) { @@ -1408,7 +1547,7 @@ private void executeOneStaleIndexDelete( logger.warn(new ParameterizedMessage("[{}] Exception during single stale index delete", metadata.name()), e); } - executeOneStaleIndexDelete(staleIndicesToDelete, listener); + executeOneStaleIndexDelete(staleIndicesToDelete, remoteStoreLockManagerFactory, listener); return deleteResult; })); } @@ -2942,7 +3081,8 @@ private ShardSnapshotMetaDeleteResult deleteFromShardSnapshotMeta( BlobContainer shardContainer, Set blobs, BlobStoreIndexShardSnapshots snapshots, - long indexGeneration + long indexGeneration, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory ) { // Build a list of snapshots that should be preserved List newSnapshotsList = new ArrayList<>(); @@ -2954,23 +3094,37 @@ private ShardSnapshotMetaDeleteResult deleteFromShardSnapshotMeta( } String writtenGeneration = null; try { - if (newSnapshotsList.isEmpty()) { + // Using survivingSnapshots instead of newSnapshotsList as shallow snapshots can be present which won't be part of + // newSnapshotsList + if (survivingSnapshots.isEmpty()) { + // No shallow copy or full copy snapshot is surviving. return new ShardSnapshotMetaDeleteResult(indexId, snapshotShardId, ShardGenerations.DELETED_SHARD_GEN, blobs); } else { - final BlobStoreIndexShardSnapshots updatedSnapshots = new BlobStoreIndexShardSnapshots(newSnapshotsList); - if (indexGeneration < 0L) { - writtenGeneration = UUIDs.randomBase64UUID(); - INDEX_SHARD_SNAPSHOTS_FORMAT.write(updatedSnapshots, shardContainer, writtenGeneration, compressor); + final BlobStoreIndexShardSnapshots updatedSnapshots; + // If we have surviving non shallow snapshots, update index- file. + if (newSnapshotsList.size() > 0) { + // Some full copy snapshots are surviving. 
+ updatedSnapshots = new BlobStoreIndexShardSnapshots(newSnapshotsList); + if (indexGeneration < 0L) { + writtenGeneration = UUIDs.randomBase64UUID(); + INDEX_SHARD_SNAPSHOTS_FORMAT.write(updatedSnapshots, shardContainer, writtenGeneration, compressor); + } else { + writtenGeneration = String.valueOf(indexGeneration); + writeShardIndexBlobAtomic(shardContainer, indexGeneration, updatedSnapshots); + } } else { - writtenGeneration = String.valueOf(indexGeneration); - writeShardIndexBlobAtomic(shardContainer, indexGeneration, updatedSnapshots); + // Some shallow copy snapshots are surviving. In this case, since no full copy snapshots are present, we use + // EMPTY BlobStoreIndexShardSnapshots for updatedSnapshots which is used in unusedBlobs to compute stale files, + // and use DELETED_SHARD_GEN since index-N file would not be present anymore. + updatedSnapshots = BlobStoreIndexShardSnapshots.EMPTY; + writtenGeneration = ShardGenerations.DELETED_SHARD_GEN; } final Set survivingSnapshotUUIDs = survivingSnapshots.stream().map(SnapshotId::getUUID).collect(Collectors.toSet()); return new ShardSnapshotMetaDeleteResult( indexId, snapshotShardId, writtenGeneration, - unusedBlobs(blobs, survivingSnapshotUUIDs, updatedSnapshots) + unusedBlobs(blobs, survivingSnapshotUUIDs, updatedSnapshots, remoteStoreLockManagerFactory) ); } } catch (IOException e) { @@ -3004,11 +3158,13 @@ private void writeShardIndexBlobAtomic( } // Unused blobs are all previous index-, data- and meta-blobs and that are not referenced by the new index- as well as all - // temporary blobs + // temporary blobs. If remoteStoreLockManagerFactory is non-null, the shallow-snap- files that do not belong to any of the + // surviving snapshots are also added for cleanup. private static List unusedBlobs( Set blobs, Set survivingSnapshotUUIDs, - BlobStoreIndexShardSnapshots updatedSnapshots + BlobStoreIndexShardSnapshots updatedSnapshots, + RemoteStoreLockManagerFactory remoteStoreLockManagerFactory ) { return blobs.stream() .filter( @@ -3018,6 +3174,13 @@ private static List unusedBlobs( && survivingSnapshotUUIDs.contains( blob.substring(SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length()) ) == false) + || (remoteStoreLockManagerFactory != null + ? 
(blob.startsWith(SHALLOW_SNAPSHOT_PREFIX) + && blob.endsWith(".dat") + && survivingSnapshotUUIDs.contains( + blob.substring(SHALLOW_SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length()) + ) == false) + : false) || (blob.startsWith(UPLOADED_DATA_BLOB_PREFIX) && updatedSnapshots.findNameFile(canonicalName(blob)) == null) || FsBlobContainer.isTempBlobName(blob) ) diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index 0a455c376f62d..3946e4363298c 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -2230,16 +2230,36 @@ private void deleteSnapshotsFromRepository( assert currentlyFinalizing.contains(deleteEntry.repository()); final List snapshotIds = deleteEntry.getSnapshots(); assert deleteEntry.state() == SnapshotDeletionsInProgress.State.STARTED : "incorrect state for entry [" + deleteEntry + "]"; - repositoriesService.repository(deleteEntry.repository()) - .deleteSnapshots( + final Repository repository = repositoriesService.repository(deleteEntry.repository()); + + // TODO: Relying on repository flag to decide delete flow may lead to shallow snapshot blobs not being taken up for cleanup + // when the repository currently have the flag disabled and we try to delete the shallow snapshots taken prior to disabling + // the flag. This can be improved by having the info whether there ever were any shallow snapshot present in this repository + // or not in RepositoryData. + // SEE https://github.com/opensearch-project/OpenSearch/issues/8610 + final boolean cleanupRemoteStoreLockFiles = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings()); + if (cleanupRemoteStoreLockFiles) { + repository.deleteSnapshotsAndReleaseLockFiles( snapshotIds, repositoryData.getGenId(), minCompatibleVersion(minNodeVersion, repositoryData, snapshotIds), + remoteStoreLockManagerFactory, ActionListener.wrap(updatedRepoData -> { logger.info("snapshots {} deleted", snapshotIds); removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData); }, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData)) ); + } else { + repository.deleteSnapshots( + snapshotIds, + repositoryData.getGenId(), + minCompatibleVersion(minNodeVersion, repositoryData, snapshotIds), + ActionListener.wrap(updatedRepoData -> { + logger.info("snapshots {} deleted", snapshotIds); + removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData); + }, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData)) + ); + } } } From 8a831f84917ada0a9ad723691469024e61be1d33 Mon Sep 17 00:00:00 2001 From: Varun Bansal Date: Tue, 11 Jul 2023 17:07:10 +0530 Subject: [PATCH 27/29] Add shard id to remote store logs (#8574) --------- Signed-off-by: bansvaru --- .../opensearch/index/shard/IndexShard.java | 2 +- .../shard/RemoteStoreRefreshListener.java | 13 ++++-- .../index/translog/RemoteFsTranslog.java | 43 ++++++++++++------- .../transfer/TranslogTransferManager.java | 16 +++---- .../TranslogTransferManagerTests.java | 2 + 5 files changed, 47 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index e7720e9343b80..154e1a4f22242 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ 
b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4589,7 +4589,7 @@ public void syncTranslogFilesFromRemoteTranslog() throws IOException { TranslogFactory translogFactory = translogFactorySupplier.apply(indexSettings, shardRouting); assert translogFactory instanceof RemoteBlobStoreInternalTranslogFactory; Repository repository = ((RemoteBlobStoreInternalTranslogFactory) translogFactory).getRepository(); - RemoteFsTranslog.download(repository, shardId, getThreadPool(), shardPath().resolveTranslog()); + RemoteFsTranslog.download(repository, shardId, getThreadPool(), shardPath().resolveTranslog(), logger); } /** diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index aaba74cd54341..46d52bc8ca5df 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -8,7 +8,6 @@ package org.opensearch.index.shard; -import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.codecs.CodecUtil; @@ -22,6 +21,7 @@ import org.opensearch.action.bulk.BackoffPolicy; import org.opensearch.common.CheckedFunction; import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.common.logging.Loggers; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.index.engine.EngineException; @@ -60,7 +60,7 @@ */ public final class RemoteStoreRefreshListener implements ReferenceManager.RefreshListener { - private static final Logger logger = LogManager.getLogger(RemoteStoreRefreshListener.class); + private final Logger logger; /** * The initial retry interval at which the retry job gets scheduled after a failure. 
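The swap from a static class-level logger to an instance field above is the core of this patch: each component now builds its logger from the shard it serves, so every message is tagged with the shard automatically. A sketch of the pattern (the class here is hypothetical; the Loggers.getLogger(Class, ShardId) call is the one used throughout this patch):

    import org.apache.logging.log4j.Logger;
    import org.opensearch.common.logging.Loggers;
    import org.opensearch.index.shard.ShardId;

    final class ShardScopedComponent {
        private final Logger logger;

        ShardScopedComponent(ShardId shardId) {
            // Messages logged here carry the shard prefix, so call sites no longer
            // need to interpolate the shard id by hand (see the log-message cleanups below).
            this.logger = Loggers.getLogger(getClass(), shardId);
        }

        void download() {
            logger.trace("Downloading translog files from remote");
        }
    }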
@@ -117,6 +117,7 @@ public RemoteStoreRefreshListener( SegmentReplicationCheckpointPublisher checkpointPublisher, RemoteRefreshSegmentTracker segmentTracker ) { + logger = Loggers.getLogger(getClass(), indexShard.shardId()); this.indexShard = indexShard; this.storeDirectory = indexShard.store().directory(); this.remoteDirectory = (RemoteSegmentStoreDirectory) ((FilterDirectory) ((FilterDirectory) indexShard.remoteStore().directory()) @@ -155,7 +156,7 @@ public void onFailure(String file) { // Track upload failure segmentTracker.addUploadBytesFailed(latestFileNameSizeOnLocalMap.get(file)); } - }, remoteDirectory, storeDirectory, this::getChecksumOfLocalFile); + }, remoteDirectory, storeDirectory, this::getChecksumOfLocalFile, logger); } @Override @@ -470,6 +471,8 @@ private void updateFinalUploadStatusInSegmentTracker(boolean uploadStatus, long */ private static class FileUploader { + private final Logger logger; + private final UploadTracker uploadTracker; private final RemoteSegmentStoreDirectory remoteDirectory; @@ -482,12 +485,14 @@ public FileUploader( UploadTracker uploadTracker, RemoteSegmentStoreDirectory remoteDirectory, Directory storeDirectory, - CheckedFunction checksumProvider + CheckedFunction checksumProvider, + Logger logger ) { this.uploadTracker = uploadTracker; this.remoteDirectory = remoteDirectory; this.storeDirectory = storeDirectory; this.checksumProvider = checksumProvider; + this.logger = logger; } /** diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 1e565b97387d1..9e027b9765bbc 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -8,9 +8,9 @@ package org.opensearch.index.translog; -import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.common.SetOnce; +import org.opensearch.common.logging.Loggers; import org.opensearch.common.util.concurrent.ReleasableLock; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.util.FileSystemUtils; @@ -32,6 +32,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.HashSet; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.concurrent.Semaphore; @@ -49,7 +50,7 @@ */ public class RemoteFsTranslog extends Translog { - private static final Logger logger = LogManager.getLogger(RemoteFsTranslog.class); + private final Logger logger; private final BlobStoreRepository blobStoreRepository; private final TranslogTransferManager translogTransferManager; private final FileTransferTracker fileTransferTracker; @@ -82,16 +83,19 @@ public RemoteFsTranslog( BooleanSupplier primaryModeSupplier ) throws IOException { super(config, translogUUID, deletionPolicy, globalCheckpointSupplier, primaryTermSupplier, persistedSequenceNumberConsumer); + logger = Loggers.getLogger(getClass(), shardId); this.blobStoreRepository = blobStoreRepository; this.primaryModeSupplier = primaryModeSupplier; fileTransferTracker = new FileTransferTracker(shardId); this.translogTransferManager = buildTranslogTransferManager(blobStoreRepository, threadPool, shardId, fileTransferTracker); try { - download(translogTransferManager, location); + download(translogTransferManager, location, logger); Checkpoint checkpoint = readCheckpoint(location); this.readers.addAll(recoverFromFiles(checkpoint)); if (readers.isEmpty()) { - 
throw new IllegalStateException("at least one reader must be recovered"); + String errorMsg = String.format(Locale.ROOT, "%s at least one reader must be recovered", shardId); + logger.error(errorMsg); + throw new IllegalStateException(errorMsg); } boolean success = false; current = null; @@ -120,8 +124,13 @@ public RemoteFsTranslog( } } - public static void download(Repository repository, ShardId shardId, ThreadPool threadPool, Path location) throws IOException { - assert repository instanceof BlobStoreRepository : "repository should be instance of BlobStoreRepository"; + public static void download(Repository repository, ShardId shardId, ThreadPool threadPool, Path location, Logger logger) + throws IOException { + assert repository instanceof BlobStoreRepository : String.format( + Locale.ROOT, + "%s repository should be instance of BlobStoreRepository", + shardId + ); BlobStoreRepository blobStoreRepository = (BlobStoreRepository) repository; FileTransferTracker fileTransferTracker = new FileTransferTracker(shardId); TranslogTransferManager translogTransferManager = buildTranslogTransferManager( @@ -130,11 +139,11 @@ public static void download(Repository repository, ShardId shardId, ThreadPool t shardId, fileTransferTracker ); - RemoteFsTranslog.download(translogTransferManager, location); + RemoteFsTranslog.download(translogTransferManager, location, logger); } - public static void download(TranslogTransferManager translogTransferManager, Path location) throws IOException { - logger.info("Downloading translog files from remote for shard {} ", translogTransferManager.getShardId()); + public static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger) throws IOException { + logger.trace("Downloading translog files from remote"); TranslogTransferMetadata translogMetadata = translogTransferManager.readMetadata(); if (translogMetadata != null) { if (Files.notExists(location)) { @@ -156,7 +165,7 @@ public static void download(TranslogTransferManager translogTransferManager, Pat location.resolve(Translog.CHECKPOINT_FILE_NAME) ); } - logger.info("Downloaded translog files from remote for shard {} ", translogTransferManager.getShardId()); + logger.trace("Downloaded translog files from remote"); } public static TranslogTransferManager buildTranslogTransferManager( @@ -321,8 +330,8 @@ public boolean syncNeeded() { @Override public void close() throws IOException { - assert Translog.calledFromOutsideOrViaTragedyClose() - : "Translog.close method is called from inside Translog, but not via closeOnTragicEvent method"; + assert Translog.calledFromOutsideOrViaTragedyClose() : shardId + + "Translog.close method is called from inside Translog, but not via closeOnTragicEvent method"; if (closed.compareAndSet(false, true)) { try (ReleasableLock lock = writeLock.acquire()) { sync(); @@ -340,12 +349,14 @@ protected long getMinReferencedGen() throws IOException { minGenerationForSeqNo(minSeqNoToKeep, current, readers) ); - assert minReferencedGen >= getMinFileGeneration() : "deletion policy requires a minReferenceGen of [" + assert minReferencedGen >= getMinFileGeneration() : shardId + + " deletion policy requires a minReferenceGen of [" + minReferencedGen + "] but the lowest gen available is [" + getMinFileGeneration() + "]"; - assert minReferencedGen <= currentFileGeneration() : "deletion policy requires a minReferenceGen of [" + assert minReferencedGen <= currentFileGeneration() : shardId + + " deletion policy requires a minReferenceGen of [" + minReferencedGen + "] 
which is higher than the current generation [" + currentFileGeneration() @@ -356,7 +367,7 @@ protected long getMinReferencedGen() throws IOException { protected void setMinSeqNoToKeep(long seqNo) { if (seqNo < this.minSeqNoToKeep) { throw new IllegalArgumentException( - "min seq number required can't go backwards: " + "current [" + this.minSeqNoToKeep + "] new [" + seqNo + "]" + shardId + " min seq number required can't go backwards: " + "current [" + this.minSeqNoToKeep + "] new [" + seqNo + "]" ); } this.minSeqNoToKeep = seqNo; @@ -416,7 +427,7 @@ private void deleteStaleRemotePrimaryTerms() { // of older primary term. if (olderPrimaryCleaned.trySet(Boolean.TRUE)) { // First we delete all stale primary terms folders from remote store - assert readers.isEmpty() == false : "Expected non-empty readers"; + assert readers.isEmpty() == false : shardId + " Expected non-empty readers"; long minimumReferencedPrimaryTerm = readers.stream().map(BaseTranslogReader::getPrimaryTerm).min(Long::compare).get(); translogTransferManager.deletePrimaryTermsAsync(minimumReferencedPrimaryTerm); } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 6da0ee5521738..54140226e3744 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -8,7 +8,6 @@ package org.opensearch.index.translog.transfer; -import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.store.IndexInput; @@ -21,6 +20,7 @@ import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.io.VersionedCodecStreamWrapper; import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.logging.Loggers; import org.opensearch.common.lucene.store.ByteArrayIndexInput; import org.opensearch.index.shard.ShardId; import org.opensearch.index.translog.Translog; @@ -61,8 +61,7 @@ public class TranslogTransferManager { private static final long TRANSFER_TIMEOUT_IN_MILLIS = 30000; - private static final Logger logger = LogManager.getLogger(TranslogTransferManager.class); - + private final Logger logger; private final static String METADATA_DIR = "metadata"; private final static String DATA_DIR = "data"; @@ -84,6 +83,7 @@ public TranslogTransferManager( this.remoteDataTransferPath = remoteBaseTransferPath.add(DATA_DIR); this.remoteMetadataTransferPath = remoteBaseTransferPath.add(METADATA_DIR); this.fileTransferTracker = fileTransferTracker; + this.logger = Loggers.getLogger(getClass(), shardId); } public ShardId getShardId() { @@ -200,7 +200,7 @@ public TranslogTransferMetadata readMetadata() throws IOException { exceptionSetOnce.set(e); } }, e -> { - logger.error(() -> new ParameterizedMessage("Exception while listing metadata files "), e); + logger.error(() -> new ParameterizedMessage("Exception while listing metadata files"), e); exceptionSetOnce.set((IOException) e); }), latch @@ -295,7 +295,7 @@ public void deleteGenerationAsync(long primaryTerm, Set generations, Runna * @param minPrimaryTermToKeep all primary terms below this primary term are deleted. 
*/ public void deletePrimaryTermsAsync(long minPrimaryTermToKeep) { - logger.info("Deleting primary terms from remote store lesser than {} for {}", minPrimaryTermToKeep, shardId); + logger.info("Deleting primary terms from remote store lesser than {}", minPrimaryTermToKeep); transferService.listFoldersAsync(ThreadPool.Names.REMOTE_PURGE, remoteDataTransferPath, new ActionListener<>() { @Override public void onResponse(Set folders) { @@ -333,7 +333,7 @@ private void deletePrimaryTermAsync(long primaryTerm) { new ActionListener<>() { @Override public void onResponse(Void unused) { - logger.info("Deleted primary term {} for {}", primaryTerm, shardId); + logger.info("Deleted primary term {}", primaryTerm); } @Override @@ -349,12 +349,12 @@ public void delete() { transferService.deleteAsync(ThreadPool.Names.REMOTE_PURGE, remoteBaseTransferPath, new ActionListener<>() { @Override public void onResponse(Void unused) { - logger.info("Deleted all remote translog data for {}", shardId); + logger.info("Deleted all remote translog data"); } @Override public void onFailure(Exception e) { - logger.error("Exception occurred while cleaning translog ", e); + logger.error("Exception occurred while cleaning translog", e); } }); } diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index 924a9d039da28..5f8aa64457896 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -17,6 +17,7 @@ import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; import org.opensearch.common.blobstore.support.PlainBlobMetadata; +import org.opensearch.index.Index; import org.opensearch.index.shard.ShardId; import org.opensearch.index.translog.Translog; import org.opensearch.index.translog.transfer.FileSnapshot.CheckpointFileSnapshot; @@ -65,6 +66,7 @@ public void setUp() throws Exception { primaryTerm = randomNonNegativeLong(); generation = randomNonNegativeLong(); shardId = mock(ShardId.class); + when(shardId.getIndex()).thenReturn(new Index("index", "indexUUid")); minTranslogGeneration = randomLongBetween(0, generation); remoteBaseTransferPath = new BlobPath().add("base_path"); transferService = mock(TransferService.class); From 7208edc1832c6be3e8548d01289a88370027e5bb Mon Sep 17 00:00:00 2001 From: suraj kumar Date: Tue, 11 Jul 2023 17:11:40 +0530 Subject: [PATCH 28/29] Add wrapped tracer implementation (#8565) * Add wrapped tracer implementation Signed-off-by: suranjay * Add changelog entry Signed-off-by: suranjay * Add @opensearch.internal annotation Signed-off-by: suranjay * Fix test Signed-off-by: suranjay * Fix changelog entry Signed-off-by: suranjay --------- Signed-off-by: suranjay --- CHANGELOG.md | 3 + .../org/opensearch/telemetry/Telemetry.java | 2 + .../telemetry/tracing/AbstractSpan.java | 2 + .../telemetry/tracing/DefaultSpanScope.java | 4 +- .../telemetry/tracing/DefaultTracer.java | 4 +- .../opensearch/telemetry/tracing/Span.java | 4 +- .../telemetry/tracing/SpanReference.java | 4 +- .../tracing/TracerContextStorage.java | 2 + .../tracing/TracingContextPropagator.java | 2 + .../telemetry/tracing/TracingTelemetry.java | 2 + .../telemetry/tracing/noop/NoopSpanScope.java | 2 + .../telemetry/tracing/noop/NoopTracer.java | 2 + .../telemetry/tracing/NoopTracerFactory.java | 2 + 
...hreadContextBasedTracerContextStorage.java | 2 + .../telemetry/tracing/TracerFactory.java | 24 +++++-- .../telemetry/tracing/WrappedTracer.java | 52 ++++++++++++++ .../telemetry/tracing/TracerFactoryTests.java | 9 +-- .../telemetry/tracing/WrappedTracerTests.java | 69 +++++++++++++++++++ 18 files changed, 175 insertions(+), 16 deletions(-) create mode 100644 server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java create mode 100644 server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 5130dd00845b6..fc367be22c1fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Pass localNode info to all plugins on node start ([#7919](https://github.com/opensearch-project/OpenSearch/pull/7919)) - Improved performance of parsing floating point numbers ([#7909](https://github.com/opensearch-project/OpenSearch/pull/7909)) - Move span actions to Scope ([#8411](https://github.com/opensearch-project/OpenSearch/pull/8411)) +- Add wrapper tracer implementation ### Deprecated @@ -164,6 +165,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Enabling compression levels for zstd and zstd_no_dict ([#8312](https://github.com/opensearch-project/OpenSearch/pull/8312)) - Optimize Metadata build() to skip redundant computations as part of ClusterState build ([#7853](https://github.com/opensearch-project/OpenSearch/pull/7853)) - Add safeguard limits for file cache during node level allocation ([#8208](https://github.com/opensearch-project/OpenSearch/pull/8208)) +- Move span actions to Scope ([#8411](https://github.com/opensearch-project/OpenSearch/pull/8411)) +- Add wrapper tracer implementation ([#8565](https://github.com/opensearch-project/OpenSearch/pull/8565)) ### Deprecated diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/Telemetry.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/Telemetry.java index 6f50699528b6b..65c974a0d0c36 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/Telemetry.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/Telemetry.java @@ -13,6 +13,8 @@ /** * Interface defining telemetry + * + * @opensearch.internal */ public interface Telemetry { diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/AbstractSpan.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/AbstractSpan.java index 316edc971913e..150a32b14d0f8 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/AbstractSpan.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/AbstractSpan.java @@ -10,6 +10,8 @@ /** * Base span + * + * @opensearch.internal */ public abstract class AbstractSpan implements Span { diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultSpanScope.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultSpanScope.java index 58e9e0abad739..356b72187de74 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultSpanScope.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultSpanScope.java @@ -12,8 +12,10 @@ /** * Default implementation of Scope + * + * @opensearch.internal */ -public class DefaultSpanScope implements SpanScope { +final class DefaultSpanScope implements SpanScope { private final Span span; diff --git 
a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java index 783edd238c1c2..ea59eec645420 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java @@ -16,9 +16,9 @@ * The default tracer implementation. It handles tracing context propagation between spans by maintaining * current active span in its storage * - * + * @opensearch.internal */ -public class DefaultTracer implements Tracer { +class DefaultTracer implements Tracer { static final String THREAD_NAME = "th_name"; private final TracingTelemetry tracingTelemetry; diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Span.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Span.java index d60b4e60adece..6cb1c8234f3de 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Span.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Span.java @@ -11,7 +11,9 @@ /** * An interface that represents a tracing span. * Spans are created by the Tracer.startSpan method. - * Span must be ended by calling Tracer.endSpan which internally calls Span's endSpan. + * Span must be ended by calling SpanScope.close which internally calls Span's endSpan. + * + * @opensearch.internal */ public interface Span { diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/SpanReference.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/SpanReference.java index 99d1bd3c93c84..180136ecf7a57 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/SpanReference.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/SpanReference.java @@ -10,8 +10,10 @@ /** * Wrapper class to hold reference of Span + * + * @opensearch.internal */ -public class SpanReference { +final class SpanReference { private Span span; diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracerContextStorage.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracerContextStorage.java index eb93006835332..d85b404b0ce41 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracerContextStorage.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracerContextStorage.java @@ -12,6 +12,8 @@ * Storage interface used for storing tracing context * @param key type * @param value type + * + * @opensearch.internal */ public interface TracerContextStorage { /** diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracingContextPropagator.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracingContextPropagator.java index 1152e3aedfa88..3e4a377d33a3d 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracingContextPropagator.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracingContextPropagator.java @@ -13,6 +13,8 @@ /** * Interface defining the tracing related context propagation + * + * @opensearch.internal */ public interface TracingContextPropagator { diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracingTelemetry.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracingTelemetry.java index 16c76bd0cc141..bce955fc2d99e 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracingTelemetry.java 
+++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/TracingTelemetry.java @@ -12,6 +12,8 @@ /** * Interface for tracing telemetry providers + * + * @opensearch.internal */ public interface TracingTelemetry extends Closeable { diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopSpanScope.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopSpanScope.java index c0dbaf65ba48b..a1d16d1d80d00 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopSpanScope.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopSpanScope.java @@ -12,6 +12,8 @@ /** * No-op implementation of SpanScope + * + * @opensearch.internal */ public final class NoopSpanScope implements SpanScope { diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java index a66cbcf4fef52..a1768d7d59116 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java @@ -13,6 +13,8 @@ /** * No-op implementation of Tracer + * + * @opensearch.internal */ public class NoopTracer implements Tracer { diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/NoopTracerFactory.java b/server/src/main/java/org/opensearch/telemetry/tracing/NoopTracerFactory.java index 3d7f8133788ce..f82a390dc1754 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/NoopTracerFactory.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/NoopTracerFactory.java @@ -14,6 +14,8 @@ /** * No-op implementation of TracerFactory + * + * @opensearch.internal */ public class NoopTracerFactory extends TracerFactory { public NoopTracerFactory() { diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/ThreadContextBasedTracerContextStorage.java b/server/src/main/java/org/opensearch/telemetry/tracing/ThreadContextBasedTracerContextStorage.java index 0d0b795fdc715..c009ab2391aab 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/ThreadContextBasedTracerContextStorage.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/ThreadContextBasedTracerContextStorage.java @@ -18,6 +18,8 @@ /** * Core's ThreadContext based TracerContextStorage implementation + * + * @opensearch.internal */ public class ThreadContextBasedTracerContextStorage implements TracerContextStorage, ThreadContextStatePropagator { diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/TracerFactory.java b/server/src/main/java/org/opensearch/telemetry/tracing/TracerFactory.java index 8228cded4c822..d8fe812c82f53 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/TracerFactory.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/TracerFactory.java @@ -21,7 +21,7 @@ /** * TracerManager represents a single global class that is used to access tracers. - * + *
+ * <p>
* The Tracer singleton object can be retrieved using tracerManager.getTracer(). The TracerManager object * is created during class initialization and cannot subsequently be changed. */ @@ -30,21 +30,20 @@ public class TracerFactory implements Closeable { private static final Logger logger = LogManager.getLogger(TracerFactory.class); private final TelemetrySettings telemetrySettings; - private final Tracer defaultTracer; + private final Tracer tracer; public TracerFactory(TelemetrySettings telemetrySettings, Optional telemetry, ThreadContext threadContext) { this.telemetrySettings = telemetrySettings; - this.defaultTracer = telemetry.map(Telemetry::getTracingTelemetry) - .map(tracingTelemetry -> createDefaultTracer(tracingTelemetry, threadContext)) - .orElse(NoopTracer.INSTANCE); + this.tracer = tracer(telemetry, threadContext); } /** * Returns the tracer instance + * * @return tracer instance */ public Tracer getTracer() { - return telemetrySettings.isTracingEnabled() ? defaultTracer : NoopTracer.INSTANCE; + return tracer; } /** @@ -53,12 +52,19 @@ public Tracer getTracer() { @Override public void close() { try { - defaultTracer.close(); + tracer.close(); } catch (IOException e) { logger.warn("Error closing tracer", e); } } + private Tracer tracer(Optional telemetry, ThreadContext threadContext) { + return telemetry.map(Telemetry::getTracingTelemetry) + .map(tracingTelemetry -> createDefaultTracer(tracingTelemetry, threadContext)) + .map(defaultTracer -> createWrappedTracer(defaultTracer)) + .orElse(NoopTracer.INSTANCE); + } + private Tracer createDefaultTracer(TracingTelemetry tracingTelemetry, ThreadContext threadContext) { TracerContextStorage tracerContextStorage = new ThreadContextBasedTracerContextStorage( threadContext, @@ -67,4 +73,8 @@ private Tracer createDefaultTracer(TracingTelemetry tracingTelemetry, ThreadCont return new DefaultTracer(tracingTelemetry, tracerContextStorage); } + private Tracer createWrappedTracer(Tracer defaultTracer) { + return new WrappedTracer(telemetrySettings, defaultTracer); + } + } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java b/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java new file mode 100644 index 0000000000000..0ba9a8ea5fd88 --- /dev/null +++ b/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry.tracing; + +import org.opensearch.telemetry.TelemetrySettings; +import org.opensearch.telemetry.tracing.noop.NoopTracer; + +import java.io.IOException; + +/** + * Wrapper implementation of Tracer. 
This delegates calls to the right tracer based on the tracer settings + * + * @opensearch.internal + */ +final class WrappedTracer implements Tracer { + + private final Tracer defaultTracer; + private final TelemetrySettings telemetrySettings; + + /** + * Creates WrappedTracer instance + * + * @param telemetrySettings telemetry settings + * @param defaultTracer default tracer instance + */ + public WrappedTracer(TelemetrySettings telemetrySettings, Tracer defaultTracer) { + this.defaultTracer = defaultTracer; + this.telemetrySettings = telemetrySettings; + } + + @Override + public SpanScope startSpan(String spanName) { + Tracer delegateTracer = getDelegateTracer(); + return delegateTracer.startSpan(spanName); + } + + @Override + public void close() throws IOException { + defaultTracer.close(); + } + + // visible for testing + Tracer getDelegateTracer() { + return telemetrySettings.isTracingEnabled() ? defaultTracer : NoopTracer.INSTANCE; + } +} diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java index df9cdd6669d23..0ffccee505d43 100644 --- a/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java +++ b/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java @@ -36,19 +36,20 @@ public void close() { tracerFactory.close(); } - public void testGetTracerWithTracingDisabledReturnsNoopTracer() { + public void testGetTracerWithUnavailableTracingTelemetryReturnsNoopTracer() { Settings settings = Settings.builder().put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), false).build(); TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); Telemetry mockTelemetry = mock(Telemetry.class); when(mockTelemetry.getTracingTelemetry()).thenReturn(mock(TracingTelemetry.class)); - tracerFactory = new TracerFactory(telemetrySettings, Optional.of(mockTelemetry), new ThreadContext(Settings.EMPTY)); + tracerFactory = new TracerFactory(telemetrySettings, Optional.empty(), new ThreadContext(Settings.EMPTY)); Tracer tracer = tracerFactory.getTracer(); + assertTrue(tracer instanceof NoopTracer); assertTrue(tracer.startSpan("foo") == SpanScope.NO_OP); } - public void testGetTracerWithTracingEnabledReturnsDefaultTracer() { + public void testGetTracerWithAvailableTracingTelemetryReturnsWrappedTracer() { Settings settings = Settings.builder().put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true).build(); TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); Telemetry mockTelemetry = mock(Telemetry.class); @@ -56,7 +57,7 @@ public void testGetTracerWithTracingEnabledReturnsDefaultTracer() { tracerFactory = new TracerFactory(telemetrySettings, Optional.of(mockTelemetry), new ThreadContext(Settings.EMPTY)); Tracer tracer = tracerFactory.getTracer(); - assertTrue(tracer instanceof DefaultTracer); + assertTrue(tracer instanceof WrappedTracer); } diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java new file mode 100644 index 0000000000000..d1abc5a4d98aa --- /dev/null +++ b/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the
Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry.tracing; + +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.telemetry.TelemetrySettings; +import org.opensearch.telemetry.tracing.noop.NoopTracer; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; + +public class WrappedTracerTests extends OpenSearchTestCase { + + public void testStartSpanWithTracingDisabledInvokesNoopTracer() throws Exception { + Settings settings = Settings.builder().put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), false).build(); + TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); + DefaultTracer mockDefaultTracer = mock(DefaultTracer.class); + + try (WrappedTracer wrappedTracer = new WrappedTracer(telemetrySettings, mockDefaultTracer)) { + wrappedTracer.startSpan("foo"); + assertTrue(wrappedTracer.getDelegateTracer() instanceof NoopTracer); + verify(mockDefaultTracer, never()).startSpan("foo"); + } + } + + public void testStartSpanWithTracingEnabledInvokesDefaultTracer() throws Exception { + Settings settings = Settings.builder().put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true).build(); + TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); + DefaultTracer mockDefaultTracer = mock(DefaultTracer.class); + + try (WrappedTracer wrappedTracer = new WrappedTracer(telemetrySettings, mockDefaultTracer)) { + wrappedTracer.startSpan("foo"); + + assertTrue(wrappedTracer.getDelegateTracer() instanceof DefaultTracer); + verify(mockDefaultTracer).startSpan("foo"); + } + } + + public void testClose() throws IOException { + DefaultTracer mockDefaultTracer = mock(DefaultTracer.class); + WrappedTracer wrappedTracer = new WrappedTracer(null, mockDefaultTracer); + + wrappedTracer.close(); + + verify(mockDefaultTracer).close(); + } + + private Set> getClusterSettings() { + Set> allTracerSettings = new HashSet<>(); + ClusterSettings.FEATURE_FLAGGED_CLUSTER_SETTINGS.get(List.of(FeatureFlags.TELEMETRY)).stream().forEach((allTracerSettings::add)); + return allTracerSettings; + } +} From 5f98d674c74ab5d24195993734bb054840906b47 Mon Sep 17 00:00:00 2001 From: Raghuvansh Raj Date: Tue, 11 Jul 2023 18:06:01 +0530 Subject: [PATCH 29/29] [Remote Store] Add multipart upload integration for translog and segment files (#7119) Signed-off-by: Raghuvansh Raj --- .../common/io/InputStreamContainer.java | 11 +- .../RemoteStoreBaseIntegTestCase.java | 4 +- .../RemoteStoreMultipartFileCorruptionIT.java | 111 +++++++++ .../multipart/RemoteStoreMultipartIT.java | 38 +++ .../multipart/mocks/MockFsBlobStore.java | 36 +++ .../multipart/mocks/MockFsRepository.java | 46 ++++ .../mocks/MockFsRepositoryPlugin.java | 38 +++ .../mocks/MockFsVerifyingBlobContainer.java | 120 +++++++++ .../VerifyingMultiStreamBlobContainer.java | 34 +++ .../transfer/RemoteTransferContainer.java | 2 +- .../org/opensearch/common/util/ByteUtils.java | 10 + .../shard/RemoteStoreRefreshListener.java | 227 ++++++++---------- .../index/store/RemoteDirectory.java | 4 + 
.../store/RemoteSegmentStoreDirectory.java | 127 ++++++++++ .../ChecksumCombinationException.java | 2 +- .../opensearch/index/translog/Checkpoint.java | 2 +- .../TranslogCheckedContainer.java | 17 +- .../index/translog/TranslogHeader.java | 12 +- .../index/translog/TranslogReader.java | 37 ++- .../index/translog/TranslogWriter.java | 25 +- .../index/translog/checked/package-info.java | 10 - .../transfer/BlobStoreTransferService.java | 98 +++++++- .../index/translog/transfer/FileSnapshot.java | 17 +- .../translog/transfer/TransferService.java | 28 ++- .../TranslogCheckpointTransferSnapshot.java | 10 +- .../transfer/TranslogTransferManager.java | 15 +- .../org/opensearch/threadpool/ThreadPool.java | 2 +- .../RemoteTransferContainerTests.java | 41 ++++ .../RemoteSegmentStoreDirectoryTests.java | 86 +++++++ .../index/store/TestUploadListener.java | 43 ++++ ...oreTransferServiceMockRepositoryTests.java | 189 +++++++++++++++ .../BlobStoreTransferServiceTests.java | 22 +- .../translog/transfer/FileSnapshotTests.java | 6 +- .../transfer/FileTransferTrackerTests.java | 12 +- .../TranslogTransferManagerTests.java | 31 ++- 35 files changed, 1293 insertions(+), 220 deletions(-) rename {server => libs/common}/src/main/java/org/opensearch/common/io/InputStreamContainer.java (85%) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartFileCorruptionIT.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsBlobStore.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepository.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepositoryPlugin.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsVerifyingBlobContainer.java create mode 100644 server/src/main/java/org/opensearch/common/blobstore/VerifyingMultiStreamBlobContainer.java rename server/src/main/java/org/opensearch/index/translog/{checked => }/TranslogCheckedContainer.java (74%) delete mode 100644 server/src/main/java/org/opensearch/index/translog/checked/package-info.java create mode 100644 server/src/test/java/org/opensearch/index/store/TestUploadListener.java create mode 100644 server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceMockRepositoryTests.java diff --git a/server/src/main/java/org/opensearch/common/io/InputStreamContainer.java b/libs/common/src/main/java/org/opensearch/common/io/InputStreamContainer.java similarity index 85% rename from server/src/main/java/org/opensearch/common/io/InputStreamContainer.java rename to libs/common/src/main/java/org/opensearch/common/io/InputStreamContainer.java index ce5dcff9f5349..eb8a4e1382497 100644 --- a/server/src/main/java/org/opensearch/common/io/InputStreamContainer.java +++ b/libs/common/src/main/java/org/opensearch/common/io/InputStreamContainer.java @@ -19,6 +19,7 @@ public class InputStreamContainer { private final InputStream inputStream; private final long contentLength; + private final long offset; /** * Construct a new stream object @@ -26,9 +27,10 @@ public class InputStreamContainer { * @param inputStream The input stream that is to be encapsulated * @param contentLength The total content length that is to be read from the stream */ - public 
InputStreamContainer(InputStream inputStream, long contentLength) { + public InputStreamContainer(InputStream inputStream, long contentLength, long offset) { this.inputStream = inputStream; this.contentLength = contentLength; + this.offset = offset; } /** @@ -44,4 +46,11 @@ public InputStream getInputStream() { public long getContentLength() { return contentLength; } + + /** + * @return offset of the source content. + */ + public long getOffset() { + return offset; + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index 2b3fcadfc645e..10f01749ab4c5 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -102,9 +102,7 @@ protected void putRepository(Path path) { protected void setupRepo() { internalCluster().startClusterManagerOnlyNode(); absolutePath = randomRepoPath().toAbsolutePath(); - assertAcked( - clusterAdmin().preparePutRepository(REPOSITORY_NAME).setType("fs").setSettings(Settings.builder().put("location", absolutePath)) - ); + putRepository(absolutePath); } @After diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartFileCorruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartFileCorruptionIT.java new file mode 100644 index 0000000000000..8f375ca6e2b01 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartFileCorruptionIT.java @@ -0,0 +1,111 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotestore.multipart; + +import org.junit.After; +import org.junit.Before; +import org.opensearch.action.index.IndexResponse; +import org.opensearch.action.support.IndicesOptions; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.UUIDs; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.IndexModule; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.plugins.Plugin; +import org.opensearch.remotestore.multipart.mocks.MockFsRepository; +import org.opensearch.remotestore.multipart.mocks.MockFsRepositoryPlugin; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.Collection; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +public class RemoteStoreMultipartFileCorruptionIT extends OpenSearchIntegTestCase { + + protected static final String REPOSITORY_NAME = "test-remore-store-repo"; + private static final String INDEX_NAME = "remote-store-test-idx-1"; + + @Override + protected Collection> nodePlugins() { + return Stream.concat(super.nodePlugins().stream(), Stream.of(MockFsRepositoryPlugin.class)).collect(Collectors.toList()); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.REMOTE_STORE, "true").build(); + } + + @Before + public void setup() { + internalCluster().startClusterManagerOnlyNode(); + Path absolutePath = randomRepoPath().toAbsolutePath(); + putRepository(absolutePath); + } + + protected void putRepository(Path path) { + assertAcked( + clusterAdmin().preparePutRepository(REPOSITORY_NAME) + .setType(MockFsRepositoryPlugin.TYPE) + .setSettings( + Settings.builder() + .put("location", path) + // custom setting for MockFsRepositoryPlugin + .put(MockFsRepository.TRIGGER_DATA_INTEGRITY_FAILURE.getKey(), true) + ) + ); + } + + @After + public void teardown() { + assertAcked(clusterAdmin().prepareDeleteRepository(REPOSITORY_NAME)); + } + + protected Settings remoteStoreIndexSettings() { + return Settings.builder() + .put(super.indexSettings()) + .put("index.refresh_interval", "300s") + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), false) + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) + .put(IndexMetadata.SETTING_REMOTE_STORE_REPOSITORY, REPOSITORY_NAME) + .build(); + } + + private IndexResponse indexSingleDoc() { + return client().prepareIndex(INDEX_NAME) + .setId(UUIDs.randomBase64UUID()) + .setSource(randomAlphaOfLength(5), randomAlphaOfLength(5)) + .get(); + } + + public void testLocalFileCorruptionDuringUpload() { + internalCluster().startDataOnlyNodes(1); + createIndex(INDEX_NAME, remoteStoreIndexSettings()); + ensureYellowAndNoInitializingShards(INDEX_NAME); + ensureGreen(INDEX_NAME); + + indexSingleDoc(); + + client().admin() + .indices() + .prepareRefresh(INDEX_NAME) + .setIndicesOptions(IndicesOptions.STRICT_EXPAND_OPEN_HIDDEN_FORBID_CLOSED) + .execute() + .actionGet(); + + // ensuring red cluster meaning shard has failed and is unassigned + ensureRed(INDEX_NAME); + } +} diff --git 
a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java new file mode 100644 index 0000000000000..a523d5c0f5470 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/RemoteStoreMultipartIT.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore.multipart; + +import org.opensearch.common.settings.Settings; +import org.opensearch.plugins.Plugin; +import org.opensearch.remotestore.RemoteStoreIT; +import org.opensearch.remotestore.multipart.mocks.MockFsRepositoryPlugin; + +import java.nio.file.Path; +import java.util.Collection; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +public class RemoteStoreMultipartIT extends RemoteStoreIT { + + @Override + protected Collection> nodePlugins() { + return Stream.concat(super.nodePlugins().stream(), Stream.of(MockFsRepositoryPlugin.class)).collect(Collectors.toList()); + } + + @Override + protected void putRepository(Path path) { + assertAcked( + clusterAdmin().preparePutRepository(REPOSITORY_NAME) + .setType(MockFsRepositoryPlugin.TYPE) + .setSettings(Settings.builder().put("location", path)) + ); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsBlobStore.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsBlobStore.java new file mode 100644 index 0000000000000..f1d9fbba84528 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsBlobStore.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotestore.multipart.mocks; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.fs.FsBlobStore; + +import java.io.IOException; +import java.nio.file.Path; + +public class MockFsBlobStore extends FsBlobStore { + + private final boolean triggerDataIntegrityFailure; + + public MockFsBlobStore(int bufferSizeInBytes, Path path, boolean readonly, boolean triggerDataIntegrityFailure) throws IOException { + super(bufferSizeInBytes, path, readonly); + this.triggerDataIntegrityFailure = triggerDataIntegrityFailure; + } + + @Override + public BlobContainer blobContainer(BlobPath path) { + try { + return new MockFsVerifyingBlobContainer(this, path, buildAndCreate(path), triggerDataIntegrityFailure); + } catch (IOException ex) { + throw new OpenSearchException("failed to create blob container", ex); + } + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepository.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepository.java new file mode 100644 index 0000000000000..15a9853477081 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepository.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore.multipart.mocks; + +import org.opensearch.cluster.metadata.RepositoryMetadata; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.blobstore.fs.FsBlobStore; +import org.opensearch.common.settings.Setting; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.env.Environment; +import org.opensearch.indices.recovery.RecoverySettings; +import org.opensearch.repositories.fs.FsRepository; + +public class MockFsRepository extends FsRepository { + + public static Setting TRIGGER_DATA_INTEGRITY_FAILURE = Setting.boolSetting( + "mock_fs_repository.trigger_data_integrity_failure", + false + ); + + private final boolean triggerDataIntegrityFailure; + + public MockFsRepository( + RepositoryMetadata metadata, + Environment environment, + NamedXContentRegistry namedXContentRegistry, + ClusterService clusterService, + RecoverySettings recoverySettings + ) { + super(metadata, environment, namedXContentRegistry, clusterService, recoverySettings); + triggerDataIntegrityFailure = TRIGGER_DATA_INTEGRITY_FAILURE.get(metadata.settings()); + } + + @Override + protected BlobStore createBlobStore() throws Exception { + FsBlobStore fsBlobStore = (FsBlobStore) super.createBlobStore(); + return new MockFsBlobStore(fsBlobStore.bufferSizeInBytes(), fsBlobStore.path(), isReadOnly(), triggerDataIntegrityFailure); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepositoryPlugin.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepositoryPlugin.java new file mode 100644 index 0000000000000..ffd53adf4e29e --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsRepositoryPlugin.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The 
OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore.multipart.mocks; + +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.env.Environment; +import org.opensearch.indices.recovery.RecoverySettings; +import org.opensearch.plugins.Plugin; +import org.opensearch.plugins.RepositoryPlugin; +import org.opensearch.repositories.Repository; + +import java.util.Collections; +import java.util.Map; + +public class MockFsRepositoryPlugin extends Plugin implements RepositoryPlugin { + + public static final String TYPE = "fs_multipart_repository"; + + @Override + public Map getRepositories( + Environment env, + NamedXContentRegistry namedXContentRegistry, + ClusterService clusterService, + RecoverySettings recoverySettings + ) { + return Collections.singletonMap( + "fs_multipart_repository", + metadata -> new MockFsRepository(metadata, env, namedXContentRegistry, clusterService, recoverySettings) + ); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsVerifyingBlobContainer.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsVerifyingBlobContainer.java new file mode 100644 index 0000000000000..8f2814eb7c4c4 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/multipart/mocks/MockFsVerifyingBlobContainer.java @@ -0,0 +1,120 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotestore.multipart.mocks; + +import org.apache.lucene.index.CorruptIndexException; +import org.opensearch.action.ActionListener; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.io.InputStreamContainer; +import org.opensearch.common.StreamContext; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.fs.FsBlobContainer; +import org.opensearch.common.blobstore.fs.FsBlobStore; +import org.opensearch.common.blobstore.stream.write.WriteContext; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +public class MockFsVerifyingBlobContainer extends FsBlobContainer implements VerifyingMultiStreamBlobContainer { + + private static final int TRANSFER_TIMEOUT_MILLIS = 30000; + + private final boolean triggerDataIntegrityFailure; + + public MockFsVerifyingBlobContainer(FsBlobStore blobStore, BlobPath blobPath, Path path, boolean triggerDataIntegrityFailure) { + super(blobStore, blobPath, path); + this.triggerDataIntegrityFailure = triggerDataIntegrityFailure; + } + + @Override + public void asyncBlobUpload(WriteContext writeContext, ActionListener completionListener) throws IOException { + + int nParts = 10; + long partSize = writeContext.getFileSize() / nParts; + StreamContext streamContext = writeContext.getStreamProvider(partSize); + final Path file = path.resolve(writeContext.getFileName()); + byte[] buffer = new byte[(int) writeContext.getFileSize()]; + AtomicLong totalContentRead = new AtomicLong(); + CountDownLatch latch = new CountDownLatch(streamContext.getNumberOfParts()); + for (int partIdx = 0; partIdx < streamContext.getNumberOfParts(); partIdx++) { + int finalPartIdx = partIdx; + Thread thread = new Thread(() -> { + try { + InputStreamContainer inputStreamContainer = streamContext.provideStream(finalPartIdx); + InputStream inputStream = inputStreamContainer.getInputStream(); + long remainingContentLength = inputStreamContainer.getContentLength(); + long offset = partSize * finalPartIdx; + while (remainingContentLength > 0) { + int readContentLength = inputStream.read(buffer, (int) offset, (int) remainingContentLength); + totalContentRead.addAndGet(readContentLength); + remainingContentLength -= readContentLength; + offset += readContentLength; + } + inputStream.close(); + } catch (IOException e) { + completionListener.onFailure(e); + } finally { + latch.countDown(); + } + }); + thread.start(); + } + try { + if (!latch.await(TRANSFER_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS)) { + throw new IOException("Timed out waiting for file transfer to complete for " + writeContext.getFileName()); + } + } catch (InterruptedException e) { + throw new IOException("Await interrupted on CountDownLatch, transfer failed for " + writeContext.getFileName()); + } + try (OutputStream outputStream = Files.newOutputStream(file, StandardOpenOption.CREATE_NEW)) { + outputStream.write(buffer); + } + if (writeContext.getFileSize() != totalContentRead.get()) { + throw new IOException( + "Incorrect content length read for file " + + writeContext.getFileName() + + ", actual file size: " + + writeContext.getFileSize() + + ", bytes read: " + + totalContentRead.get() + ); + } + + try { + // bulks need to succeed for segment files to be 
generated + if (isSegmentFile(writeContext.getFileName()) && triggerDataIntegrityFailure) { + completionListener.onFailure( + new RuntimeException( + new CorruptIndexException( + "Data integrity check failure for file: " + writeContext.getFileName(), + writeContext.getFileName() + ) + ) + ); + } else { + writeContext.getUploadFinalizer().accept(true); + completionListener.onResponse(null); + } + } catch (Exception e) { + completionListener.onFailure(e); + } + + } + + private boolean isSegmentFile(String filename) { + return !filename.endsWith(".tlog") && !filename.endsWith(".ckp"); + } +} diff --git a/server/src/main/java/org/opensearch/common/blobstore/VerifyingMultiStreamBlobContainer.java b/server/src/main/java/org/opensearch/common/blobstore/VerifyingMultiStreamBlobContainer.java new file mode 100644 index 0000000000000..0dfcc5c50e4b1 --- /dev/null +++ b/server/src/main/java/org/opensearch/common/blobstore/VerifyingMultiStreamBlobContainer.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.common.blobstore; + +import org.opensearch.action.ActionListener; +import org.opensearch.common.blobstore.stream.write.WriteContext; + +import java.io.IOException; + +/** + * An extension of {@link BlobContainer} that adds {@link VerifyingMultiStreamBlobContainer#asyncBlobUpload} to allow + * multipart uploads and performs integrity checks on transferred files + * + * @opensearch.internal + */ +public interface VerifyingMultiStreamBlobContainer extends BlobContainer { + + /** + * Reads blob content from multiple streams, each from a specific part of the file, which is provided by the + * StreamContextSupplier in the WriteContext passed to this method. An {@link IOException} is thrown if reading + * any of the input streams fails, or writing to the target blob fails + * + * @param writeContext A WriteContext object encapsulating all information needed to perform the upload + * @param completionListener Listener on which upload events should be published. 
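
To make the per-part arithmetic concrete, here is a reduced sketch of draining a single part, as each upload thread in the mock container above does (buffer, partIdx and streamContext are assumed to be in scope; the part's absolute position travels with the stream via the new InputStreamContainer#getOffset rather than being recomputed as partSize * finalPartIdx):

    InputStreamContainer part = streamContext.provideStream(partIdx);
    long offset = part.getOffset();             // absolute start of this part within the blob
    long remaining = part.getContentLength();
    try (InputStream in = part.getInputStream()) {
        while (remaining > 0) {
            int read = in.read(buffer, (int) offset, (int) remaining);
            if (read < 0) {
                throw new java.io.EOFException("part " + partIdx + " ended early");
            }
            offset += read;
            remaining -= read;
        }
    }
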
+ * @throws IOException if any of the input streams could not be read, or the target blob could not be written to + */ + void asyncBlobUpload(WriteContext writeContext, ActionListener completionListener) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java b/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java index 7864c3ab5c794..ca744efae902d 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java +++ b/server/src/main/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainer.java @@ -160,7 +160,7 @@ private LocalStreamSupplier getMultipartStreamSupplier( : offsetRangeInputStream; Objects.requireNonNull(inputStreams.get())[streamIdx] = inputStream; - return new InputStreamContainer(inputStream, size); + return new InputStreamContainer(inputStream, size, position); } catch (IOException e) { log.error("Failed to create input stream", e); throw e; diff --git a/server/src/main/java/org/opensearch/common/util/ByteUtils.java b/server/src/main/java/org/opensearch/common/util/ByteUtils.java index 36ae3b1f5bcaa..8c7665d991751 100644 --- a/server/src/main/java/org/opensearch/common/util/ByteUtils.java +++ b/server/src/main/java/org/opensearch/common/util/ByteUtils.java @@ -61,6 +61,16 @@ public static void writeLongLE(long l, byte[] arr, int offset) { assert l == 0; } + /** Convert long to a byte array in big-endian format */ + public static byte[] toByteArrayBE(long l) { + byte[] result = new byte[8]; + for (int i = 7; i >= 0; i--) { + result[i] = (byte) (l & 0xffL); + l >>= 8; + } + return result; + } + /** Write a long in little-endian format. */ public static long readLongLE(byte[] arr, int offset) { long l = arr[offset++] & 0xFFL; diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 46d52bc8ca5df..e087bbb265727 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.search.ReferenceManager; @@ -18,11 +19,14 @@ import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; +import org.opensearch.action.ActionListener; +import org.opensearch.action.LatchedActionListener; import org.opensearch.action.bulk.BackoffPolicy; -import org.opensearch.common.CheckedFunction; +import org.opensearch.action.support.GroupedActionListener; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.logging.Loggers; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.UploadListener; import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.index.engine.EngineException; import org.opensearch.index.engine.InternalEngine; @@ -46,6 +50,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import 
java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -110,7 +115,7 @@ public final class RemoteStoreRefreshListener implements ReferenceManager.Refres private final SegmentReplicationCheckpointPublisher checkpointPublisher; - private final FileUploader fileUploader; + private final UploadListener statsListener; public RemoteStoreRefreshListener( IndexShard indexShard, @@ -137,7 +142,7 @@ public RemoteStoreRefreshListener( this.segmentTracker = segmentTracker; resetBackOffDelayIterator(); this.checkpointPublisher = checkpointPublisher; - this.fileUploader = new FileUploader(new UploadTracker() { + this.statsListener = new UploadListener() { @Override public void beforeUpload(String file) { // Start tracking the upload bytes started @@ -156,7 +161,7 @@ public void onFailure(String file) { // Track upload failure segmentTracker.addUploadBytesFailed(latestFileNameSizeOnLocalMap.get(file)); } - }, remoteDirectory, storeDirectory, this::getChecksumOfLocalFile, logger); + }; } @Override @@ -190,7 +195,7 @@ private synchronized void syncSegments(boolean isRetry) { long refreshTimeMs = segmentTracker.getLocalRefreshTimeMs(), refreshClockTimeMs = segmentTracker.getLocalRefreshClockTimeMs(); long refreshSeqNo = segmentTracker.getLocalRefreshSeqNo(); long bytesBeforeUpload = segmentTracker.getUploadBytesSucceeded(), startTimeInNS = System.nanoTime(); - boolean shouldRetry = true; + try { if (this.primaryTerm != indexShard.getOperationPrimaryTerm()) { @@ -242,18 +247,51 @@ private synchronized void syncSegments(boolean isRetry) { // Create a map of file name to size and update the refresh segment tracker updateLocalSizeMapAndTracker(localSegmentsPostRefresh); + CountDownLatch latch = new CountDownLatch(1); + ActionListener segmentUploadsCompletedListener = new LatchedActionListener<>(new ActionListener<>() { + @Override + public void onResponse(Void unused) { + boolean shouldRetry = true; + try { + // Start metadata file upload + uploadMetadata(localSegmentsPostRefresh, segmentInfos); + clearStaleFilesFromLocalSegmentChecksumMap(localSegmentsPostRefresh); + onSuccessfulSegmentsSync( + refreshTimeMs, + refreshClockTimeMs, + refreshSeqNo, + lastRefreshedCheckpoint, + checkpoint + ); + // At this point since we have uploaded new segments, segment infos and segment metadata file, + // along with marking minSeqNoToKeep, upload has succeeded completely. + shouldRetry = false; + } catch (Exception e) { + // We don't want to fail refresh if upload of new segments fails. The missed segments will be re-tried + // in the next refresh. This should not affect durability of the indexed data after remote trans-log + // integration. 
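
The control flow above is worth naming as a pattern: the asynchronous upload chain reports into a LatchedActionListener so the synchronous refresh thread can park until exactly one terminal callback fires. Stripped to its skeleton (startAsyncUpload and onDone are hypothetical stand-ins):

    CountDownLatch latch = new CountDownLatch(1);
    ActionListener<Void> done = new LatchedActionListener<>(
        ActionListener.wrap(unused -> onDone(false), e -> onDone(true)),   // boolean = shouldRetry
        latch
    );
    startAsyncUpload(done);   // stands in for uploadNewSegments(localSegmentsPostRefresh, done)
    latch.await();            // may throw InterruptedException; syncSegments stays blocking
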
+ logger.warn("Exception in post new segment upload actions", e); + } finally { + doComplete(shouldRetry); + } + } + + @Override + public void onFailure(Exception e) { + logger.warn("Exception while uploading new segments to the remote segment store", e); + doComplete(true); + } + + private void doComplete(boolean shouldRetry) { + // Update the segment tracker with the final upload status as seen at the end + updateFinalUploadStatusInSegmentTracker(shouldRetry == false, bytesBeforeUpload, startTimeInNS); + afterSegmentsSync(isRetry, shouldRetry); + } + }, latch); // Start the segments files upload - boolean newSegmentsUploadStatus = uploadNewSegments(localSegmentsPostRefresh); - if (newSegmentsUploadStatus) { - // Start metadata file upload - uploadMetadata(localSegmentsPostRefresh, segmentInfos); - clearStaleFilesFromLocalSegmentChecksumMap(localSegmentsPostRefresh); - onSuccessfulSegmentsSync(refreshTimeMs, refreshClockTimeMs, refreshSeqNo, lastRefreshedCheckpoint, checkpoint); - // At this point since we have uploaded new segments, segment infos and segment metadata file, - // along with marking minSeqNoToKeep, upload has succeeded completely. - shouldRetry = false; - } + uploadNewSegments(localSegmentsPostRefresh, segmentUploadsCompletedListener); + latch.await(); } } catch (EngineException e) { logger.warn("Exception while reading SegmentInfosSnapshot", e); @@ -265,11 +303,7 @@ private synchronized void syncSegments(boolean isRetry) { } } catch (Throwable t) { logger.error("Exception in RemoteStoreRefreshListener.afterRefresh()", t); - } finally { - // Update the segment tracker with the final upload status as seen at the end - updateFinalUploadStatusInSegmentTracker(shouldRetry == false, bytesBeforeUpload, startTimeInNS); } - afterSegmentsSync(isRetry, shouldRetry); } /** @@ -378,17 +412,50 @@ void uploadMetadata(Collection localSegmentsPostRefresh, SegmentInfos se } } - private boolean uploadNewSegments(Collection localSegmentsPostRefresh) throws IOException { - AtomicBoolean uploadSuccess = new AtomicBoolean(true); - localSegmentsPostRefresh.forEach(file -> { - try { - fileUploader.uploadFile(file); - } catch (IOException e) { - uploadSuccess.set(false); - logger.warn(() -> new ParameterizedMessage("Exception while uploading file {} to the remote segment store", file), e); - } - }); - return uploadSuccess.get(); + private void uploadNewSegments(Collection localSegmentsPostRefresh, ActionListener listener) { + Collection filteredFiles = localSegmentsPostRefresh.stream().filter(file -> !skipUpload(file)).collect(Collectors.toList()); + if (filteredFiles.size() == 0) { + listener.onResponse(null); + return; + } + + ActionListener> mappedListener = ActionListener.map(listener, resp -> null); + GroupedActionListener batchUploadListener = new GroupedActionListener<>(mappedListener, filteredFiles.size()); + + for (String src : filteredFiles) { + ActionListener aggregatedListener = ActionListener.wrap(resp -> { + statsListener.onSuccess(src); + batchUploadListener.onResponse(resp); + }, ex -> { + logger.warn(() -> new ParameterizedMessage("Exception: [{}] while uploading segment files", ex), ex); + if (ex instanceof CorruptIndexException) { + indexShard.failShard(ex.getMessage(), ex); + } + statsListener.onFailure(src); + batchUploadListener.onFailure(ex); + }); + statsListener.beforeUpload(src); + remoteDirectory.copyFrom(storeDirectory, src, IOContext.DEFAULT, aggregatedListener); + } + } + + /** + * Whether to upload a file or not depending on whether file is in excluded list or has 
been already uploaded. + * + * @param file that needs to be uploaded. + * @return true if the upload has to be skipped for the file. + */ + private boolean skipUpload(String file) { + try { + // Exclude files that are already uploaded and the exclude files to come up with the list of files to be uploaded. + return EXCLUDE_FILES.contains(file) || remoteDirectory.containsFile(file, getChecksumOfLocalFile(file)); + } catch (IOException e) { + logger.error( + "Exception while reading checksum of local segment file: {}, ignoring the exception and re-uploading the file", + file + ); + } + return false; } private String getChecksumOfLocalFile(String file) throws IOException { @@ -462,104 +529,4 @@ private void updateFinalUploadStatusInSegmentTracker(boolean uploadStatus, long segmentTracker.incrementTotalUploadsFailed(); } } - - /** - * This class is a wrapper over the copying of file from local to remote store allowing to decorate the actual copy - * method along with adding hooks of code that can be run before, on success and on failure. - * - * @opensearch.internal - */ - private static class FileUploader { - - private final Logger logger; - - private final UploadTracker uploadTracker; - - private final RemoteSegmentStoreDirectory remoteDirectory; - - private final Directory storeDirectory; - - private final CheckedFunction checksumProvider; - - public FileUploader( - UploadTracker uploadTracker, - RemoteSegmentStoreDirectory remoteDirectory, - Directory storeDirectory, - CheckedFunction checksumProvider, - Logger logger - ) { - this.uploadTracker = uploadTracker; - this.remoteDirectory = remoteDirectory; - this.storeDirectory = storeDirectory; - this.checksumProvider = checksumProvider; - this.logger = logger; - } - - /** - * Calling this method will lead to before getting executed and then the actual upload. Based on the upload status, - * the onSuccess or onFailure method gets invoked. - * - * @param file the file which is to be uploaded. - * @throws IOException is thrown if the upload fails. - */ - private void uploadFile(String file) throws IOException { - if (skipUpload(file)) { - return; - } - uploadTracker.beforeUpload(file); - boolean success = false; - try { - performUpload(file); - uploadTracker.onSuccess(file); - success = true; - } finally { - if (!success) { - uploadTracker.onFailure(file); - } - } - } - - /** - * Whether to upload a file or not depending on whether file is in excluded list or has been already uploaded. - * - * @param file that needs to be uploaded. - * @return true if the upload has to be skipped for the file. - */ - private boolean skipUpload(String file) { - try { - // Exclude files that are already uploaded and the exclude files to come up with the list of files to be uploaded. - return EXCLUDE_FILES.contains(file) || remoteDirectory.containsFile(file, checksumProvider.apply(file)); - } catch (IOException e) { - logger.error( - "Exception while reading checksum of local segment file: {}, ignoring the exception and re-uploading the file", - file - ); - } - return false; - } - - /** - * This method does the actual upload. - * - * @param file that needs to be uploaded. - * @throws IOException is thrown if the upload fails. - */ - private void performUpload(String file) throws IOException { - remoteDirectory.copyFrom(storeDirectory, file, file, IOContext.DEFAULT); - } - } - - /** - * A tracker class that is fed to FileUploader. 
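
uploadNewSegments above is a direct instance of the stock GroupedActionListener fan-out; reduced to its skeleton (parent and uploadOne are hypothetical stand-ins):

    // parent fires once every per-file branch has reported back
    GroupedActionListener<Void> group = new GroupedActionListener<>(
        ActionListener.map(parent, responses -> null),   // collapse Collection<Void> to Void
        files.size()
    );
    for (String file : files) {
        uploadOne(file, group);   // each branch must invoke group exactly once
    }
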
- * - * @opensearch.internal - */ - interface UploadTracker { - - void beforeUpload(String file); - - void onSuccess(String file); - - void onFailure(String file); - } } diff --git a/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java index 8782808c070ab..f7fe7ca62e6ba 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteDirectory.java @@ -45,6 +45,10 @@ public class RemoteDirectory extends Directory { protected final BlobContainer blobContainer; + public BlobContainer getBlobContainer() { + return blobContainer; + } + public RemoteDirectory(BlobContainer blobContainer) { this.blobContainer = blobContainer; } diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index e7602203440d2..395ecba442e86 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -8,9 +8,12 @@ package org.opensearch.index.store; +import com.jcraft.jzlib.JZlib; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.ByteBuffersDataOutput; import org.apache.lucene.store.ByteBuffersIndexOutput; @@ -19,10 +22,20 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.opensearch.ExceptionsHelper; +import org.opensearch.action.ActionListener; import org.opensearch.common.UUIDs; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.blobstore.exception.CorruptFileException; +import org.opensearch.common.blobstore.stream.write.WriteContext; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; +import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream; import org.opensearch.common.io.VersionedCodecStreamWrapper; import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.common.util.ByteUtils; import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.index.store.exception.ChecksumCombinationException; import org.opensearch.index.store.lockmanager.FileLockInfo; import org.opensearch.index.store.lockmanager.RemoteStoreCommitLevelLockManager; import org.opensearch.index.store.lockmanager.RemoteStoreLockManager; @@ -44,6 +57,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import java.util.zip.CRC32; /** * A RemoteDirectory extension for remote segment store. We need to make sure we don't overwrite a segment file once uploaded. 
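
The "never overwrite" guarantee rests on the UUID suffix scheme visible below (SEGMENT_NAME_UUID_SEPARATOR and getNewRemoteSegmentFilename); schematically:

    // "_0.cfe" becomes "_0.cfe__<base64-uuid>", so a retried upload of the same local
    // file lands at a fresh remote key and can never clobber an earlier copy
    static String newRemoteSegmentFilename(String localFilename) {
        return localFilename + "__" + org.opensearch.common.UUIDs.base64UUID();
    }
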
@@ -62,6 +76,11 @@ public final class RemoteSegmentStoreDirectory extends FilterDirectory implement */ public static final String SEGMENT_NAME_UUID_SEPARATOR = "__"; + /** + * Number of bytes in the segment file to store checksum + */ + private static final int SEGMENT_CHECKSUM_BYTES = 8; + /** * remoteDataDirectory is used to store segment files at path: cluster_UUID/index_UUID/shardId/segments/data */ @@ -349,6 +368,89 @@ public IndexInput openInput(String name, IOContext context) throws IOException { } } + /** + * Copies a file from the source directory to a remote based on multi-stream upload support. + * If vendor plugin supports uploading multiple parts in parallel, BlobContainer#writeBlobByStreams + * will be used, else, the legacy {@link RemoteSegmentStoreDirectory#copyFrom(Directory, String, String, IOContext)} + * will be called. + * + * @param from The directory for the file to be uploaded + * @param src File to be uploaded + * @param context IOContext to be used to open IndexInput of file during remote upload + * @param listener Listener to handle upload callback events + */ + public void copyFrom(Directory from, String src, IOContext context, ActionListener listener) { + if (remoteDataDirectory.getBlobContainer() instanceof VerifyingMultiStreamBlobContainer) { + try { + String remoteFilename = getNewRemoteSegmentFilename(src); + uploadBlob(from, src, remoteFilename, context, listener); + } catch (Exception e) { + listener.onFailure(e); + } + } else { + try { + copyFrom(from, src, src, context); + listener.onResponse(null); + } catch (Exception e) { + logger.warn(() -> new ParameterizedMessage("Exception while uploading file {} to the remote segment store", src), e); + listener.onFailure(e); + } + } + } + + private void uploadBlob(Directory from, String src, String remoteFileName, IOContext ioContext, ActionListener listener) + throws Exception { + long expectedChecksum = calculateChecksumOfChecksum(from, src); + long contentLength; + try (IndexInput indexInput = from.openInput(src, ioContext)) { + contentLength = indexInput.length(); + } + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + src, + remoteFileName, + contentLength, + true, + WritePriority.NORMAL, + (size, position) -> new OffsetRangeIndexInputStream(from.openInput(src, ioContext), size, position), + expectedChecksum, + remoteDataDirectory.getBlobContainer() instanceof VerifyingMultiStreamBlobContainer + ); + ActionListener completionListener = ActionListener.wrap(resp -> { + try { + postUpload(from, src, remoteFileName, getChecksumOfLocalFile(from, src)); + listener.onResponse(null); + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Exception in segment postUpload for file [{}]", src), e); + listener.onFailure(e); + } + }, ex -> { + logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", src), ex); + IOException corruptIndexException = ExceptionsHelper.unwrapCorruption(ex); + if (corruptIndexException != null) { + listener.onFailure(corruptIndexException); + return; + } + Throwable throwable = ExceptionsHelper.unwrap(ex, CorruptFileException.class); + if (throwable != null) { + CorruptFileException corruptFileException = (CorruptFileException) throwable; + listener.onFailure(new CorruptIndexException(corruptFileException.getMessage(), corruptFileException.getFileName())); + return; + } + listener.onFailure(ex); + }); + + completionListener = ActionListener.runBefore(completionListener, () -> { + try { + remoteTransferContainer.close(); + } catch 
(Exception e) { + logger.warn("Error occurred while closing streams", e); + } + }); + + WriteContext writeContext = remoteTransferContainer.createWriteContext(); + ((VerifyingMultiStreamBlobContainer) remoteDataDirectory.getBlobContainer()).asyncBlobUpload(writeContext, completionListener); + } + /** * This acquires a lock on a given commit by creating a lock file in lock directory using {@code FileLockInfo} * @param primaryTerm Primary Term of index at the time of commit. @@ -425,6 +527,10 @@ public void copyFrom(Directory from, String src, String dest, IOContext context, String remoteFilename; remoteFilename = getNewRemoteSegmentFilename(dest); remoteDataDirectory.copyFrom(from, src, remoteFilename, context); + postUpload(from, src, remoteFilename, checksum); + } + + private void postUpload(Directory from, String src, String remoteFilename, String checksum) throws IOException { UploadedSegmentMetadata segmentMetadata = new UploadedSegmentMetadata(src, remoteFilename, checksum, from.fileLength(src)); segmentsUploadedToRemoteStore.put(src, segmentMetadata); } @@ -528,6 +634,27 @@ private String getChecksumOfLocalFile(Directory directory, String file) throws I } } + private long calculateChecksumOfChecksum(Directory directory, String file) throws IOException { + try (IndexInput indexInput = directory.openInput(file, IOContext.DEFAULT)) { + long storedChecksum = CodecUtil.retrieveChecksum(indexInput); + CRC32 checksumOfChecksum = new CRC32(); + checksumOfChecksum.update(ByteUtils.toByteArrayBE(storedChecksum)); + try { + return JZlib.crc32_combine(storedChecksum, checksumOfChecksum.getValue(), SEGMENT_CHECKSUM_BYTES); + } catch (Exception e) { + throw new ChecksumCombinationException( + "Potentially corrupted file: Checksum combination failed while combining stored checksum " + + "and calculated checksum of stored checksum in segment file: " + + file + + ", directory: " + + directory, + file, + e + ); + } + } + } + private String getExistingRemoteFilename(String localFilename) { if (segmentsUploadedToRemoteStore.containsKey(localFilename)) { return segmentsUploadedToRemoteStore.get(localFilename).uploadedFilename; diff --git a/server/src/main/java/org/opensearch/index/store/exception/ChecksumCombinationException.java b/server/src/main/java/org/opensearch/index/store/exception/ChecksumCombinationException.java index a355473aa2afd..d8e1739fbaa9d 100644 --- a/server/src/main/java/org/opensearch/index/store/exception/ChecksumCombinationException.java +++ b/server/src/main/java/org/opensearch/index/store/exception/ChecksumCombinationException.java @@ -11,7 +11,7 @@ import org.apache.lucene.index.CorruptIndexException; /** - * Exception is raised when combination to two crc checksums fail. + * Exception is raised when combination of two CRC checksums fail. 
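
calculateChecksumOfChecksum above leans on the CRC32 combination identity crc(A + B) == crc32_combine(crc(A), crc(B), len(B)): the Lucene footer already stores the CRC of everything before the trailing 8 checksum bytes, so combining that stored value with the CRC of its own big-endian byte form yields the expected checksum of the whole file without re-reading it. A self-contained check of the identity (assumes jzlib on the classpath, as the import above already establishes):

    static long crcOf(byte[]... parts) {
        java.util.zip.CRC32 crc = new java.util.zip.CRC32();
        for (byte[] p : parts) {
            crc.update(p, 0, p.length);
        }
        return crc.getValue();
    }

    byte[] body = "segment-body".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    byte[] tail = ByteUtils.toByteArrayBE(crcOf(body));   // the 8 checksum bytes, big-endian as on disk
    assert JZlib.crc32_combine(crcOf(body), crcOf(tail), tail.length) == crcOf(body, tail);
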
* * @opensearch.internal */ diff --git a/server/src/main/java/org/opensearch/index/translog/Checkpoint.java b/server/src/main/java/org/opensearch/index/translog/Checkpoint.java index 56de7e5daf55f..a9f905f52bc3a 100644 --- a/server/src/main/java/org/opensearch/index/translog/Checkpoint.java +++ b/server/src/main/java/org/opensearch/index/translog/Checkpoint.java @@ -233,7 +233,7 @@ public static void write(FileChannel fileChannel, Path checkpointFile, Checkpoin } } - private static byte[] createCheckpointBytes(Path checkpointFile, Checkpoint checkpoint) throws IOException { + public static byte[] createCheckpointBytes(Path checkpointFile, Checkpoint checkpoint) throws IOException { final ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream(V4_FILE_SIZE) { @Override public synchronized byte[] toByteArray() { diff --git a/server/src/main/java/org/opensearch/index/translog/checked/TranslogCheckedContainer.java b/server/src/main/java/org/opensearch/index/translog/TranslogCheckedContainer.java similarity index 74% rename from server/src/main/java/org/opensearch/index/translog/checked/TranslogCheckedContainer.java rename to server/src/main/java/org/opensearch/index/translog/TranslogCheckedContainer.java index b90794e29d2b1..7e2a38559166f 100644 --- a/server/src/main/java/org/opensearch/index/translog/checked/TranslogCheckedContainer.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogCheckedContainer.java @@ -6,13 +6,10 @@ * compatible open source license. */ -package org.opensearch.index.translog.checked; +package org.opensearch.index.translog; -import org.opensearch.common.io.Channels; import org.opensearch.common.util.concurrent.ReleasableLock; -import java.io.IOException; -import java.nio.channels.FileChannel; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; import java.util.zip.CRC32; @@ -28,21 +25,15 @@ public class TranslogCheckedContainer { private final Checksum checksum; private final AtomicLong contentLength; private final ReleasableLock updateLock = new ReleasableLock(new ReentrantLock()); - private final String file; /** - * Creates TranslogCheckedContainer from provided channel. + * Create TranslogCheckedContainer from provided bytes * - * @param channel {@link FileChannel} to read from - * @param offset offset of channel from which bytes are to be read. - * @param len Length of bytes to be read. 
+ * @param bytes The byte array to read from */ - public TranslogCheckedContainer(FileChannel channel, int offset, int len, String file) throws IOException { + public TranslogCheckedContainer(byte[] bytes) { this.checksum = new CRC32(); this.contentLength = new AtomicLong(); - this.file = file; - - byte[] bytes = Channels.readFromFileChannel(channel, offset, len); updateFromBytes(bytes, 0, bytes.length); } diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java index af6ebcf7b7c66..8067cccb772a2 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogHeader.java @@ -46,6 +46,7 @@ import java.io.EOFException; import java.io.IOException; +import java.io.OutputStream; import java.nio.channels.FileChannel; import java.nio.file.Path; @@ -213,12 +214,10 @@ private static void tryReportOldVersionError(final Path path, final FileChannel /** * Writes this header with the latest format into the file channel */ - void write(final FileChannel channel, boolean fsync) throws IOException { + void write(final OutputStream outputStream) throws IOException { // This output is intentionally not closed because closing it will close the FileChannel. @SuppressWarnings({ "IOResourceOpenedButNotSafelyClosed", "resource" }) - final BufferedChecksumStreamOutput out = new BufferedChecksumStreamOutput( - new OutputStreamStreamOutput(java.nio.channels.Channels.newOutputStream(channel)) - ); + final BufferedChecksumStreamOutput out = new BufferedChecksumStreamOutput(new OutputStreamStreamOutput(outputStream)); CodecUtil.writeHeader(new OutputStreamDataOutput(out), TRANSLOG_CODEC, CURRENT_VERSION); // Write uuid final BytesRef uuid = new BytesRef(translogUUID); @@ -229,6 +228,11 @@ void write(final FileChannel channel, boolean fsync) throws IOException { // Checksum header out.writeInt((int) out.getChecksum()); out.flush(); + } + + void write(final FileChannel channel, boolean fsync) throws IOException { + OutputStream outputStream = java.nio.channels.Channels.newOutputStream(channel); + write(outputStream); if (fsync == true) { channel.force(true); } diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogReader.java b/server/src/main/java/org/opensearch/index/translog/TranslogReader.java index c4a4fb7a460a0..9ea3328587645 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogReader.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogReader.java @@ -33,6 +33,7 @@ package org.opensearch.index.translog; import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.common.Nullable; import org.opensearch.common.io.Channels; import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.seqno.SequenceNumbers; @@ -59,6 +60,11 @@ public class TranslogReader extends BaseTranslogReader implements Closeable { private final Checkpoint checkpoint; protected final AtomicBoolean closed = new AtomicBoolean(false); + @Nullable + private final Long translogChecksum; + @Nullable + private final Long checkpointChecksum; + /** * Create a translog writer against the specified translog file channel. 
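
The new byte-array constructor makes the container's contract easy to state: seeding it with one buffer and then feeding it further buffers must equal a one-shot CRC over the concatenation. A small sketch (headerBytes and opBytes are illustrative):

    TranslogCheckedContainer container = new TranslogCheckedContainer(headerBytes);
    container.updateFromBytes(opBytes, 0, opBytes.length);   // as the writer does per flushed buffer

    java.util.zip.CRC32 oneShot = new java.util.zip.CRC32();
    oneShot.update(headerBytes);
    oneShot.update(opBytes);
    assert container.getChecksum() == oneShot.getValue();
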
* @@ -67,11 +73,34 @@ public class TranslogReader extends BaseTranslogReader implements Closeable { * @param path the path to the translog * @param header the header of the translog file */ - TranslogReader(final Checkpoint checkpoint, final FileChannel channel, final Path path, final TranslogHeader header) { + TranslogReader( + final Checkpoint checkpoint, + final FileChannel channel, + final Path path, + final TranslogHeader header, + final Long translogChecksum + ) throws IOException { super(checkpoint.generation, channel, path, header); this.length = checkpoint.offset; this.totalOperations = checkpoint.numOps; this.checkpoint = checkpoint; + this.translogChecksum = translogChecksum; + this.checkpointChecksum = (translogChecksum != null) ? calculateCheckpointChecksum(checkpoint, path) : null; + } + + private static Long calculateCheckpointChecksum(Checkpoint checkpoint, Path path) throws IOException { + TranslogCheckedContainer checkpointCheckedContainer = new TranslogCheckedContainer( + Checkpoint.createCheckpointBytes(path.getParent().resolve(Translog.CHECKPOINT_FILE_NAME), checkpoint) + ); + return checkpointCheckedContainer.getChecksum(); + } + + public Long getTranslogChecksum() { + return translogChecksum; + } + + public Long getCheckpointChecksum() { + return checkpointChecksum; } /** @@ -87,7 +116,7 @@ public class TranslogReader extends BaseTranslogReader implements Closeable { public static TranslogReader open(final FileChannel channel, final Path path, final Checkpoint checkpoint, final String translogUUID) throws IOException { final TranslogHeader header = TranslogHeader.read(translogUUID, path, channel); - return new TranslogReader(checkpoint, channel, path, header); + return new TranslogReader(checkpoint, channel, path, header, null); } /** @@ -115,9 +144,9 @@ TranslogReader closeIntoTrimmedReader(long aboveSeqNo, ChannelFactory channelFac IOUtils.fsync(checkpointFile.getParent(), true); - newReader = new TranslogReader(newCheckpoint, channel, path, header); + newReader = new TranslogReader(newCheckpoint, channel, path, header, translogChecksum); } else { - newReader = new TranslogReader(checkpoint, channel, path, header); + newReader = new TranslogReader(checkpoint, channel, path, header, translogChecksum); } toCloseOnFailure = null; return newReader; diff --git a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java index e19aece60adc0..e7b08b1dda3d2 100644 --- a/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java +++ b/server/src/main/java/org/opensearch/index/translog/TranslogWriter.java @@ -37,6 +37,7 @@ import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; +import org.opensearch.common.Nullable; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.bytes.BytesArray; import org.opensearch.common.bytes.BytesReference; @@ -54,6 +55,7 @@ import org.opensearch.index.seqno.SequenceNumbers; import org.opensearch.index.shard.ShardId; +import java.io.ByteArrayOutputStream; import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; @@ -110,6 +112,9 @@ public class TranslogWriter extends BaseTranslogReader implements Closeable { private final Map> seenSequenceNumbers; + @Nullable + private final TranslogCheckedContainer translogCheckedContainer; + private final Boolean remoteTranslogEnabled; private TranslogWriter( @@ -126,6 +131,7 @@ 
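The reader-side hunks above thread a translog checksum through TranslogReader and derive the checkpoint checksum by re-serializing the checkpoint (Checkpoint.createCheckpointBytes) into a TranslogCheckedContainer, which is essentially an incrementally updated CRC32. A minimal stand-alone sketch of that accumulation pattern follows; the class and method names here are illustrative stand-ins, not the PR's API:

```java
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

// Illustrative stand-in for TranslogCheckedContainer: a CRC32 that is folded
// forward as byte ranges arrive, plus a running content length.
final class IncrementalChecksum {
    private final CRC32 crc32 = new CRC32();
    private long contentLength;

    synchronized void updateFromBytes(byte[] bytes, int offset, int len) {
        crc32.update(bytes, offset, len); // fold the new range into the running CRC
        contentLength += len;
    }

    synchronized long checksum() {
        return crc32.getValue();
    }

    public static void main(String[] args) {
        // Feeding the same bytes in one call or in two chunks yields the same CRC,
        // which is what lets the writer checksum operations as they stream through.
        byte[] data = "translog-operations".getBytes(StandardCharsets.UTF_8);
        IncrementalChecksum whole = new IncrementalChecksum();
        whole.updateFromBytes(data, 0, data.length);

        IncrementalChecksum chunked = new IncrementalChecksum();
        chunked.updateFromBytes(data, 0, 8);
        chunked.updateFromBytes(data, 8, data.length - 8);

        System.out.println(whole.checksum() == chunked.checksum()); // true
    }
}
```

Because a CRC32 folds forward chunk by chunk, the container can checksum a file while it is being written instead of re-reading it afterwards, which is what allows the old FileChannel/offset/length constructor to be deleted.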
private TranslogWriter( final TragicExceptionHolder tragedy, final LongConsumer persistedSequenceNumberConsumer, final BigArrays bigArrays, + TranslogCheckedContainer translogCheckedContainer, Boolean remoteTranslogEnabled ) throws IOException { super(initialCheckpoint.generation, channel, path, header); @@ -151,6 +157,7 @@ private TranslogWriter( this.bigArrays = bigArrays; this.seenSequenceNumbers = Assertions.ENABLED ? new HashMap<>() : null; this.tragedy = tragedy; + this.translogCheckedContainer = translogCheckedContainer; this.remoteTranslogEnabled = remoteTranslogEnabled; } @@ -179,6 +186,12 @@ public static TranslogWriter create( checkpointChannel = channelFactory.open(checkpointFile, StandardOpenOption.WRITE); final TranslogHeader header = new TranslogHeader(translogUUID, primaryTerm); header.write(channel, !Boolean.TRUE.equals(remoteTranslogEnabled)); + TranslogCheckedContainer translogCheckedContainer = null; + if (Boolean.TRUE.equals(remoteTranslogEnabled)) { + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + header.write(byteArrayOutputStream); + translogCheckedContainer = new TranslogCheckedContainer(byteArrayOutputStream.toByteArray()); + } final Checkpoint checkpoint = Checkpoint.emptyTranslogCheckpoint( header.sizeInBytes(), fileGeneration, @@ -214,6 +227,7 @@ public static TranslogWriter create( tragedy, persistedSequenceNumberConsumer, bigArrays, + translogCheckedContainer, remoteTranslogEnabled ); } catch (Exception exception) { @@ -438,7 +452,13 @@ public TranslogReader closeIntoReader() throws IOException { closeWithTragicEvent(ex); throw ex; } - return new TranslogReader(getLastSyncedCheckpoint(), channel, path, header); + return new TranslogReader( + getLastSyncedCheckpoint(), + channel, + path, + header, + (translogCheckedContainer != null) ? translogCheckedContainer.getChecksum() : null + ); } else { throw new AlreadyClosedException( "translog [" + getGeneration() + "] is already closed (path [" + path + "]", @@ -571,6 +591,9 @@ private void writeAndReleaseOps(ReleasableBytesReference toWrite) throws IOExcep while (currentBytesConsumed != current.length) { int nBytesToWrite = Math.min(current.length - currentBytesConsumed, ioBuffer.remaining()); ioBuffer.put(current.bytes, current.offset + currentBytesConsumed, nBytesToWrite); + if (translogCheckedContainer != null) { + translogCheckedContainer.updateFromBytes(current.bytes, current.offset + currentBytesConsumed, nBytesToWrite); + } currentBytesConsumed += nBytesToWrite; if (ioBuffer.hasRemaining() == false) { ioBuffer.flip(); diff --git a/server/src/main/java/org/opensearch/index/translog/checked/package-info.java b/server/src/main/java/org/opensearch/index/translog/checked/package-info.java deleted file mode 100644 index ddb235fdbedce..0000000000000 --- a/server/src/main/java/org/opensearch/index/translog/checked/package-info.java +++ /dev/null @@ -1,10 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
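TranslogWriter.create above seeds the container by writing the header a second time into an in-memory buffer when remote translog is enabled. Roughly, assuming any header serializer that targets a generic OutputStream (as introduced in the TranslogHeader hunk), the pattern is:

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

public class HeaderSeededChecksum {
    // Hypothetical stand-in for TranslogHeader.write(OutputStream).
    static void writeHeader(OutputStream out) throws IOException {
        out.write("header-bytes".getBytes(StandardCharsets.UTF_8));
    }

    public static void main(String[] args) throws IOException {
        // Write the header once into an in-memory buffer to learn its exact bytes...
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        writeHeader(buffer);
        byte[] headerBytes = buffer.toByteArray();

        // ...then seed the running checksum with them, so operation bytes appended
        // later extend the same CRC that the header started.
        CRC32 crc = new CRC32();
        crc.update(headerBytes, 0, headerBytes.length);
        byte[] ops = "op-1".getBytes(StandardCharsets.UTF_8);
        crc.update(ops, 0, ops.length);
        System.out.println("file checksum so far = " + crc.getValue());
    }
}
```

From then on, writeAndReleaseOps only has to fold each operation buffer into the same CRC32 as it is copied into the I/O buffer, as the hunk above shows.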
- */ - -/** Contains checksum related utilities for translog files */ -package org.opensearch.index.translog.checked; diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java b/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java index d9feb1a832681..974e8af42b939 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/BlobStoreTransferService.java @@ -16,12 +16,22 @@ import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.blobstore.stream.write.WriteContext; +import org.opensearch.common.blobstore.stream.write.WritePriority; +import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; +import org.opensearch.common.blobstore.transfer.stream.OffsetRangeFileInputStream; +import org.opensearch.index.translog.ChannelFactory; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import org.opensearch.threadpool.ThreadPool; import java.io.IOException; import java.io.InputStream; +import java.nio.channels.FileChannel; +import java.nio.file.StandardOpenOption; import java.util.List; +import java.util.Map; +import java.util.Objects; import java.util.Set; import static org.opensearch.common.blobstore.BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC; @@ -44,18 +54,18 @@ public BlobStoreTransferService(BlobStore blobStore, ThreadPool threadPool) { } @Override - public void uploadBlobAsync( - String threadpoolName, + public void uploadBlob( + String threadPoolName, final TransferFileSnapshot fileSnapshot, Iterable remoteTransferPath, - ActionListener listener + ActionListener listener, + WritePriority writePriority ) { assert remoteTransferPath instanceof BlobPath; BlobPath blobPath = (BlobPath) remoteTransferPath; - threadPool.executor(threadpoolName).execute(ActionRunnable.wrap(listener, l -> { - try (InputStream inputStream = fileSnapshot.inputStream()) { - blobStore.blobContainer(blobPath) - .writeBlobAtomic(fileSnapshot.getName(), inputStream, fileSnapshot.getContentLength(), true); + threadPool.executor(threadPoolName).execute(ActionRunnable.wrap(listener, l -> { + try { + uploadBlob(fileSnapshot, blobPath, writePriority); l.onResponse(fileSnapshot); } catch (Exception e) { logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", fileSnapshot.getName()), e); @@ -65,14 +75,84 @@ public void uploadBlobAsync( } @Override - public void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remoteTransferPath) throws IOException { - assert remoteTransferPath instanceof BlobPath; + public void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remoteTransferPath, WritePriority writePriority) + throws IOException { BlobPath blobPath = (BlobPath) remoteTransferPath; try (InputStream inputStream = fileSnapshot.inputStream()) { blobStore.blobContainer(blobPath).writeBlobAtomic(fileSnapshot.getName(), inputStream, fileSnapshot.getContentLength(), true); } } + @Override + public void uploadBlobs( + Set fileSnapshots, + final Map blobPaths, + ActionListener listener, + WritePriority writePriority + ) { + fileSnapshots.forEach(fileSnapshot -> { + BlobPath blobPath = blobPaths.get(fileSnapshot.getPrimaryTerm()); + if (!(blobStore.blobContainer(blobPath) 
instanceof VerifyingMultiStreamBlobContainer)) { + uploadBlob(ThreadPool.Names.TRANSLOG_TRANSFER, fileSnapshot, blobPath, listener, writePriority); + } else { + uploadBlob(fileSnapshot, listener, blobPath, writePriority); + } + }); + + } + + private void uploadBlob( + TransferFileSnapshot fileSnapshot, + ActionListener listener, + BlobPath blobPath, + WritePriority writePriority + ) { + + try { + ChannelFactory channelFactory = FileChannel::open; + long contentLength; + try (FileChannel channel = channelFactory.open(fileSnapshot.getPath(), StandardOpenOption.READ)) { + contentLength = channel.size(); + } + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + fileSnapshot.getName(), + fileSnapshot.getName(), + contentLength, + true, + writePriority, + (size, position) -> new OffsetRangeFileInputStream(fileSnapshot.getPath(), size, position), + Objects.requireNonNull(fileSnapshot.getChecksum()), + blobStore.blobContainer(blobPath) instanceof VerifyingMultiStreamBlobContainer + ); + ActionListener completionListener = ActionListener.wrap(resp -> listener.onResponse(fileSnapshot), ex -> { + logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", fileSnapshot.getName()), ex); + listener.onFailure(new FileTransferException(fileSnapshot, ex)); + }); + + completionListener = ActionListener.runBefore(completionListener, () -> { + try { + remoteTransferContainer.close(); + } catch (Exception e) { + logger.warn("Error occurred while closing streams", e); + } + }); + + WriteContext writeContext = remoteTransferContainer.createWriteContext(); + ((VerifyingMultiStreamBlobContainer) blobStore.blobContainer(blobPath)).asyncBlobUpload(writeContext, completionListener); + + } catch (Exception e) { + logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", fileSnapshot.getName()), e); + listener.onFailure(new FileTransferException(fileSnapshot, e)); + } finally { + try { + fileSnapshot.close(); + } catch (IOException e) { + logger.warn("Error while closing TransferFileSnapshot", e); + } + } + + } + @Override public InputStream downloadBlob(Iterable path, String fileName) throws IOException { return blobStore.blobContainer((BlobPath) path).readBlob(fileName); diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java b/server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java index 239ef7c3c9300..dcec94edd694f 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java @@ -107,10 +107,12 @@ public void close() throws IOException { public static class TransferFileSnapshot extends FileSnapshot { private final long primaryTerm; + private Long checksum; - public TransferFileSnapshot(Path path, long primaryTerm) throws IOException { + public TransferFileSnapshot(Path path, long primaryTerm, Long checksum) throws IOException { super(path); this.primaryTerm = primaryTerm; + this.checksum = checksum; } public TransferFileSnapshot(String name, byte[] content, long primaryTerm) throws IOException { @@ -118,6 +120,10 @@ public TransferFileSnapshot(String name, byte[] content, long primaryTerm) throw this.primaryTerm = primaryTerm; } + public Long getChecksum() { + return checksum; + } + public long getPrimaryTerm() { return primaryTerm; } @@ -148,8 +154,8 @@ public static final class TranslogFileSnapshot extends TransferFileSnapshot { private final long generation; - public 
TranslogFileSnapshot(long primaryTerm, long generation, Path path) throws IOException { - super(path, primaryTerm); + public TranslogFileSnapshot(long primaryTerm, long generation, Path path, Long checksum) throws IOException { + super(path, primaryTerm, checksum); this.generation = generation; } @@ -185,8 +191,9 @@ public static final class CheckpointFileSnapshot extends TransferFileSnapshot { private final long minTranslogGeneration; - public CheckpointFileSnapshot(long primaryTerm, long generation, long minTranslogGeneration, Path path) throws IOException { - super(path, primaryTerm); + public CheckpointFileSnapshot(long primaryTerm, long generation, long minTranslogGeneration, Path path, Long checksum) + throws IOException { + super(path, primaryTerm, checksum); this.minTranslogGeneration = minTranslogGeneration; this.generation = generation; } diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java b/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java index 0e6496042e3d8..a240fd38cda11 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TransferService.java @@ -10,11 +10,14 @@ import org.opensearch.action.ActionListener; import org.opensearch.common.blobstore.BlobMetadata; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot; import java.io.IOException; import java.io.InputStream; import java.util.List; +import java.util.Map; import java.util.Set; /** @@ -26,25 +29,40 @@ public interface TransferService { /** * Uploads the {@link TransferFileSnapshot} async, once the upload is complete the callback is invoked - * @param threadpoolName threadpool type which will be used to upload blobs asynchronously + * @param threadPoolName threadpool type which will be used to upload blobs asynchronously * @param fileSnapshot the file snapshot to upload * @param remotePath the remote path where upload should be made * @param listener the callback to be invoked once upload completes successfully/fails */ - void uploadBlobAsync( - String threadpoolName, + void uploadBlob( + String threadPoolName, final TransferFileSnapshot fileSnapshot, Iterable remotePath, - ActionListener listener + ActionListener listener, + WritePriority writePriority ); + /** + * Uploads multiple {@link TransferFileSnapshot}, once the upload is complete the callback is invoked + * @param fileSnapshots the file snapshots to upload + * @param blobPaths Primary term to {@link BlobPath} map + * @param listener the callback to be invoked once uploads complete successfully/fail + */ + void uploadBlobs( + Set fileSnapshots, + final Map blobPaths, + ActionListener listener, + WritePriority writePriority + ) throws Exception; + /** * Uploads the {@link TransferFileSnapshot} blob * @param fileSnapshot the file snapshot to upload * @param remotePath the remote path where upload should be made + * @param writePriority Priority by which content needs to be written. 
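uploadBlobs is the new batching entry point, and BlobStoreTransferService decides per container whether the verifying multi-stream path is available. A compact sketch of that capability check follows; all interfaces below are simplified stand-ins for the real BlobContainer hierarchy, not the PR's API:

```java
import java.util.concurrent.CompletableFuture;

// Illustrative stand-ins; the PR dispatches on whether the real container
// implements VerifyingMultiStreamBlobContainer.
interface BlobContainer {
    void writeBlobAtomic(String name, byte[] bytes);
}

interface MultiStreamBlobContainer extends BlobContainer {
    CompletableFuture<Void> asyncBlobUpload(String name, byte[] bytes, long expectedChecksum);
}

public class UploadDispatch {
    static CompletableFuture<Void> upload(BlobContainer container, String name, byte[] bytes, long checksum) {
        if (container instanceof MultiStreamBlobContainer) {
            // Multi-part path with remote-side integrity verification.
            return ((MultiStreamBlobContainer) container).asyncBlobUpload(name, bytes, checksum);
        }
        // Fallback: plain atomic single-stream write on a worker thread.
        return CompletableFuture.runAsync(() -> container.writeBlobAtomic(name, bytes));
    }

    public static void main(String[] args) {
        BlobContainer plain = (name, bytes) -> System.out.println("single-stream wrote " + name);
        upload(plain, "translog-1.tlog", new byte[16], 42L).join();
    }
}
```

The real implementation additionally wires the expected checksum and an OffsetRangeFileInputStream supplier through a RemoteTransferContainer on the multi-stream path; the fallback remains the atomic single-stream write shown in the diff.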
* @throws IOException the exception while transferring the data */ - void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remotePath) throws IOException; + void uploadBlob(final TransferFileSnapshot fileSnapshot, Iterable remotePath, WritePriority writePriority) throws IOException; void deleteBlobs(Iterable path, List fileNames) throws IOException; diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java index b34c2282e874f..10dec13c81e1a 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java @@ -145,8 +145,14 @@ public TranslogCheckpointTransferSnapshot build() throws IOException { Path checkpointPath = location.resolve(checkpointGenFileNameMapper.apply(readerGeneration)); generations.add(readerGeneration); translogTransferSnapshot.add( - new TranslogFileSnapshot(readerPrimaryTerm, readerGeneration, translogPath), - new CheckpointFileSnapshot(readerPrimaryTerm, checkpointGeneration, minTranslogGeneration, checkpointPath) + new TranslogFileSnapshot(readerPrimaryTerm, readerGeneration, translogPath, reader.getTranslogChecksum()), + new CheckpointFileSnapshot( + readerPrimaryTerm, + checkpointGeneration, + minTranslogGeneration, + checkpointPath, + reader.getCheckpointChecksum() + ) ); if (readerGeneration > highestGeneration) { highestGeneration = readerGeneration; diff --git a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java index 54140226e3744..0c63a7ffe4cce 100644 --- a/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java +++ b/server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java @@ -17,6 +17,7 @@ import org.opensearch.common.SetOnce; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.io.VersionedCodecStreamWrapper; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -119,14 +120,16 @@ public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTrans }), latch ); + Map blobPathMap = new HashMap<>(); toUpload.forEach( - fileSnapshot -> transferService.uploadBlobAsync( - ThreadPool.Names.TRANSLOG_TRANSFER, - fileSnapshot, - remoteDataTransferPath.add(String.valueOf(fileSnapshot.getPrimaryTerm())), - latchedActionListener + fileSnapshot -> blobPathMap.put( + fileSnapshot.getPrimaryTerm(), + remoteDataTransferPath.add(String.valueOf(fileSnapshot.getPrimaryTerm())) ) ); + + transferService.uploadBlobs(toUpload, blobPathMap, latchedActionListener, WritePriority.HIGH); + try { if (latch.await(TRANSFER_TIMEOUT_IN_MILLIS, TimeUnit.MILLISECONDS) == false) { Exception ex = new TimeoutException("Timed out waiting for transfer of snapshot " + transferSnapshot + " to complete"); @@ -139,7 +142,7 @@ public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTrans throw ex; } if (exceptionList.isEmpty()) { - transferService.uploadBlob(prepareMetadata(transferSnapshot), remoteMetadataTransferPath); + 
transferService.uploadBlob(prepareMetadata(transferSnapshot), remoteMetadataTransferPath, WritePriority.HIGH); translogTransferListener.onUploadComplete(transferSnapshot); return true; } else { diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index ebc68c288e25a..d9f73a9b41658 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -46,10 +46,10 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.concurrent.OpenSearchExecutors; -import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.common.util.concurrent.OpenSearchThreadPoolExecutor; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.common.util.concurrent.XRejectedExecutionHandler; +import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.node.Node; diff --git a/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java b/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java index 1ebec46042247..48940a0d401fd 100644 --- a/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java +++ b/server/src/test/java/org/opensearch/common/blobstore/transfer/RemoteTransferContainerTests.java @@ -9,6 +9,7 @@ package org.opensearch.common.blobstore.transfer; import org.junit.Before; +import org.opensearch.common.blobstore.stream.write.WriteContext; import org.opensearch.common.io.InputStreamContainer; import org.opensearch.common.StreamContext; import org.opensearch.common.blobstore.stream.write.WritePriority; @@ -21,6 +22,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.UUID; public class RemoteTransferContainerTests extends OpenSearchTestCase { @@ -140,6 +142,45 @@ public void testTypeOfProvidedStreamsAllCases() throws IOException { testTypeOfProvidedStreams(false); } + public void testCreateWriteContextAllCases() throws IOException { + testCreateWriteContext(true); + testCreateWriteContext(false); + } + + private void testCreateWriteContext(boolean doRemoteDataIntegrityCheck) throws IOException { + String remoteFileName = testFile.getFileName().toString() + UUID.randomUUID(); + Long expectedChecksum = randomLong(); + try ( + RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( + testFile.getFileName().toString(), + remoteFileName, + TEST_FILE_SIZE_BYTES, + true, + WritePriority.HIGH, + new RemoteTransferContainer.OffsetRangeInputStreamSupplier() { + @Override + public OffsetRangeInputStream get(long size, long position) throws IOException { + return new OffsetRangeFileInputStream(testFile, size, position); + } + }, + expectedChecksum, + doRemoteDataIntegrityCheck + ) + ) { + WriteContext writeContext = remoteTransferContainer.createWriteContext(); + assertEquals(remoteFileName, writeContext.getFileName()); + assertTrue(writeContext.isFailIfAlreadyExists()); + assertEquals(TEST_FILE_SIZE_BYTES, writeContext.getFileSize()); + assertEquals(WritePriority.HIGH, writeContext.getWritePriority()); + assertEquals(doRemoteDataIntegrityCheck, 
writeContext.doRemoteDataIntegrityCheck()); + if (doRemoteDataIntegrityCheck) { + assertEquals(expectedChecksum, writeContext.getExpectedChecksum()); + } else { + assertNull(writeContext.getExpectedChecksum()); + } + } + } + private void testTypeOfProvidedStreams(boolean isRemoteDataIntegritySupported) throws IOException { try ( RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer( diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index c37893877253e..ea092fffa3a9a 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -23,8 +23,12 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.junit.After; import org.junit.Before; +import org.mockito.Mockito; +import org.opensearch.action.ActionListener; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.UUIDs; +import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer; +import org.opensearch.common.blobstore.stream.write.WriteContext; import org.opensearch.common.bytes.BytesReference; import org.opensearch.common.io.VersionedCodecStreamWrapper; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -48,6 +52,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.HashMap; +import java.util.Collection; import java.util.concurrent.ExecutorService; import static org.mockito.Mockito.mock; @@ -68,6 +76,7 @@ public class RemoteSegmentStoreDirectoryTests extends IndexShardTestCase { private RemoteStoreMetadataLockManager mdLockManager; private RemoteSegmentStoreDirectory remoteSegmentStoreDirectory; + private TestUploadListener testUploadTracker; private IndexShard indexShard; private SegmentInfos segmentInfos; private ThreadPool threadPool; @@ -89,6 +98,7 @@ public void setup() throws IOException { mdLockManager, threadPool ); + testUploadTracker = new TestUploadListener(); Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, org.opensearch.Version.CURRENT).build(); ExecutorService executorService = OpenSearchExecutors.newDirectExecutorService(); @@ -503,6 +513,82 @@ public void testCopyFrom() throws IOException { storeDirectory.close(); } + public void testCopyFilesFromMultipart() throws Exception { + String filename = "_100.si"; + populateMetadata(); + remoteSegmentStoreDirectory.init(); + + Directory storeDirectory = LuceneTestCase.newDirectory(); + IndexOutput indexOutput = storeDirectory.createOutput(filename, IOContext.DEFAULT); + indexOutput.writeString("Hello World!"); + CodecUtil.writeFooter(indexOutput); + indexOutput.close(); + storeDirectory.sync(List.of(filename)); + + assertFalse(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); + + VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class); + when(remoteDataDirectory.getBlobContainer()).thenReturn(blobContainer); + Mockito.doAnswer(invocation -> { + ActionListener completionListener = invocation.getArgument(1); + completionListener.onResponse(null); + return null; + }).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + + CountDownLatch latch = new CountDownLatch(1); + ActionListener 
completionListener = new ActionListener() { + @Override + public void onResponse(Void unused) { + latch.countDown(); + } + + @Override + public void onFailure(Exception e) {} + }; + remoteSegmentStoreDirectory.copyFrom(storeDirectory, filename, IOContext.DEFAULT, completionListener); + assertTrue(latch.await(5000, TimeUnit.SECONDS)); + assertTrue(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); + storeDirectory.close(); + } + + public void testCopyFilesFromMultipartIOException() throws Exception { + String filename = "_100.si"; + populateMetadata(); + remoteSegmentStoreDirectory.init(); + + Directory storeDirectory = LuceneTestCase.newDirectory(); + IndexOutput indexOutput = storeDirectory.createOutput(filename, IOContext.DEFAULT); + indexOutput.writeString("Hello World!"); + CodecUtil.writeFooter(indexOutput); + indexOutput.close(); + storeDirectory.sync(List.of(filename)); + + assertFalse(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); + + VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class); + when(remoteDataDirectory.getBlobContainer()).thenReturn(blobContainer); + Mockito.doAnswer(invocation -> { + ActionListener completionListener = invocation.getArgument(1); + completionListener.onFailure(new Exception("Test exception")); + return null; + }).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + CountDownLatch latch = new CountDownLatch(1); + ActionListener completionListener = new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + latch.countDown(); + } + }; + remoteSegmentStoreDirectory.copyFrom(storeDirectory, filename, IOContext.DEFAULT, completionListener); + assertTrue(latch.await(5000, TimeUnit.SECONDS)); + assertFalse(remoteSegmentStoreDirectory.getSegmentsUploadedToRemoteStore().containsKey(filename)); + + storeDirectory.close(); + } + public void testCopyFromException() throws IOException { String filename = "_100.si"; Directory storeDirectory = LuceneTestCase.newDirectory(); diff --git a/server/src/test/java/org/opensearch/index/store/TestUploadListener.java b/server/src/test/java/org/opensearch/index/store/TestUploadListener.java new file mode 100644 index 0000000000000..a2a61a93371e8 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/store/TestUploadListener.java @@ -0,0 +1,43 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */
+
+package org.opensearch.index.store;
+
+import org.opensearch.common.util.UploadListener;
+
+import java.util.concurrent.ConcurrentHashMap;
+
+public class TestUploadListener implements UploadListener {
+
+    private final ConcurrentHashMap<String, UploadStatus> uploadStatusMap = new ConcurrentHashMap<>();
+
+    enum UploadStatus {
+        BEFORE_UPLOAD,
+        UPLOAD_SUCCESS,
+        UPLOAD_FAILURE
+    }
+
+    @Override
+    public void beforeUpload(String file) {
+        uploadStatusMap.put(file, UploadStatus.BEFORE_UPLOAD);
+    }
+
+    @Override
+    public void onSuccess(String file) {
+        uploadStatusMap.put(file, UploadStatus.UPLOAD_SUCCESS);
+    }
+
+    @Override
+    public void onFailure(String file) {
+        uploadStatusMap.put(file, UploadStatus.UPLOAD_FAILURE);
+    }
+
+    public UploadStatus getUploadStatus(String file) {
+        return uploadStatusMap.get(file);
+    }
+}
diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceMockRepositoryTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceMockRepositoryTests.java
new file mode 100644
index 0000000000000..1175716679d0f
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceMockRepositoryTests.java
@@ -0,0 +1,189 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.translog.transfer;
+
+import org.mockito.Mockito;
+import org.opensearch.action.ActionListener;
+import org.opensearch.action.LatchedActionListener;
+import org.opensearch.common.blobstore.BlobPath;
+import org.opensearch.common.blobstore.BlobStore;
+import org.opensearch.common.blobstore.VerifyingMultiStreamBlobContainer;
+import org.opensearch.common.blobstore.stream.write.WriteContext;
+import org.opensearch.common.blobstore.stream.write.WritePriority;
+import org.opensearch.test.OpenSearchTestCase;
+import org.opensearch.threadpool.TestThreadPool;
+import org.opensearch.threadpool.ThreadPool;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+public class BlobStoreTransferServiceMockRepositoryTests extends OpenSearchTestCase {
+
+    private ThreadPool threadPool;
+
+    private BlobStore blobStore;
+
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        blobStore = mock(BlobStore.class);
+        threadPool = new TestThreadPool(getClass().getName());
+    }
+
+    public void testUploadBlobs() throws Exception {
+        Path testFile = createTempFile();
+        Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND);
+        FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(
+            testFile,
+            randomNonNegativeLong(),
+            0L
+        );
+
+        VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class);
+        Mockito.doAnswer(invocation -> {
+            ActionListener<Void> completionListener = invocation.getArgument(1);
completionListener.onResponse(null); + return null; + }).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); + + TransferService transferService = new BlobStoreTransferService(blobStore, threadPool); + CountDownLatch latch = new CountDownLatch(1); + AtomicBoolean onResponseCalled = new AtomicBoolean(false); + AtomicReference exceptionRef = new AtomicReference<>(); + AtomicReference fileSnapshotRef = new AtomicReference<>(); + transferService.uploadBlobs(Collections.singleton(transferFileSnapshot), new HashMap<>() { + { + put(transferFileSnapshot.getPrimaryTerm(), new BlobPath().add("sample_path")); + } + }, new LatchedActionListener<>(new ActionListener<>() { + @Override + public void onResponse(FileSnapshot.TransferFileSnapshot fileSnapshot) { + onResponseCalled.set(true); + fileSnapshotRef.set(fileSnapshot); + } + + @Override + public void onFailure(Exception e) { + exceptionRef.set(e); + } + }, latch), WritePriority.HIGH); + + assertTrue(latch.await(1000, TimeUnit.MILLISECONDS)); + verify(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + assertTrue(onResponseCalled.get()); + assertEquals(transferFileSnapshot.getPrimaryTerm(), fileSnapshotRef.get().getPrimaryTerm()); + assertEquals(transferFileSnapshot.getName(), fileSnapshotRef.get().getName()); + assertNull(exceptionRef.get()); + } + + public void testUploadBlobsIOException() throws Exception { + Path testFile = createTempFile(); + Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND); + FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot( + testFile, + randomNonNegativeLong(), + 0L + ); + + VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class); + doThrow(new IOException()).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); + + TransferService transferService = new BlobStoreTransferService(blobStore, threadPool); + CountDownLatch latch = new CountDownLatch(1); + AtomicBoolean onResponseCalled = new AtomicBoolean(false); + AtomicReference exceptionRef = new AtomicReference<>(); + transferService.uploadBlobs(Collections.singleton(transferFileSnapshot), new HashMap<>() { + { + put(transferFileSnapshot.getPrimaryTerm(), new BlobPath().add("sample_path")); + } + }, new LatchedActionListener<>(new ActionListener<>() { + @Override + public void onResponse(FileSnapshot.TransferFileSnapshot fileSnapshot) { + onResponseCalled.set(true); + } + + @Override + public void onFailure(Exception e) { + exceptionRef.set(e); + } + }, latch), WritePriority.HIGH); + + assertTrue(latch.await(1000, TimeUnit.MILLISECONDS)); + verify(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + assertFalse(onResponseCalled.get()); + assertTrue(exceptionRef.get() instanceof FileTransferException); + } + + public void testUploadBlobsUploadFutureCompletedExceptionally() throws Exception { + Path testFile = createTempFile(); + Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND); + FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot( + testFile, + randomNonNegativeLong(), + 0L + ); + + VerifyingMultiStreamBlobContainer blobContainer = mock(VerifyingMultiStreamBlobContainer.class); + Mockito.doAnswer(invocation -> { + ActionListener completionListener = invocation.getArgument(1); 
+ completionListener.onFailure(new Exception("Test exception")); + return null; + }).when(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + + when(blobStore.blobContainer(any(BlobPath.class))).thenReturn(blobContainer); + + TransferService transferService = new BlobStoreTransferService(blobStore, threadPool); + CountDownLatch latch = new CountDownLatch(1); + AtomicBoolean onResponseCalled = new AtomicBoolean(false); + AtomicReference exceptionRef = new AtomicReference<>(); + LatchedActionListener listener = new LatchedActionListener<>(new ActionListener<>() { + @Override + public void onResponse(FileSnapshot.TransferFileSnapshot fileSnapshot) { + onResponseCalled.set(true); + } + + @Override + public void onFailure(Exception e) { + exceptionRef.set(e); + } + }, latch); + transferService.uploadBlobs(Collections.singleton(transferFileSnapshot), new HashMap<>() { + { + put(transferFileSnapshot.getPrimaryTerm(), new BlobPath().add("sample_path")); + } + }, listener, WritePriority.HIGH); + + assertTrue(latch.await(1000, TimeUnit.MILLISECONDS)); + verify(blobContainer).asyncBlobUpload(any(WriteContext.class), any()); + assertFalse(onResponseCalled.get()); + assertTrue(exceptionRef.get() instanceof FileTransferException); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + ThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS); + } +} diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceTests.java index 196fbd58c2c20..5502dc3089c62 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/BlobStoreTransferServiceTests.java @@ -12,6 +12,7 @@ import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.env.Environment; @@ -49,9 +50,13 @@ public void setUp() throws Exception { public void testUploadBlob() throws IOException { Path testFile = createTempFile(); Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND); - FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(testFile, randomNonNegativeLong()); + FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot( + testFile, + randomNonNegativeLong(), + null + ); TransferService transferService = new BlobStoreTransferService(repository.blobStore(), threadPool); - transferService.uploadBlob(transferFileSnapshot, repository.basePath()); + transferService.uploadBlob(transferFileSnapshot, repository.basePath(), WritePriority.HIGH); } public void testUploadBlobFromByteArray() throws IOException { @@ -61,17 +66,21 @@ public void testUploadBlobFromByteArray() throws IOException { 1 ); TransferService transferService = new BlobStoreTransferService(repository.blobStore(), threadPool); - transferService.uploadBlob(transferFileSnapshot, repository.basePath()); + transferService.uploadBlob(transferFileSnapshot, repository.basePath(), WritePriority.NORMAL); } public void testUploadBlobAsync() throws IOException, InterruptedException { Path testFile = createTempFile(); 
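The mock-repository tests above all follow the same recipe: stub asyncBlobUpload with doAnswer, complete the captured listener inline, and let a latch prove the callback fired. Reduced to its core, and assuming Mockito on the classpath (the AsyncUploader interface here is hypothetical):

```java
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

public class AsyncUploadStubDemo {
    // Illustrative async API; the real tests stub VerifyingMultiStreamBlobContainer.asyncBlobUpload.
    interface AsyncUploader {
        void asyncUpload(String file, Consumer<Exception> completion);
    }

    public static void main(String[] args) throws InterruptedException {
        AsyncUploader uploader = mock(AsyncUploader.class);

        // Complete the callback immediately from the stub, so the test can assert
        // on the completion path without any real I/O.
        doAnswer(invocation -> {
            Consumer<Exception> completion = invocation.getArgument(1);
            completion.accept(null); // null = success; pass an Exception to exercise the failure path
            return null;
        }).when(uploader).asyncUpload(any(), any());

        CountDownLatch latch = new CountDownLatch(1);
        uploader.asyncUpload("translog-1.tlog", e -> latch.countDown());
        System.out.println("completed = " + latch.await(5, TimeUnit.SECONDS));
    }
}
```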
Files.write(testFile, randomByteArrayOfLength(128), StandardOpenOption.APPEND); AtomicBoolean succeeded = new AtomicBoolean(false); - FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot(testFile, randomNonNegativeLong()); + FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot( + testFile, + randomNonNegativeLong(), + null + ); CountDownLatch latch = new CountDownLatch(1); TransferService transferService = new BlobStoreTransferService(repository.blobStore(), threadPool); - transferService.uploadBlobAsync( + transferService.uploadBlob( ThreadPool.Names.TRANSLOG_TRANSFER, transferFileSnapshot, repository.basePath(), @@ -87,7 +96,8 @@ public void onResponse(FileSnapshot.TransferFileSnapshot fileSnapshot) { public void onFailure(Exception e) { throw new AssertionError("Failed to perform uploadBlobAsync", e); } - }, latch) + }, latch), + WritePriority.HIGH ); assertTrue(latch.await(1000, TimeUnit.MILLISECONDS)); assertTrue(succeeded.get()); diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/FileSnapshotTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/FileSnapshotTests.java index 6d2fb3794b107..8d07af5927135 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/FileSnapshotTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/FileSnapshotTests.java @@ -28,15 +28,15 @@ public void tearDown() throws Exception { public void testFileSnapshotPath() throws IOException { Path file = createTempFile(); Files.writeString(file, "hello"); - fileSnapshot = new FileSnapshot.TransferFileSnapshot(file, 12); + fileSnapshot = new FileSnapshot.TransferFileSnapshot(file, 12, null); assertFileSnapshotProperties(file); - try (FileSnapshot sameFileSnapshot = new FileSnapshot.TransferFileSnapshot(file, 12)) { + try (FileSnapshot sameFileSnapshot = new FileSnapshot.TransferFileSnapshot(file, 12, null)) { assertEquals(sameFileSnapshot, fileSnapshot); } - try (FileSnapshot sameFileDiffPTSnapshot = new FileSnapshot.TransferFileSnapshot(file, 34)) { + try (FileSnapshot sameFileDiffPTSnapshot = new FileSnapshot.TransferFileSnapshot(file, 34, null)) { assertNotEquals(sameFileDiffPTSnapshot, fileSnapshot); } } diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/FileTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/FileTransferTrackerTests.java index be14e4a7bd380..fd0d44564ef6b 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/FileTransferTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/FileTransferTrackerTests.java @@ -34,7 +34,8 @@ public void testOnSuccess() throws IOException { try ( FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot( testFile, - randomNonNegativeLong() + randomNonNegativeLong(), + null ) ) { fileTransferTracker.onSuccess(transferFileSnapshot); @@ -58,11 +59,13 @@ public void testOnFailure() throws IOException { try ( FileSnapshot.TransferFileSnapshot transferFileSnapshot = new FileSnapshot.TransferFileSnapshot( testFile, - randomNonNegativeLong() + randomNonNegativeLong(), + null ); FileSnapshot.TransferFileSnapshot transferFileSnapshot2 = new FileSnapshot.TransferFileSnapshot( testFile2, - randomNonNegativeLong() + randomNonNegativeLong(), + null ) ) { @@ -82,7 +85,8 @@ public void testUploaded() throws IOException { try ( FileSnapshot.TransferFileSnapshot transferFileSnapshot 
= new FileSnapshot.TransferFileSnapshot( testFile, - randomNonNegativeLong() + randomNonNegativeLong(), + null ); ) { diff --git a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java index 5f8aa64457896..66cd257299e25 100644 --- a/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java +++ b/server/src/test/java/org/opensearch/index/translog/transfer/TranslogTransferManagerTests.java @@ -17,6 +17,7 @@ import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; import org.opensearch.common.blobstore.support.PlainBlobMetadata; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.index.Index; import org.opensearch.index.shard.ShardId; import org.opensearch.index.translog.Translog; @@ -41,6 +42,8 @@ import java.util.concurrent.atomic.AtomicInteger; import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyMap; +import static org.mockito.ArgumentMatchers.anySet; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; @@ -80,20 +83,24 @@ public void tearDown() throws Exception { } @SuppressWarnings("unchecked") - public void testTransferSnapshot() throws IOException { + public void testTransferSnapshot() throws Exception { AtomicInteger fileTransferSucceeded = new AtomicInteger(); AtomicInteger fileTransferFailed = new AtomicInteger(); AtomicInteger translogTransferSucceeded = new AtomicInteger(); AtomicInteger translogTransferFailed = new AtomicInteger(); doNothing().when(transferService) - .uploadBlob(any(TransferFileSnapshot.class), Mockito.eq(remoteBaseTransferPath.add(String.valueOf(primaryTerm)))); + .uploadBlob( + any(TransferFileSnapshot.class), + Mockito.eq(remoteBaseTransferPath.add(String.valueOf(primaryTerm))), + any(WritePriority.class) + ); doAnswer(invocationOnMock -> { - ActionListener listener = (ActionListener) invocationOnMock.getArguments()[3]; - listener.onResponse((TransferFileSnapshot) invocationOnMock.getArguments()[1]); + ActionListener listener = (ActionListener) invocationOnMock.getArguments()[2]; + Set transferFileSnapshots = (Set) invocationOnMock.getArguments()[0]; + transferFileSnapshots.forEach(listener::onResponse); return null; - }).when(transferService) - .uploadBlobAsync(any(String.class), any(TransferFileSnapshot.class), any(BlobPath.class), any(ActionListener.class)); + }).when(transferService).uploadBlobs(anySet(), anyMap(), any(ActionListener.class), any(WritePriority.class)); FileTransferTracker fileTransferTracker = new FileTransferTracker(new ShardId("index", "indexUUid", 0)) { @Override @@ -145,13 +152,15 @@ public Set getCheckpointFileSnapshots() { primaryTerm, generation, minTranslogGeneration, - createTempFile(Translog.TRANSLOG_FILE_PREFIX + generation, Translog.CHECKPOINT_SUFFIX) + createTempFile(Translog.TRANSLOG_FILE_PREFIX + generation, Translog.CHECKPOINT_SUFFIX), + null ), new CheckpointFileSnapshot( primaryTerm, generation, minTranslogGeneration, - createTempFile(Translog.TRANSLOG_FILE_PREFIX + (generation - 1), Translog.CHECKPOINT_SUFFIX) + createTempFile(Translog.TRANSLOG_FILE_PREFIX + (generation - 1), Translog.CHECKPOINT_SUFFIX), + null ) ); } catch (IOException e) { @@ -166,12 +175,14 @@ public Set getTranslogFileSnapshots() { new TranslogFileSnapshot( primaryTerm, generation, - 
createTempFile(Translog.TRANSLOG_FILE_PREFIX + generation, Translog.TRANSLOG_FILE_SUFFIX)
+                        createTempFile(Translog.TRANSLOG_FILE_PREFIX + generation, Translog.TRANSLOG_FILE_SUFFIX),
+                        null
                     ),
                     new TranslogFileSnapshot(
                         primaryTerm,
                         generation - 1,
-                        createTempFile(Translog.TRANSLOG_FILE_PREFIX + (generation - 1), Translog.TRANSLOG_FILE_SUFFIX)
+                        createTempFile(Translog.TRANSLOG_FILE_PREFIX + (generation - 1), Translog.TRANSLOG_FILE_SUFFIX),
+                        null
                     )
                 );
             } catch (IOException e) {
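The snapshots built in these tests pass null checksums throughout, which keeps the checksum plumbing strictly opt-in: verification only happens when a checksum was captured at write time. A small illustration of how a nullable expected checksum can degrade to a no-op check (a hypothetical helper, not the PR's API):

```java
import java.util.zip.CRC32;

public class OptionalChecksumVerify {
    // When no checksum was recorded at upload time (null), skip the comparison
    // rather than failing it, mirroring how the tests above pass null safely.
    static boolean verify(byte[] downloaded, Long expectedChecksum) {
        if (expectedChecksum == null) {
            return true; // nothing recorded, nothing to compare
        }
        CRC32 crc = new CRC32();
        crc.update(downloaded, 0, downloaded.length);
        return crc.getValue() == expectedChecksum;
    }

    public static void main(String[] args) {
        byte[] data = { 1, 2, 3 };
        CRC32 crc = new CRC32();
        crc.update(data, 0, data.length);
        System.out.println(verify(data, crc.getValue())); // true: checksums match
        System.out.println(verify(data, null));           // true: checksum absent, check skipped
    }
}
```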