diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java index 987a3e3ffa7d3..7fa63ae8abc62 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java @@ -39,6 +39,7 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.io.IOUtils; import java.io.Closeable; @@ -52,6 +53,7 @@ import java.util.Set; import static org.opensearch.cluster.coordination.Coordinator.ZEN1_BWC_TERM; +import static org.opensearch.common.util.FeatureFlags.REMOTE_PUBLICATION_EXPERIMENTAL; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteStoreClusterStateEnabled; /** @@ -79,6 +81,7 @@ public class CoordinationState { private VotingConfiguration lastPublishedConfiguration; private VoteCollection publishVotes; private final boolean isRemoteStateEnabled; + private final boolean isRemotePublicationEnabled; public CoordinationState( DiscoveryNode localNode, @@ -102,6 +105,12 @@ public CoordinationState( .getLastAcceptedConfiguration(); this.publishVotes = new VoteCollection(); this.isRemoteStateEnabled = isRemoteStoreClusterStateEnabled(settings); + this.isRemotePublicationEnabled = FeatureFlags.isEnabled(REMOTE_PUBLICATION_EXPERIMENTAL) + && localNode.isRemoteStatePublicationEnabled(); + } + + public boolean isRemotePublicationEnabled() { + return isRemotePublicationEnabled; } public long getCurrentTerm() { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 4d371cba45e03..bbfa265d50bcf 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -86,6 +86,7 @@ import org.opensearch.discovery.PeerFinder; import org.opensearch.discovery.SeedHostsProvider; import org.opensearch.discovery.SeedHostsResolver; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.monitor.NodeHealthService; import org.opensearch.monitor.StatusInfo; import org.opensearch.node.remotestore.RemoteStoreNodeService; @@ -210,7 +211,8 @@ public Coordinator( NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, RemoteStoreNodeService remoteStoreNodeService, - ClusterManagerMetrics clusterManagerMetrics + ClusterManagerMetrics clusterManagerMetrics, + RemoteClusterStateService remoteClusterStateService ) { this.settings = settings; this.transportService = transportService; @@ -262,7 +264,8 @@ public Coordinator( transportService, namedWriteableRegistry, this::handlePublishRequest, - this::handleApplyCommit + this::handleApplyCommit, + remoteClusterStateService ); this.leaderChecker = new LeaderChecker( settings, @@ -1331,7 +1334,9 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) + clusterState; final PublicationTransportHandler.PublicationContext publicationContext = publicationHandler.newPublicationContext( - clusterChangedEvent + clusterChangedEvent, + coordinationState.get().isRemotePublicationEnabled(), + persistedStateRegistry ); final PublishRequest publishRequest = coordinationState.get().handleClientValue(clusterState); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index 1fdaeead0d28d..36eabd51ffda1 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -40,6 +40,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.Diff; import org.opensearch.cluster.IncompatibleClusterStateVersionException; +import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.core.action.ActionListener; @@ -47,6 +48,9 @@ import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.transport.TransportResponse; +import org.opensearch.gateway.GatewayMetaState.RemotePersistedState; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.BytesTransportRequest; import org.opensearch.transport.TransportChannel; @@ -74,6 +78,7 @@ public class PublicationTransportHandler { private static final Logger logger = LogManager.getLogger(PublicationTransportHandler.class); public static final String PUBLISH_STATE_ACTION_NAME = "internal:cluster/coordination/publish_state"; + public static final String PUBLISH_REMOTE_STATE_ACTION_NAME = "internal:cluster/coordination/publish_remote_state"; public static final String COMMIT_STATE_ACTION_NAME = "internal:cluster/coordination/commit_state"; private final TransportService transportService; @@ -97,16 +102,19 @@ public class PublicationTransportHandler { private final TransportRequestOptions stateRequestOptions = TransportRequestOptions.builder() .withType(TransportRequestOptions.Type.STATE) .build(); + private final RemoteClusterStateService remoteClusterStateService; public PublicationTransportHandler( TransportService transportService, NamedWriteableRegistry namedWriteableRegistry, Function handlePublishRequest, - BiConsumer> handleApplyCommit + BiConsumer> handleApplyCommit, + RemoteClusterStateService remoteClusterStateService ) { this.transportService = transportService; this.namedWriteableRegistry = namedWriteableRegistry; this.handlePublishRequest = handlePublishRequest; + this.remoteClusterStateService = remoteClusterStateService; transportService.registerRequestHandler( PUBLISH_STATE_ACTION_NAME, @@ -117,6 +125,15 @@ public PublicationTransportHandler( (request, channel, task) -> channel.sendResponse(handleIncomingPublishRequest(request)) ); + transportService.registerRequestHandler( + PUBLISH_REMOTE_STATE_ACTION_NAME, + ThreadPool.Names.GENERIC, + false, + false, + RemotePublishRequest::new, + (request, channel, task) -> channel.sendResponse(handleIncomingRemotePublishRequest(request)) + ); + transportService.registerRequestHandler( COMMIT_STATE_ACTION_NAME, ThreadPool.Names.GENERIC, @@ -211,6 +228,74 @@ private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque } } + // package private for testing + PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest request) throws IOException { + if (transportService.getLocalNode().equals(request.getSourceNode())) { + return acceptRemoteStateOnLocalNode(request); + } + // TODO Make cluster state download non-blocking: https://github.com/opensearch-project/OpenSearch/issues/14102 + ClusterMetadataManifest manifest = remoteClusterStateService.getClusterMetadataManifestByFileName( + request.getClusterUUID(), + request.getManifestFile() + ); + if (manifest == null) { + throw new IllegalStateException("Publication failed as manifest was not found for " + request); + } + boolean applyFullState = false; + final ClusterState lastSeen = lastSeenClusterState.get(); + if (lastSeen == null) { + logger.debug(() -> "Diff cannot be applied as there is no last cluster state"); + applyFullState = true; + } else if (manifest.getDiffManifest() == null) { + logger.trace(() -> "There is no diff in the manifest"); + applyFullState = true; + } else if (manifest.getDiffManifest().getFromStateUUID().equals(lastSeen.stateUUID()) == false) { + logger.debug(() -> "Last cluster state not compatible with the diff"); + applyFullState = true; + } + + if (applyFullState == true) { + logger.debug( + () -> new ParameterizedMessage( + "Downloading full cluster state for term {}, version {}, stateUUID {}", + manifest.getClusterTerm(), + manifest.getStateVersion(), + manifest.getStateUUID() + ) + ); + ClusterState clusterState = remoteClusterStateService.getClusterStateForManifest( + request.getClusterName(), + manifest, + transportService.getLocalNode().getId(), + true + ); + fullClusterStateReceivedCount.incrementAndGet(); + final PublishWithJoinResponse response = acceptState(clusterState); + lastSeenClusterState.set(clusterState); + return response; + } else { + logger.debug( + () -> new ParameterizedMessage( + "Downloading diff cluster state for term {}, version {}, previousUUID {}, current UUID {}", + manifest.getClusterTerm(), + manifest.getStateVersion(), + manifest.getDiffManifest().getFromStateUUID(), + manifest.getStateUUID() + ) + ); + ClusterState clusterState = remoteClusterStateService.getClusterStateUsingDiff( + request.getClusterName(), + manifest, + lastSeen, + transportService.getLocalNode().getId() + ); + compatibleClusterStateDiffReceivedCount.incrementAndGet(); + final PublishWithJoinResponse response = acceptState(clusterState); + lastSeenClusterState.compareAndSet(lastSeen, clusterState); + return response; + } + } + private PublishWithJoinResponse acceptState(ClusterState incomingState) { // if the state is coming from the current node, use original request instead (see currentPublishRequestToSelf for explanation) if (transportService.getLocalNode().equals(incomingState.nodes().getClusterManagerNode())) { @@ -224,8 +309,35 @@ private PublishWithJoinResponse acceptState(ClusterState incomingState) { return handlePublishRequest.apply(new PublishRequest(incomingState)); } - public PublicationContext newPublicationContext(ClusterChangedEvent clusterChangedEvent) { - final PublicationContext publicationContext = new PublicationContext(clusterChangedEvent); + private PublishWithJoinResponse acceptRemoteStateOnLocalNode(RemotePublishRequest remotePublishRequest) { + final PublishRequest publishRequest = currentPublishRequestToSelf.get(); + if (publishRequest == null + || publishRequest.getAcceptedState().coordinationMetadata().term() != remotePublishRequest.term + || publishRequest.getAcceptedState().version() != remotePublishRequest.version) { + logger.debug( + () -> new ParameterizedMessage( + "Publication failure for current publish request : {} and remote publish request: {}", + publishRequest, + remotePublishRequest + ) + ); + throw new IllegalStateException("publication to self failed for " + remotePublishRequest); + } + PublishWithJoinResponse publishWithJoinResponse = handlePublishRequest.apply(publishRequest); + lastSeenClusterState.set(publishRequest.getAcceptedState()); + return publishWithJoinResponse; + } + + public PublicationContext newPublicationContext( + ClusterChangedEvent clusterChangedEvent, + boolean isRemotePublicationEnabled, + PersistedStateRegistry persistedStateRegistry + ) { + final PublicationContext publicationContext = new PublicationContext( + clusterChangedEvent, + isRemotePublicationEnabled, + persistedStateRegistry + ); // Build the serializations we expect to need now, early in the process, so that an error during serialization fails the publication // straight away. This isn't watertight since we send diffs on a best-effort basis and may fall back to sending a full state (and @@ -234,6 +346,16 @@ public PublicationContext newPublicationContext(ClusterChangedEvent clusterChang return publicationContext; } + // package private for testing + void setCurrentPublishRequestToSelf(PublishRequest publishRequest) { + this.currentPublishRequestToSelf.set(publishRequest); + } + + // package private for testing + void setLastSeenClusterState(ClusterState clusterState) { + this.lastSeenClusterState.set(clusterState); + } + private static BytesReference serializeFullClusterState(ClusterState clusterState, Version nodeVersion) throws IOException { final BytesReference serializedState = CompressedStreamUtils.createCompressedStream(nodeVersion, stream -> { stream.writeBoolean(true); @@ -270,12 +392,20 @@ public class PublicationContext { private final boolean sendFullVersion; private final Map serializedStates = new HashMap<>(); private final Map serializedDiffs = new HashMap<>(); + private final boolean sendRemoteState; + private final PersistedStateRegistry persistedStateRegistry; - PublicationContext(ClusterChangedEvent clusterChangedEvent) { + PublicationContext( + ClusterChangedEvent clusterChangedEvent, + boolean isRemotePublicationEnabled, + PersistedStateRegistry persistedStateRegistry + ) { discoveryNodes = clusterChangedEvent.state().nodes(); newState = clusterChangedEvent.state(); previousState = clusterChangedEvent.previousState(); sendFullVersion = previousState.getBlocks().disableStatePersistence(); + sendRemoteState = isRemotePublicationEnabled; + this.persistedStateRegistry = persistedStateRegistry; } void buildDiffAndSerializeStates() { @@ -339,7 +469,11 @@ public void onFailure(Exception e) { } else { responseActionListener = listener; } - if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { + // TODO Decide to send remote state before starting publication by checking remote publication on all nodes + if (sendRemoteState && destination.isRemoteStatePublicationEnabled()) { + logger.trace("sending remote cluster state version [{}] to [{}]", newState.version(), destination); + sendRemoteClusterState(destination, publishRequest.getAcceptedState(), responseActionListener); + } else if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); sendFullClusterState(destination, responseActionListener); } else { @@ -384,6 +518,61 @@ public String executor() { ); } + private void sendRemoteClusterState( + final DiscoveryNode destination, + final ClusterState clusterState, + final ActionListener listener + ) { + try { + final String manifestFileName = ((RemotePersistedState) persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)) + .getLastUploadedManifestFile(); + final RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + discoveryNodes.getLocalNode(), + clusterState.term(), + clusterState.getVersion(), + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID(), + manifestFileName + ); + final Consumer transportExceptionHandler = exp -> { + logger.debug(() -> new ParameterizedMessage("failed to send remote cluster state to {}", destination), exp); + listener.onFailure(exp); + }; + final TransportResponseHandler responseHandler = new TransportResponseHandler<>() { + + @Override + public PublishWithJoinResponse read(StreamInput in) throws IOException { + return new PublishWithJoinResponse(in); + } + + @Override + public void handleResponse(PublishWithJoinResponse response) { + listener.onResponse(response); + } + + @Override + public void handleException(TransportException exp) { + transportExceptionHandler.accept(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.GENERIC; + } + }; + transportService.sendRequest( + destination, + PUBLISH_REMOTE_STATE_ACTION_NAME, + remotePublishRequest, + stateRequestOptions, + responseHandler + ); + } catch (Exception e) { + logger.warn(() -> new ParameterizedMessage("error sending remote cluster state to {}", destination), e); + listener.onFailure(e); + } + } + private void sendFullClusterState(DiscoveryNode destination, ActionListener listener) { BytesReference bytes = serializedStates.get(destination.getVersion()); if (bytes == null) { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java b/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java new file mode 100644 index 0000000000000..9461c5ee63627 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.coordination; + +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Send the publish request with the remote cluster state details + * @opensearch.internal + */ +public class RemotePublishRequest extends TermVersionRequest { + + private final String clusterName; + private final String clusterUUID; + private final String manifestFile; + + public RemotePublishRequest( + DiscoveryNode sourceNode, + long term, + long version, + String clusterName, + String clusterUUID, + String manifestFile + ) { + super(sourceNode, term, version); + this.clusterName = clusterName; + this.clusterUUID = clusterUUID; + this.manifestFile = manifestFile; + } + + public RemotePublishRequest(StreamInput in) throws IOException { + super(in); + this.clusterName = in.readString(); + this.clusterUUID = in.readString(); + this.manifestFile = in.readString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(clusterName); + out.writeString(clusterUUID); + out.writeString(manifestFile); + } + + @Override + public String toString() { + return "RemotePublishRequest{" + + "term=" + + term + + ", version=" + + version + + ", clusterName=" + + clusterName + + ", clusterUUID=" + + clusterUUID + + ", sourceNode=" + + sourceNode + + ", manifestFile=" + + manifestFile + + '}'; + } + + public String getClusterName() { + return clusterName; + } + + public String getClusterUUID() { + return clusterUUID; + } + + public String getManifestFile() { + return manifestFile; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 93d6f6b0e4d22..ac158416d0c3b 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -63,7 +63,9 @@ import java.util.stream.Stream; import static org.opensearch.node.NodeRoleSettings.NODE_ROLES_SETTING; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY; /** * A discovery node represents a node that is part of the cluster. @@ -520,6 +522,18 @@ public boolean isRemoteStoreNode() { return this.getAttributes().keySet().stream().anyMatch(key -> key.startsWith(REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)); } + /** + * Returns whether remote cluster state publication is enabled on this node + * @return true if the node contains remote cluster state node attribute and remote routing table node attribute + */ + public boolean isRemoteStatePublicationEnabled() { + return this.getAttributes() + .keySet() + .stream() + .anyMatch(key -> (key.equals(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY))) + && this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY)); + } + /** * Returns a set of all the roles that the node has. The roles are returned in sorted order by the role name. *

diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java index 538dea5b2e60b..922e23b849d49 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java @@ -53,6 +53,7 @@ import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.common.transport.TransportAddress; import org.opensearch.gateway.GatewayMetaState; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.monitor.NodeHealthService; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.plugins.DiscoveryPlugin; @@ -135,7 +136,8 @@ public DiscoveryModule( NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, RemoteStoreNodeService remoteStoreNodeService, - ClusterManagerMetrics clusterManagerMetrics + ClusterManagerMetrics clusterManagerMetrics, + RemoteClusterStateService remoteClusterStateService ) { final Collection> joinValidators = new ArrayList<>(); final Map> hostProviders = new HashMap<>(); @@ -214,7 +216,8 @@ public DiscoveryModule( nodeHealthService, persistedStateRegistry, remoteStoreNodeService, - clusterManagerMetrics + clusterManagerMetrics, + remoteClusterStateService ); } else { throw new IllegalArgumentException("Unknown discovery type [" + discoveryType + "]"); diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index 24183f2d2675f..811a5161a6f6e 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -65,6 +65,7 @@ import org.opensearch.env.NodeMetadata; import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.node.Node; @@ -666,6 +667,8 @@ public static class RemotePersistedState implements PersistedState { private ClusterState lastAcceptedState; private ClusterMetadataManifest lastAcceptedManifest; + + private String lastUploadedManifestFile; private final RemoteClusterStateService remoteClusterStateService; private String previousClusterUUID; @@ -691,10 +694,14 @@ public void setCurrentTerm(long currentTerm) { // But for RemotePersistedState, the state is only pushed by the active cluster. So this method is not required. } + public String getLastUploadedManifestFile() { + return lastUploadedManifestFile; + } + @Override public void setLastAcceptedState(ClusterState clusterState) { try { - final ClusterMetadataManifest manifest; + final RemoteClusterStateManifestInfo manifestDetails; if (shouldWriteFullClusterState(clusterState)) { final Optional latestManifest = remoteClusterStateService.getLatestClusterMetadataManifest( clusterState.getClusterName().value(), @@ -712,15 +719,21 @@ public void setLastAcceptedState(ClusterState clusterState) { clusterState.metadata().clusterUUID() ); } - manifest = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID); + manifestDetails = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID); } else { assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == true : "Previous manifest and previous ClusterState are not in sync"; - manifest = remoteClusterStateService.writeIncrementalMetadata(lastAcceptedState, clusterState, lastAcceptedManifest); + manifestDetails = remoteClusterStateService.writeIncrementalMetadata( + lastAcceptedState, + clusterState, + lastAcceptedManifest + ); } - assert verifyManifestAndClusterState(manifest, clusterState) == true : "Manifest and ClusterState are not in sync"; - lastAcceptedManifest = manifest; + assert verifyManifestAndClusterState(manifestDetails.getClusterMetadataManifest(), clusterState) == true + : "Manifest and ClusterState are not in sync"; + lastAcceptedManifest = manifestDetails.getClusterMetadataManifest(); lastAcceptedState = clusterState; + lastUploadedManifestFile = manifestDetails.getManifestFileName(); } catch (Exception e) { remoteClusterStateService.writeMetadataFailed(); handleExceptionOnWrite(e); @@ -768,12 +781,13 @@ public void markLastAcceptedStateAsCommitted() { metadataBuilder.clusterUUIDCommitted(true); clusterState = ClusterState.builder(lastAcceptedState).metadata(metadataBuilder).build(); } - final ClusterMetadataManifest committedManifest = remoteClusterStateService.markLastStateAsCommitted( + final RemoteClusterStateManifestInfo committedManifestDetails = remoteClusterStateService.markLastStateAsCommitted( clusterState, lastAcceptedManifest ); - lastAcceptedManifest = committedManifest; + lastAcceptedManifest = committedManifestDetails.getClusterMetadataManifest(); lastAcceptedState = clusterState; + lastUploadedManifestFile = committedManifestDetails.getManifestFileName(); } catch (Exception e) { handleExceptionOnWrite(e); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 175727a0093c6..90c80ccdf78a1 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -332,6 +332,11 @@ public Map getCustomMetadataMap() { return uploadedCustomMetadataMap; } + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + public ClusterStateDiffManifest getDiffManifest() { + return new ClusterStateDiffManifest(); + } + public boolean hasMetadataAttributesFiles() { return uploadedCoordinationMetadata != null || uploadedSettingsMetadata != null diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java new file mode 100644 index 0000000000000..d59c6042e7834 --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterStateDiffManifest.java @@ -0,0 +1,22 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +/** + * Manifest of diff between two cluster states + * + * @opensearch.internal + */ +public class ClusterStateDiffManifest { + + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + public String getFromStateUUID() { + return null; + } +} diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 6bd876b4f511c..240b00e67af0c 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -36,6 +36,7 @@ import org.opensearch.core.xcontent.ToXContent; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.transfer.BlobStoreTransferService; import org.opensearch.node.Node; @@ -268,7 +269,7 @@ public RemoteClusterStateService( * @return A manifest object which contains the details of uploaded entity metadata. */ @Nullable - public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, String previousClusterUUID) throws IOException { + public RemoteClusterStateManifestInfo writeFullMetadata(ClusterState clusterState, String previousClusterUUID) throws IOException { final long startTimeNanos = relativeTimeNanosSupplier.getAsLong(); if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { logger.error("Local node is not elected cluster manager. Exiting"); @@ -284,7 +285,7 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri true, true ); - final ClusterMetadataManifest manifest = uploadManifest( + final RemoteClusterStateManifestInfo manifestDetails = uploadManifest( clusterState, uploadedMetadataResults.uploadedIndexMetadata, previousClusterUUID, @@ -311,7 +312,7 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri uploadedMetadataResults.uploadedIndexMetadata.size() ); } - return manifest; + return manifestDetails; } /** @@ -322,7 +323,7 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri * @return The uploaded ClusterMetadataManifest file */ @Nullable - public ClusterMetadataManifest writeIncrementalMetadata( + public RemoteClusterStateManifestInfo writeIncrementalMetadata( ClusterState previousClusterState, ClusterState clusterState, ClusterMetadataManifest previousManifest @@ -404,7 +405,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( customsToBeDeletedFromRemote.keySet().forEach(allUploadedCustomMap::remove); indicesToBeDeletedFromRemote.keySet().forEach(allUploadedIndexMetadata::remove); - final ClusterMetadataManifest manifest = uploadManifest( + final RemoteClusterStateManifestInfo manifestDetails = uploadManifest( clusterState, new ArrayList<>(allUploadedIndexMetadata.values()), previousManifest.getPreviousClusterUUID(), @@ -422,7 +423,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( remoteStateStats.stateTook(durationMillis); ParameterizedMessage clusterStateUploadTimeMessage = new ParameterizedMessage( CLUSTER_STATE_UPLOAD_TIME_LOG_STRING, - manifest.getStateVersion(), + manifestDetails.getClusterMetadataManifest().getStateVersion(), durationMillis ); ParameterizedMessage metadataUpdateMessage = new ParameterizedMessage( @@ -444,7 +445,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( } else { logger.info("{}; {}", clusterStateUploadTimeMessage, metadataUpdateMessage); } - return manifest; + return manifestDetails; } private UploadedMetadataResults writeMetadataInParallel( @@ -724,7 +725,7 @@ public RemoteClusterStateCleanupManager getCleanupManager() { } @Nullable - public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) + public RemoteClusterStateManifestInfo markLastStateAsCommitted(ClusterState clusterState, ClusterMetadataManifest previousManifest) throws IOException { assert clusterState != null : "Last accepted cluster state is not set"; if (clusterState.nodes().isLocalNodeElectedClusterManager() == false) { @@ -732,7 +733,7 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat return null; } assert previousManifest != null : "Last cluster metadata manifest is not set"; - ClusterMetadataManifest committedManifest = uploadManifest( + RemoteClusterStateManifestInfo committedManifestDetails = uploadManifest( clusterState, previousManifest.getIndices(), previousManifest.getPreviousClusterUUID(), @@ -742,11 +743,11 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat previousManifest.getCustomMetadataMap(), true ); - if (!previousManifest.isClusterUUIDCommitted() && committedManifest.isClusterUUIDCommitted()) { - remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifest); + if (!previousManifest.isClusterUUIDCommitted() && committedManifestDetails.getClusterMetadataManifest().isClusterUUIDCommitted()) { + remoteClusterStateCleanupManager.deleteStaleClusterUUIDs(clusterState, committedManifestDetails.getClusterMetadataManifest()); } - return committedManifest; + return committedManifestDetails; } @Override @@ -773,7 +774,7 @@ public void start() { this.remoteRoutingTableService.ifPresent(RemoteRoutingTableService::start); } - private ClusterMetadataManifest uploadManifest( + private RemoteClusterStateManifestInfo uploadManifest( ClusterState clusterState, List uploadedIndexMetadata, String previousClusterUUID, @@ -812,7 +813,7 @@ private ClusterMetadataManifest uploadManifest( new ArrayList<>() ); writeMetadataManifest(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), manifest, manifestFileName); - return manifest; + return new RemoteClusterStateManifestInfo(manifest, manifestFileName); } } @@ -1091,6 +1092,31 @@ public ClusterState getLatestClusterState(String clusterName, String clusterUUID .build(); } + public ClusterState getClusterStateForManifest( + String clusterName, + ClusterMetadataManifest manifest, + String localNodeId, + boolean includeEphemeral + ) { + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + return null; + } + + public ClusterState getClusterStateUsingDiff( + String clusterName, + ClusterMetadataManifest manifest, + ClusterState previousClusterState, + String localNodeId + ) { + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + return null; + } + + public ClusterMetadataManifest getClusterMetadataManifestByFileName(String clusterUUID, String manifestFileName) { + // TODO https://github.com/opensearch-project/OpenSearch/pull/14089 + return null; + } + private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { String globalMetadataFileName = clusterMetadataManifest.getGlobalMetadataFileName(); try { diff --git a/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateManifestInfo.java b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateManifestInfo.java new file mode 100644 index 0000000000000..5d987e5e21e1a --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/model/RemoteClusterStateManifestInfo.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote.model; + +import org.opensearch.gateway.remote.ClusterMetadataManifest; + +/** + * A class encapsulating the cluster state manifest and its remote uploaded path + */ +public class RemoteClusterStateManifestInfo { + + private final ClusterMetadataManifest clusterMetadataManifest; + private final String manifestFileName; + + public RemoteClusterStateManifestInfo(final ClusterMetadataManifest manifest, final String manifestFileName) { + this.clusterMetadataManifest = manifest; + this.manifestFileName = manifestFileName; + } + + public ClusterMetadataManifest getClusterMetadataManifest() { + return clusterMetadataManifest; + } + + public String getManifestFileName() { + return manifestFileName; + } +} diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 35b8295f26a08..19e6ef71b9491 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -1196,7 +1196,8 @@ protected Node( fsHealthService, persistedStateRegistry, remoteStoreNodeService, - clusterManagerMetrics + clusterManagerMetrics, + remoteClusterStateService ); final SearchPipelineService searchPipelineService = new SearchPipelineService( clusterService, diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java index bd71aecf89101..e74962dcbba1b 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java @@ -48,6 +48,7 @@ import org.opensearch.gateway.GatewayMetaState.RemotePersistedState; import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.repositories.fs.FsRepository; import org.opensearch.test.EqualsHashCodeTestUtils; import org.opensearch.test.OpenSearchTestCase; @@ -944,7 +945,8 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep .previousClusterUUID(randomAlphaOfLength(10)) .clusterUUIDCommitted(true) .build(); - Mockito.when(remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID)).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID)) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); persistedStateRegistry.addPersistedState(PersistedStateType.LOCAL, ps1); @@ -977,6 +979,8 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep Mockito.verify(remoteClusterStateService, Mockito.times(1)).writeFullMetadata(clusterState, previousClusterUUID); assertThat(persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE).getLastAcceptedState(), equalTo(clusterState)); + Mockito.when(remoteClusterStateService.markLastStateAsCommitted(any(), any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); coordinationState.handlePreCommit(); ClusterState committedClusterState = ClusterState.builder(clusterState) .metadata(Metadata.builder(clusterState.metadata()).clusterUUIDCommitted(true).build()) diff --git a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java index f84f0326f4a9d..3a7988bcd2bda 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java @@ -274,7 +274,8 @@ protected void onSendRequest( nodeHealthService, persistedStateRegistry, Mockito.mock(RemoteStoreNodeService.class), - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + null ); transportService.start(); transportService.acceptIncomingRequests(); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java index 6d94054afdea2..08e3f47100d8c 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/PublicationTransportHandlerTests.java @@ -37,33 +37,59 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.Diff; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.node.Node; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.transport.CapturingTransport; import org.opensearch.transport.TransportService; +import org.junit.Before; import java.io.IOException; import java.util.Collections; +import java.util.Optional; +import java.util.function.Function; + +import org.mockito.Mockito; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; public class PublicationTransportHandlerTests extends OpenSearchTestCase { - public void testDiffSerializationFailure() { - DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue( + private static final long TERM = 5; + private static final long VERSION = 5; + private static final String CLUSTER_NAME = "test-cluster"; + private static final String CLUSTER_UUID = "test-cluster-UUID"; + private static final String MANIFEST_FILE = "path/to/manifest"; + private static final String LOCAL_NODE_ID = "localNode"; + + private DeterministicTaskQueue deterministicTaskQueue; + private TransportService transportService; + private DiscoveryNode localNode; + private DiscoveryNode secondNode; + + @Before + public void setup() { + deterministicTaskQueue = new DeterministicTaskQueue( Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), random() ); final ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - final DiscoveryNode localNode = new DiscoveryNode("localNode", buildNewFakeTransportAddress(), Version.CURRENT); - final TransportService transportService = new CapturingTransport().createTransportService( + localNode = new DiscoveryNode(LOCAL_NODE_ID, buildNewFakeTransportAddress(), Version.CURRENT); + secondNode = new DiscoveryNode("secondNode", buildNewFakeTransportAddress(), Version.CURRENT); + transportService = new CapturingTransport().createTransportService( Settings.EMPTY, deterministicTaskQueue.getThreadPool(), TransportService.NOOP_TRANSPORT_INTERCEPTOR, @@ -72,14 +98,10 @@ public void testDiffSerializationFailure() { Collections.emptySet(), NoopTracer.INSTANCE ); - final PublicationTransportHandler handler = new PublicationTransportHandler( - transportService, - writableRegistry(), - pu -> null, - (pu, l) -> {} - ); - transportService.start(); - transportService.acceptIncomingRequests(); + } + + public void testDiffSerializationFailure() { + final PublicationTransportHandler handler = getPublicationTransportHandler(p -> null, null); final DiscoveryNode otherNode = new DiscoveryNode("otherNode", buildNewFakeTransportAddress(), Version.CURRENT); final ClusterState clusterState = CoordinationStateTests.clusterState( @@ -111,10 +133,181 @@ public void writeTo(StreamOutput out) throws IOException { OpenSearchException e = expectThrows( OpenSearchException.class, - () -> handler.newPublicationContext(new ClusterChangedEvent("test", unserializableClusterState, clusterState)) + () -> handler.newPublicationContext(new ClusterChangedEvent("test", unserializableClusterState, clusterState), false, null) ); assertNotNull(e.getCause()); assertThat(e.getCause(), instanceOf(IOException.class)); assertThat(e.getCause().getMessage(), containsString("Simulated failure of diff serialization")); } + + public void testHandleIncomingRemotePublishRequestWhenNoCurrentPublishRequest() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + localNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + + IllegalStateException e = expectThrows( + IllegalStateException.class, + () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) + ); + assertThat(e.getMessage(), containsString("publication to self failed")); + Mockito.verifyNoInteractions(remoteClusterStateService); + } + + public void testHandleIncomingRemotePublishRequestWhenTermMismatch() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + localNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + ClusterState clusterState = buildClusterState(6L, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + IllegalStateException e = expectThrows( + IllegalStateException.class, + () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) + ); + assertThat(e.getMessage(), containsString("publication to self failed")); + Mockito.verifyNoInteractions(remoteClusterStateService); + } + + public void testHandleIncomingRemotePublishRequestWhenVersionMismatch() { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + localNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + ClusterState clusterState = buildClusterState(TERM, 11L); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + IllegalStateException e = expectThrows( + IllegalStateException.class, + () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) + ); + assertThat(e.getMessage(), containsString("publication to self failed")); + Mockito.verifyNoInteractions(remoteClusterStateService); + } + + public void testHandleIncomingRemotePublishRequestForLocalNode() throws IOException { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + localNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + ClusterState clusterState = buildClusterState(TERM, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + PublishWithJoinResponse publishWithJoinResponse = handler.handleIncomingRemotePublishRequest(remotePublishRequest); + assertThat(publishWithJoinResponse, is(expectedPublishResponse)); + Mockito.verifyNoInteractions(remoteClusterStateService); + } + + public void testHandleIncomingRemotePublishRequestWhenManifestNotFound() throws IOException { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + secondNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + when(remoteClusterStateService.getClusterMetadataManifestByFileName(CLUSTER_UUID, MANIFEST_FILE)).thenReturn(null); + ClusterState clusterState = buildClusterState(TERM, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + IllegalStateException e = expectThrows( + IllegalStateException.class, + () -> handler.handleIncomingRemotePublishRequest(remotePublishRequest) + ); + assertThat(e.getMessage(), containsString("Publication failed as manifest was not found for")); + Mockito.verify(remoteClusterStateService, times(1)).getClusterMetadataManifestByFileName(Mockito.any(), Mockito.any()); + } + + public void testHandleIncomingRemotePublishRequestWhenNoLastSeenState() throws IOException { + RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + + PublishWithJoinResponse expectedPublishResponse = new PublishWithJoinResponse(new PublishResponse(TERM, VERSION), Optional.empty()); + Function handlePublishRequest = p -> expectedPublishResponse; + final PublicationTransportHandler handler = getPublicationTransportHandler(handlePublishRequest, remoteClusterStateService); + RemotePublishRequest remotePublishRequest = new RemotePublishRequest( + secondNode, + TERM, + VERSION, + CLUSTER_NAME, + CLUSTER_UUID, + MANIFEST_FILE + ); + ClusterMetadataManifest manifest = ClusterMetadataManifest.builder().clusterTerm(TERM).stateVersion(VERSION).build(); + when(remoteClusterStateService.getClusterMetadataManifestByFileName(CLUSTER_UUID, MANIFEST_FILE)).thenReturn(manifest); + when(remoteClusterStateService.getClusterStateForManifest(CLUSTER_NAME, manifest, LOCAL_NODE_ID, true)).thenReturn( + buildClusterState(TERM, VERSION) + ); + ClusterState clusterState = buildClusterState(TERM, VERSION); + PublishRequest publishRequest = new PublishRequest(clusterState); + handler.setCurrentPublishRequestToSelf(publishRequest); + PublishWithJoinResponse publishWithJoinResponse = handler.handleIncomingRemotePublishRequest(remotePublishRequest); + assertThat(publishWithJoinResponse, is(expectedPublishResponse)); + Mockito.verify(remoteClusterStateService, times(1)).getClusterMetadataManifestByFileName(Mockito.any(), Mockito.any()); + } + + private PublicationTransportHandler getPublicationTransportHandler( + Function handlePublishRequest, + RemoteClusterStateService remoteClusterStateService + ) { + final PublicationTransportHandler handler = new PublicationTransportHandler( + transportService, + writableRegistry(), + handlePublishRequest, + (pu, l) -> {}, + remoteClusterStateService + ); + transportService.start(); + transportService.acceptIncomingRequests(); + return handler; + } + + private ClusterState buildClusterState(long term, long version) { + CoordinationMetadata.Builder coordMetadataBuilder = CoordinationMetadata.builder().term(term); + Metadata newMetadata = Metadata.builder().coordinationMetadata(coordMetadataBuilder.build()).build(); + DiscoveryNodes nodes = DiscoveryNodes.builder().add(localNode).add(secondNode).localNodeId(LOCAL_NODE_ID).build(); + return ClusterState.builder(ClusterState.EMPTY_STATE).version(version).metadata(newMetadata).nodes(nodes).build(); + } } diff --git a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java index 5539b3237c2bf..cd3e8af2a3cd1 100644 --- a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java +++ b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java @@ -131,7 +131,8 @@ private DiscoveryModule newModule(Settings settings, List plugi null, new PersistedStateRegistry(), remoteStoreNodeService, - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + null ); } diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 418e6d8de6adb..7b113961fc2c7 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -69,6 +69,7 @@ import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.gateway.remote.RemotePersistenceStats; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.index.remote.RemoteIndexPathUploader; @@ -723,9 +724,11 @@ public void testRemotePersistedState() throws IOException { final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); final ClusterMetadataManifest manifest = ClusterMetadataManifest.builder().clusterTerm(1L).stateVersion(5L).build(); final String previousClusterUUID = "prev-cluster-uuid"; - Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); - Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); assertThat(remotePersistedState.getLastAcceptedState(), nullValue()); @@ -754,6 +757,9 @@ public void testRemotePersistedState() throws IOException { assertThat(remotePersistedState.getLastAcceptedState(), equalTo(secondClusterState)); assertThat(remotePersistedState.getCurrentTerm(), equalTo(clusterTerm)); + when(remoteClusterStateService.markLastStateAsCommitted(Mockito.any(), Mockito.any())).thenReturn( + new RemoteClusterStateManifestInfo(manifest, "path/to/manifest") + ); remotePersistedState.markLastAcceptedStateAsCommitted(); Mockito.verify(remoteClusterStateService, times(1)).markLastStateAsCommitted(Mockito.any(), Mockito.any()); @@ -779,9 +785,11 @@ public void testRemotePersistedStateNotCommitted() throws IOException { .build(); Mockito.when(remoteClusterStateService.getLatestClusterMetadataManifest(Mockito.any(), Mockito.any())) .thenReturn(Optional.of(manifest)); - Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeFullMetadata(Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); - Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())).thenReturn(manifest); + Mockito.when(remoteClusterStateService.writeIncrementalMetadata(Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(new RemoteClusterStateManifestInfo(manifest, "path/to/manifest")); CoordinationState.PersistedState remotePersistedState = new RemotePersistedState( remoteClusterStateService, ClusterState.UNKNOWN_UUID diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 4a53770c76d88..890a4b478b502 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -43,6 +43,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedMetadataAttribute; +import org.opensearch.gateway.remote.model.RemoteClusterStateManifestInfo; import org.opensearch.index.remote.RemoteIndexPathUploader; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.indices.IndicesModule; @@ -182,8 +183,11 @@ public void teardown() throws Exception { public void testFailWriteFullMetadataNonClusterManagerNode() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().build(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)); - Assert.assertThat(manifest, nullValue()); + final RemoteClusterStateManifestInfo manifestDetails = remoteClusterStateService.writeFullMetadata( + clusterState, + randomAlphaOfLength(10) + ); + Assert.assertThat(manifestDetails, nullValue()); } public void testFailInitializationWhenRemoteStateDisabled() { @@ -218,7 +222,8 @@ public void testWriteFullMetadataSuccess() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") + .getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); List indices = List.of(uploadedIndexMetadata); @@ -262,7 +267,8 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { }).when(container).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") + .getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); List indices = List.of(uploadedIndexMetadata); @@ -401,8 +407,12 @@ public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOEx public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().build(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata(clusterState, clusterState, null); - Assert.assertThat(manifest, nullValue()); + final RemoteClusterStateManifestInfo manifestDetails = remoteClusterStateService.writeIncrementalMetadata( + clusterState, + clusterState, + null + ); + Assert.assertThat(manifestDetails, nullValue()); assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); } @@ -433,7 +443,7 @@ public void testWriteIncrementalMetadataSuccess() throws IOException { previousClusterState, clusterState, previousManifest - ); + ).getClusterMetadataManifest(); final UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "index-uuid", "metadata-filename"); final List indices = List.of(uploadedIndexMetadata); @@ -508,7 +518,7 @@ private void verifyCodecMigrationManifest(int previousCodec) throws IOException previousClusterState, newClusterState, previousManifest - ); + ).getClusterMetadataManifest(); // global metadata is updated assertThat(manifestAfterUpdate.hasMetadataAttributesFiles(), is(true)); @@ -545,7 +555,7 @@ private void verifyWriteIncrementalGlobalMetadataFromOlderCodecSuccess(ClusterMe previousClusterState, clusterState, previousManifest - ); + ).getClusterMetadataManifest(); final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() .codecVersion(3) @@ -736,7 +746,8 @@ public void testCustomMetadataDeletedUpdatedAndAdded() throws IOException { // Initial cluster state with index. final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_"); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") + .getClusterMetadataManifest(); ClusterState clusterState1 = ClusterState.builder(initialClusterState) .metadata( @@ -751,7 +762,7 @@ public void testCustomMetadataDeletedUpdatedAndAdded() throws IOException { initialClusterState, clusterState1, initialManifest - ); + ).getClusterMetadataManifest(); // remove custom1 from the cluster state, update custom2, custom3 is at it is, added custom4 ClusterState clusterState2 = ClusterState.builder(initialClusterState) .metadata( @@ -761,7 +772,8 @@ public void testCustomMetadataDeletedUpdatedAndAdded() throws IOException { .putCustom("custom4", new CustomMetadata1("mock_custom_metadata4")) ) .build(); - ClusterMetadataManifest manifest2 = remoteClusterStateService.writeIncrementalMetadata(clusterState1, clusterState2, manifest1); + ClusterMetadataManifest manifest2 = remoteClusterStateService.writeIncrementalMetadata(clusterState1, clusterState2, manifest1) + .getClusterMetadataManifest(); // custom1 is removed assertFalse(manifest2.getCustomMetadataMap().containsKey("custom1")); // custom2 is updated @@ -811,7 +823,8 @@ public void testIndexMetadataDeletedUpdatedAndAdded() throws IOException { // Initial cluster state with index. final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_"); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") + .getClusterMetadataManifest(); String initialIndex = "test-index"; Index index1 = new Index("test-index-1", "index-uuid-1"); Index index2 = new Index("test-index-2", "index-uuid-2"); @@ -844,7 +857,7 @@ public void testIndexMetadataDeletedUpdatedAndAdded() throws IOException { initialClusterState, clusterState1, initialManifest - ); + ).getClusterMetadataManifest(); // verify that initial index is removed, and new index are added assertEquals(1, initialManifest.getIndices().size()); assertEquals(2, manifest1.getIndices().size()); @@ -855,7 +868,8 @@ public void testIndexMetadataDeletedUpdatedAndAdded() throws IOException { ClusterState clusterState2 = ClusterState.builder(clusterState1) .metadata(Metadata.builder(clusterState1.getMetadata()).put(indexMetadata1, true).build()) .build(); - ClusterMetadataManifest manifest2 = remoteClusterStateService.writeIncrementalMetadata(clusterState1, clusterState2, manifest1); + ClusterMetadataManifest manifest2 = remoteClusterStateService.writeIncrementalMetadata(clusterState1, clusterState2, manifest1) + .getClusterMetadataManifest(); // index1 is updated assertEquals(2, manifest2.getIndices().size()); assertEquals( @@ -888,7 +902,8 @@ private void verifyMetadataAttributeOnlyUpdated( // Initial cluster state with index. final ClusterState initialClusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); - final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_"); + final ClusterMetadataManifest initialManifest = remoteClusterStateService.writeFullMetadata(initialClusterState, "_na_") + .getClusterMetadataManifest(); ClusterState newClusterState = clusterStateUpdater.apply(initialClusterState); @@ -899,9 +914,10 @@ private void verifyMetadataAttributeOnlyUpdated( initialClusterState, newClusterState, initialManifest - ); + ).getClusterMetadataManifest(); } else { - manifestAfterMetadataUpdate = remoteClusterStateService.writeFullMetadata(newClusterState, initialClusterState.stateUUID()); + manifestAfterMetadataUpdate = remoteClusterStateService.writeFullMetadata(newClusterState, initialClusterState.stateUUID()) + .getClusterMetadataManifest(); } assertions.accept(initialManifest, manifestAfterMetadataUpdate); @@ -1182,7 +1198,8 @@ public void testMarkLastStateAsCommittedSuccess() throws IOException { List indices = List.of(uploadedIndexMetadata); final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder().indices(indices).build(); - final ClusterMetadataManifest manifest = remoteClusterStateService.markLastStateAsCommitted(clusterState, previousManifest); + final ClusterMetadataManifest manifest = remoteClusterStateService.markLastStateAsCommitted(clusterState, previousManifest) + .getClusterMetadataManifest(); final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() .indices(indices) @@ -1287,7 +1304,8 @@ public void testRemoteStateStats() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); mockBlobStoreObjects(); remoteClusterStateService.start(); - final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid") + .getClusterMetadataManifest(); assertTrue(remoteClusterStateService.getStats() != null); assertEquals(1, remoteClusterStateService.getStats().getSuccessCount()); diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 3848dce5a14ec..e22039ba4317f 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2554,7 +2554,8 @@ public void start(ClusterState initialState) { () -> new StatusInfo(HEALTHY, "healthy-info"), persistedStateRegistry, remoteStoreNodeService, - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + null ); clusterManagerService.setClusterStatePublisher(coordinator); coordinator.start(); diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index 1c2270bab1260..b432e5411404e 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -1184,7 +1184,8 @@ protected Optional getDisruptableMockTransport(Transpo nodeHealthService, persistedStateRegistry, remoteStoreNodeService, - new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), + null ); clusterManagerService.setClusterStatePublisher(coordinator); final GatewayService gatewayService = new GatewayService(