Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix registry unsupported pipeline update #96497

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ public void clusterChanged(ClusterChangedEvent event) {
return;
}

if (isClusterReady(event) == false) {
return;
}

// if this node is newer than the master node, we probably need to add the template, which might be newer than the
// template the master node has, so we potentially need to add new templates despite not being the master node
DiscoveryNode localNode = event.state().getNodes().getLocalNode();
Expand All @@ -197,6 +201,14 @@ public void clusterChanged(ClusterChangedEvent event) {
}
}

/**
* A method that can be overridden to add additional conditions to be satisfied
* before installing the template registry components.
* <p>
* It is consulted from {@code clusterChanged}, which returns early, without
* going any further, when this method returns {@code false}. The default
* implementation imposes no extra condition and always returns {@code true}.
*
* @param event the cluster changed event currently being handled
* @return {@code true} if processing of the event may continue, {@code false} to skip it
*/
protected boolean isClusterReady(ClusterChangedEvent event) {
return true;
}

/**
* Whether the registry should only apply changes when running on the master node.
* This is useful for plugins where certain actions are performed on master nodes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
*/
package org.elasticsearch.xpack.application.analytics;

import org.elasticsearch.Version;
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.metadata.ComponentTemplate;
import org.elasticsearch.cluster.metadata.ComposableIndexTemplate;
import org.elasticsearch.cluster.service.ClusterService;
Expand Down Expand Up @@ -35,8 +37,11 @@

public class AnalyticsTemplateRegistry extends IndexTemplateRegistry {

// This registry requires all nodes to be at least 8.8.0
static final Version MIN_NODE_VERSION = Version.V_8_8_0;

// This number must be incremented when we make changes to built-in templates.
static final int REGISTRY_VERSION = 1;
static final int REGISTRY_VERSION = 2;

// ILM Policies configuration
static final String EVENT_DATA_STREAM_ILM_POLICY_NAME = EVENT_DATA_STREAM_INDEX_PREFIX + "default_policy";
Expand Down Expand Up @@ -146,4 +151,11 @@ protected boolean requiresMasterNode() {
// there and the ActionNotFoundTransportException errors are then prevented.
return true;
}

@Override
protected boolean isClusterReady(ClusterChangedEvent event) {
    // The cluster is considered ready only when its lowest node version has reached
    // MIN_NODE_VERSION (8.8.0), i.e. once every node has been updated.
    return event.state().nodes().getMinNodeVersion().onOrAfter(MIN_NODE_VERSION);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

package org.elasticsearch.xpack.application.analytics;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionResponse;
Expand Down Expand Up @@ -405,6 +406,25 @@ public void testThatNonExistingPipelinesAreAddedImmediately() throws Exception {
assertBusy(() -> assertThat(calledTimes.get(), equalTo(registry.getIngestPipelines().size())));
}

/**
 * Checks that the registry performs no client action while the cluster still
 * contains a node on a version older than 8.8.0.
 * <p>
 * The cluster is made of two nodes: "updatedNode" (local node and elected master)
 * and "outdatedNode", which is created with version 8.7.0.
 */
public void testThatNothingIsInstalledWhenAllNodesAreNotUpdated() {
    DiscoveryNode updatedNode = TestDiscoveryNode.create("updatedNode");
    DiscoveryNode outdatedNode = TestDiscoveryNode.create("outdatedNode", ESTestCase.buildNewFakeTransportAddress(), Version.V_8_7_0);
    DiscoveryNodes nodes = DiscoveryNodes.builder()
        .localNodeId("updatedNode")
        .masterNodeId("updatedNode")
        .add(updatedNode)
        .add(outdatedNode)
        .build();

    // Any request reaching the client means the registry tried to install something.
    client.setVerifier((a, r, l) -> {
        fail("if some cluster nodes are not updated to at least v.8.8.0 nothing should happen");
        return null;
    });

    ClusterChangedEvent event = createClusterChangedEvent(Collections.emptyMap(), Collections.emptyMap(), nodes);
    registry.clusterChanged(event);
}

// -------------

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.Version;
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.metadata.ComponentTemplate;
import org.elasticsearch.cluster.metadata.ComposableIndexTemplate;
import org.elasticsearch.cluster.service.ClusterService;
Expand All @@ -36,6 +38,9 @@
public class StackTemplateRegistry extends IndexTemplateRegistry {
private static final Logger logger = LogManager.getLogger(StackTemplateRegistry.class);

// Current version of the registry requires all nodes to be at least 8.9.0.
public static final Version MIN_NODE_VERSION = Version.V_8_9_0;

// The stack template registry version. This number must be incremented when we make changes
// to built-in templates.
public static final int REGISTRY_VERSION = 3;
Expand Down Expand Up @@ -253,4 +258,13 @@ protected boolean requiresMasterNode() {
// there and the ActionNotFoundTransportException errors are then prevented.
return true;
}

@Override
protected boolean isClusterReady(ClusterChangedEvent event) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@eyalkoren Now checking that all nodes are at least at v8.9.0, so it does not fail because of the ignore_missing_pipeline parameter

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are you preventing with this?
AFAIK, once the local implementation of requiresMasterNode() returns true, as in this case, this ensures that the upgrade will occur only on the elected master. I believe that during rolling upgrades this ensures that this happens only after all non-master nodes are already upgraded.
Since the usage of ignore_missing_pipeline was introduced in #95971, which was added to 8.9.0 and not back-ported, I am not sure whether this is required.

BTW, AnalyticsTemplateRegistry also requires master node, so double-check if this is required in the original case as well.

@jbaiera @dakrone please confirm or enlighten me if I got it wrong

Copy link
Member

@davidkyle davidkyle Jun 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no guarantee that the non-master nodes will be upgraded first during a rolling upgrade. This is a sensible precaution.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would nodes apply settings coming from a master of a higher version?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dakrone just explained that I indeed got it wrong and there is no actual enforced guarantee as to the order of upgrade in code. This should normally not occur if the upgrade is done according to documentation (master node last), but such verification does make sense

// Ensure current version of the components are installed only once all nodes are updated to 8.9.0.
// This is necessary to prevent an error caused by the usage of the ignore_missing_pipeline property
// in the pipeline processor, which has been introduced only in 8.9.0
Version minNodeVersion = event.state().nodes().getMinNodeVersion();
return minNodeVersion.onOrAfter(MIN_NODE_VERSION);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

package org.elasticsearch.xpack.stack;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionResponse;
Expand Down Expand Up @@ -444,6 +445,25 @@ public void testThatMissingMasterNodeDoesNothing() {
registry.clusterChanged(event);
}

/**
 * Checks that the registry performs no client action while the cluster still
 * contains a node on a version older than 8.9.0.
 * <p>
 * The cluster is made of two nodes: "updatedNode" (local node and elected master)
 * and "outdatedNode", which is created with version 8.8.0.
 */
public void testThatNothingIsInstalledWhenAllNodesAreNotUpdated() {
    DiscoveryNode updatedNode = TestDiscoveryNode.create("updatedNode");
    DiscoveryNode outdatedNode = TestDiscoveryNode.create("outdatedNode", ESTestCase.buildNewFakeTransportAddress(), Version.V_8_8_0);
    DiscoveryNodes nodes = DiscoveryNodes.builder()
        .localNodeId("updatedNode")
        .masterNodeId("updatedNode")
        .add(updatedNode)
        .add(outdatedNode)
        .build();

    // Any request reaching the client means the registry tried to install something.
    client.setVerifier((a, r, l) -> {
        fail("if some cluster nodes are not updated to at least v.8.9.0 nothing should happen");
        return null;
    });

    ClusterChangedEvent event = createClusterChangedEvent(Collections.emptyMap(), nodes);
    registry.clusterChanged(event);
}

// -------------

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,21 +112,10 @@ public void testTrainedModelDeployment() throws Exception {
request.addParameter("wait_for_status", "yellow");
request.addParameter("timeout", "70s");
}));

// Workaround for an upgrade test failure where an ingest
// pipeline config cannot be parsed by older nodes:
// https://github.com/elastic/elasticsearch/issues/95766
//
// In version 8.3.1 ml stopped parsing the full ingest
// pipeline configuration so will avoid this problem.
// TODO remove this check once https://github.com/elastic/elasticsearch/issues/95766
// is resolved
if (UPGRADE_FROM_VERSION.onOrAfter(Version.V_8_3_1)) {
waitForDeploymentStarted(modelId);
// attempt inference on new and old nodes multiple times
for (int i = 0; i < 10; i++) {
assertInfer(modelId);
}
waitForDeploymentStarted(modelId);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@davidkyle Reverted your workaround.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @afoucret. I added the test-full-bwc label for extra CI coverage. The label means the upgrade tests will be run against all backwards compatible versions

// attempt inference on new and old nodes multiple times
for (int i = 0; i < 10; i++) {
assertInfer(modelId);
}
}
case UPGRADED -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,19 +74,8 @@ public void testTrainedModelInference() throws Exception {
request.addParameter("timeout", "70s");
}));
List<String> modelIds = getTrainedModels();

// Workaround for an upgrade test failure where an ingest
// pipeline config cannot be parsed by older nodes:
// https://github.com/elastic/elasticsearch/issues/95766
//
// In version 8.3.1 ml stopped parsing the full ingest
// pipeline configuration so will avoid this problem.
// TODO remove this check once https://github.com/elastic/elasticsearch/issues/95766
// is resolved
if (UPGRADE_FROM_VERSION.onOrAfter(Version.V_8_3_1)) {
// Test that stats are serializable and can be gathered
getTrainedModelStats();
}
// Test that stats are serializable and can be gathered
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@davidkyle Reverted your workaround.

getTrainedModelStats();
// Verify that the pipelines still work and inference is possible
testInfer(modelIds);
}
Expand Down