From 78f8a02691b5dc25dd82d0747d8050bf85b9ede4 Mon Sep 17 00:00:00 2001
From: Mohammed Naser
Date: Sun, 17 Mar 2024 16:07:02 -0400
Subject: [PATCH] fix: stop unexpected rollouts (#324)

When making any node group changes, we were previously re-applying the
entire set of cluster resources. With this change, we instead make only
the small changes to the node group itself, which should avoid unexpected
full rollouts of node groups when changing min/max for autoscaling or
resizing a cluster.

Signed-off-by: Mohammed Naser
---
 docs/developer/cluster-topology.md            |  12 ++
 flake.nix                                     |   4 +-
 magnum_cluster_api/driver.py                  | 121 ++++++++---
 magnum_cluster_api/resources.py               | 109 ++++++----
 magnum_cluster_api/tests/unit/conftest.py     |  25 +++
 magnum_cluster_api/tests/unit/test_driver.py  | 198 ++++++++++++++++++
 .../tests/unit/test_resources.py              |  79 +++++++
 7 files changed, 486 insertions(+), 62 deletions(-)
 create mode 100644 docs/developer/cluster-topology.md
 create mode 100644 magnum_cluster_api/tests/unit/test_driver.py

diff --git a/docs/developer/cluster-topology.md b/docs/developer/cluster-topology.md
new file mode 100644
index 00000000..76962b17
--- /dev/null
+++ b/docs/developer/cluster-topology.md
@@ -0,0 +1,12 @@
+# Cluster Topology
+
+The Cluster API driver for Magnum makes use of the cluster topology feature of the Cluster API project. This allows it to delegate all of the work of building resources such as the `OpenStackCluster`, `MachineDeployments` and everything else entirely to Cluster API, instead of the driver creating all of these resources itself.
+
+In order to do this, the driver creates a [`ClusterClass`](https://cluster-api.sigs.k8s.io/tasks/experimental-features/cluster-class/write-clusterclass) resource named `magnum-v{VERSION}`, where `{VERSION}` is the current version of the driver, for the following reasons:
+
+- Since the `ClusterClass` is an immutable resource, this allows multiple versions of it to exist side by side.
+- It prevents a rollout of existing clusters should the underlying `ClusterClass` change.
+
+It's important to note that there is only _one_ scenario in which the `spec.topology.class` of a given `Cluster` will be modified, and that is a cluster upgrade. This is because the user expects a rolling restart operation to occur when a cluster upgrade is requested. No other action should be allowed to change the `spec.topology.class` of a `Cluster`.
+
+For users, it's important to keep in mind that if they want to make use of a feature only available in a newer `ClusterClass`, they can simply perform an upgrade within Magnum to the same cluster template. This forces an update of the `spec.topology.class`, which may then naturally cause a full rollout to occur.
\ No newline at end of file
diff --git a/flake.nix b/flake.nix
index f0092c79..52ec2c34 100644
--- a/flake.nix
+++ b/flake.nix
@@ -15,9 +15,11 @@
     {
       devShell = pkgs.mkShell {
-        # LD_LIBRARY_PATH = "${pkgs.stdenv.cc.cc.lib}/lib";
+        LD_LIBRARY_PATH = "${pkgs.stdenv.cc.cc.lib}/lib";

         buildInputs = with pkgs; [
+          bashInteractive
+          glibcLocales
           poetry
         ];
       };
diff --git a/magnum_cluster_api/driver.py b/magnum_cluster_api/driver.py
index 8417ec33..531bc6fd 100644
--- a/magnum_cluster_api/driver.py
+++ b/magnum_cluster_api/driver.py
@@ -13,6 +13,8 @@
 # under the License.
+from __future__ import annotations + import keystoneauth1 from magnum import objects as magnum_objects from magnum.conductor import scale_manager @@ -239,9 +241,6 @@ def resize_cluster( ): utils.validate_cluster(context, cluster) - if nodegroup is None: - nodegroup = cluster.default_ng_worker - if nodes_to_remove: machines = objects.Machine.objects(self.k8s_api).filter( namespace="magnum-system", @@ -260,12 +259,7 @@ def resize_cluster( ] = "yes" machine.update() - nodegroup.node_count = node_count - nodegroup.save() - - resources.apply_cluster_from_magnum_cluster( - context, self.k8s_api, cluster, skip_auto_scaling_release=True - ) + self._update_nodegroup(context, cluster, nodegroup) @cluster_lock_wrapper def upgrade_cluster( @@ -341,10 +335,29 @@ def create_nodegroup( nodegroup: magnum_objects.NodeGroup, ): utils.validate_nodegroup(nodegroup, context) - resources.apply_cluster_from_magnum_cluster( - context, self.k8s_api, cluster, skip_auto_scaling_release=True + + cluster_resource: objects.Cluster = objects.Cluster.objects( + self.k8s_api, namespace="magnum-system" + ).get(name=cluster.stack_id) + + cluster_resource.obj["spec"]["topology"]["workers"][ + "machineDeployments" + ].append(resources.mutate_machine_deployment(context, cluster, nodegroup)) + + current_generation = resources.Cluster( + context, self.k8s_api, cluster + ).get_observed_generation() + cluster_resource.update() + self.wait_capi_cluster_reconciliation_start( + context, cluster, current_generation ) + nodegroup.status = fields.ClusterStatus.CREATE_IN_PROGRESS + nodegroup.save() + + cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS + cluster.save() + def update_nodegroup_status( self, context, @@ -402,18 +415,53 @@ def update_nodegroup( cluster: magnum_objects.Cluster, nodegroup: magnum_objects.NodeGroup, ): - # TODO + self._update_nodegroup(context, cluster, nodegroup) - # NOTE(okozachenko1203): First we save the nodegroup status because update_cluster_status() - # could be finished before update_nodegroup(). 
-        nodegroup.save()
+    def _update_nodegroup(
+        self,
+        context,
+        cluster: magnum_objects.Cluster,
+        nodegroup: magnum_objects.NodeGroup,
+    ):
         utils.validate_nodegroup(nodegroup, context)
-        resources.apply_cluster_from_magnum_cluster(
-            context, self.k8s_api, cluster, skip_auto_scaling_release=True
+
+        cluster_resource: objects.Cluster = objects.Cluster.objects(
+            self.k8s_api, namespace="magnum-system"
+        ).get(name=cluster.stack_id)
+
+        machine_deployment_index = None
+        for i, machine_deployment in enumerate(
+            cluster_resource.obj["spec"]["topology"]["workers"]["machineDeployments"]
+        ):
+            if machine_deployment["name"] == nodegroup.name:
+                machine_deployment_index = i
+                break
+
+        if machine_deployment_index is not None:
+            machine_deployment = cluster_resource.obj["spec"]["topology"]["workers"][
+                "machineDeployments"
+            ][machine_deployment_index]
+
+            cluster_resource.obj["spec"]["topology"]["workers"]["machineDeployments"][
+                machine_deployment_index
+            ] = resources.mutate_machine_deployment(
+                context,
+                cluster,
+                nodegroup,
+                machine_deployment,
+            )
+
+        current_generation = resources.Cluster(
+            context, self.k8s_api, cluster
+        ).get_observed_generation()
+        cluster_resource.update()
+        self.wait_capi_cluster_reconciliation_start(
+            context, cluster, current_generation
         )
-        # NOTE(okozachenko1203): We set the cluster status as UPDATE_IN_PROGRESS again at the end because
-        #                        update_cluster_status() could be finished and cluster status has been set as
-        #                        UPDATE_COMPLETE before nodegroup_conductor.Handler.nodegroup_update finished.
+
+        nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
+        nodegroup.save()
+
         cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
         cluster.save()

@@ -424,15 +472,36 @@ def delete_nodegroup(
         cluster: magnum_objects.Cluster,
         nodegroup: magnum_objects.NodeGroup,
     ):
+        cluster_resource: objects.Cluster = objects.Cluster.objects(
+            self.k8s_api, namespace="magnum-system"
+        ).get(name=cluster.stack_id)
+
+        machine_deployment_index = None
+        for i, machine_deployment in enumerate(
+            cluster_resource.obj["spec"]["topology"]["workers"]["machineDeployments"]
+        ):
+            if machine_deployment["name"] == nodegroup.name:
+                machine_deployment_index = i
+                break
+
+        if machine_deployment_index is not None:
+            del cluster_resource.obj["spec"]["topology"]["workers"][
+                "machineDeployments"
+            ][machine_deployment_index]
+
+        current_generation = resources.Cluster(
+            context, self.k8s_api, cluster
+        ).get_observed_generation()
+        cluster_resource.update()
+        self.wait_capi_cluster_reconciliation_start(
+            context, cluster, current_generation
+        )
+
         nodegroup.status = fields.ClusterStatus.DELETE_IN_PROGRESS
         nodegroup.save()

-        resources.apply_cluster_from_magnum_cluster(
-            context,
-            self.k8s_api,
-            cluster,
-            skip_auto_scaling_release=True,
-        )
+        cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
+        cluster.save()

     @cluster_lock_wrapper
     def get_monitor(self, context, cluster: magnum_objects.Cluster):
diff --git a/magnum_cluster_api/resources.py b/magnum_cluster_api/resources.py
index ec9416ba..4f19a246 100644
--- a/magnum_cluster_api/resources.py
+++ b/magnum_cluster_api/resources.py
@@ -1993,36 +1993,68 @@ def create_cluster_class(
     ClusterClass(api).apply()


-def generate_machine_deployments_for_cluster(
-    context: context.RequestContext, cluster: objects.Cluster
-) -> list:
+def mutate_machine_deployment(
+    context: context.RequestContext,
+    cluster: objects.Cluster,
+    node_group: magnum_objects.NodeGroup,
+    machine_deployment: dict = {},
+):
+    """
+    This function will either make
updates to machine deployment fields which + will not cause a rolling update or will return a new machine deployment + if none is provided. + """ + auto_scaling_enabled = utils.get_auto_scaling_enabled(cluster) - machine_deployments = [] - for ng in cluster.nodegroups: - if ng.role == "master" or ng.status.startswith("DELETE"): - continue + machine_deployment.setdefault( + "metadata", + { + "annotations": {}, + "labels": {}, + }, + ) + + # Node labels + machine_deployment["metadata"]["labels"] = { + f"node-role.kubernetes.io/{node_group.role}": "", + "node.cluster.x-k8s.io/nodegroup": node_group.name, + } + + # Replicas (or min/max if auto-scaling is enabled) + if auto_scaling_enabled: + machine_deployment["replicas"] = None + machine_deployment["metadata"]["annotations"] = { + AUTOSCALE_ANNOTATION_MIN: str( + utils.get_node_group_min_node_count(node_group) + ), + AUTOSCALE_ANNOTATION_MAX: str( + utils.get_node_group_max_node_count(context, node_group) + ), + } + else: + machine_deployment["replicas"] = node_group.node_count + machine_deployment["metadata"]["annotations"] = {} - machine_deployment = { + # Fixes + machine_deployment["nodeVolumeDetachTimeout"] = ( + CLUSTER_CLASS_NODE_VOLUME_DETACH_TIMEOUT + ) + + # Anything beyond this point will *NOT* be changed in the machine deployment + # for update operations (i.e. if the machine deployment already exists). + if machine_deployment.get("name") == node_group.name: + return machine_deployment + + # At this point, this is all code that will be added for brand new machine + # deployments. We can bring any of this code into the above block if we + # want to change it for existing machine deployments. + machine_deployment.update( + { "class": "default-worker", - "nodeVolumeDetachTimeout": CLUSTER_CLASS_NODE_VOLUME_DETACH_TIMEOUT, - "name": ng.name, - "metadata": { - "annotations": ( - { - AUTOSCALE_ANNOTATION_MIN: f"{utils.get_node_group_min_node_count(ng)}", # noqa: E501 - AUTOSCALE_ANNOTATION_MAX: f"{utils.get_node_group_max_node_count(context, ng)}", # noqa: E501 - } - if auto_scaling_enabled - else {} - ), - "labels": { - f"node-role.kubernetes.io/{ng.role}": "", - "node.cluster.x-k8s.io/nodegroup": ng.name, - }, - }, + "name": node_group.name, "failureDomain": utils.get_node_group_label( - context, ng, "availability_zone", "" + context, node_group, "availability_zone", "" ), "machineHealthCheck": { "enable": utils.get_cluster_label_as_bool( @@ -2036,13 +2068,13 @@ def generate_machine_deployments_for_cluster( "value": { "size": utils.get_node_group_label_as_int( context, - ng, + node_group, "boot_volume_size", CONF.cinder.default_boot_volume_size, ), "type": utils.get_node_group_label( context, - ng, + node_group, "boot_volume_type", cinder.get_default_boot_volume_type(context), ), @@ -2050,31 +2082,38 @@ def generate_machine_deployments_for_cluster( }, { "name": "flavor", - "value": ng.flavor_id, + "value": node_group.flavor_id, }, { "name": "imageRepository", "value": utils.get_node_group_label( context, - ng, + node_group, "container_infra_prefix", "", ), }, { "name": "imageUUID", - "value": utils.get_image_uuid(ng.image_id, context), + "value": utils.get_image_uuid(node_group.image_id, context), }, ], }, } + ) + + return machine_deployment - # NOTE(mnaser): In order for auto-scaler and cluster class reconcilers - # to not fight each other, we only set replicas if the - # auto-scaler is disabled. 
- if not auto_scaling_enabled: - machine_deployment["replicas"] = ng.node_count +def generate_machine_deployments_for_cluster( + context: context.RequestContext, cluster: objects.Cluster +) -> list: + machine_deployments = [] + for ng in cluster.nodegroups: + if ng.role == "master" or ng.status.startswith("DELETE"): + continue + + machine_deployment = mutate_machine_deployment(context, cluster, ng) machine_deployments.append(machine_deployment) return machine_deployments diff --git a/magnum_cluster_api/tests/unit/conftest.py b/magnum_cluster_api/tests/unit/conftest.py index 55c3f52c..f6a93f05 100644 --- a/magnum_cluster_api/tests/unit/conftest.py +++ b/magnum_cluster_api/tests/unit/conftest.py @@ -15,6 +15,8 @@ import pytest from magnum.common import context as magnum_context +from magnum_cluster_api import driver + @pytest.fixture def context(): @@ -26,3 +28,26 @@ def context(): user_id="fake_user", is_admin=False, ) + + +@pytest.fixture(scope="session") +def mock_pykube(session_mocker): + session_mocker.patch("pykube.KubeConfig") + session_mocker.patch("pykube.HTTPClient") + + +@pytest.fixture(scope="session") +def mock_cluster_lock(session_mocker): + session_mocker.patch("kubernetes.config.load_config") + session_mocker.patch("magnum_cluster_api.sync.ClusterLock.acquire") + session_mocker.patch("magnum_cluster_api.sync.ClusterLock.release") + + +@pytest.fixture(scope="session") +def mock_validate_nodegroup(session_mocker): + session_mocker.patch("magnum_cluster_api.utils.validate_nodegroup") + + +@pytest.fixture() +def ubuntu_driver(mock_cluster_lock, mock_pykube): + yield driver.UbuntuDriver() diff --git a/magnum_cluster_api/tests/unit/test_driver.py b/magnum_cluster_api/tests/unit/test_driver.py new file mode 100644 index 00000000..6376931e --- /dev/null +++ b/magnum_cluster_api/tests/unit/test_driver.py @@ -0,0 +1,198 @@ +# Copyright (c) 2024 VEXXHOST, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +import pytest +from magnum.objects import fields +from magnum.tests.unit.objects import utils + + +class TestDriver: + @pytest.fixture(autouse=True) + def setup(self, mocker, context, mock_pykube, mock_validate_nodegroup): + self.cluster = utils.get_test_cluster(context, labels={}) + self.cluster.save = mocker.MagicMock() + + self.node_group = utils.get_test_nodegroup(context) + self.node_group.save = mocker.MagicMock() + + self.mock_cluster_resource = mocker.MagicMock() + self.mock_cluster_objects = mocker.patch( + "magnum_cluster_api.objects.Cluster.objects" + ) + self.mock_cluster_objects.return_value.get.return_value = ( + self.mock_cluster_resource + ) + + self.mock_wait_capi_cluster_reconciliation_start = mocker.patch( + "magnum_cluster_api.driver.BaseDriver.wait_capi_cluster_reconciliation_start" + ) + + def _assert_node_group_crud_calls(self): + self.mock_cluster_objects.return_value.get.assert_called_once_with( + name=self.cluster.stack_id + ) + self.mock_cluster_resource.update.assert_called_once() + self.mock_wait_capi_cluster_reconciliation_start.assert_called_once() + + def _assert_node_group_status(self, expected_status): + assert self.node_group.status == expected_status + self.node_group.save.assert_called_once() + + assert self.cluster.status == fields.ClusterStatus.UPDATE_IN_PROGRESS + self.cluster.save.assert_called_once() + + def test_create_nodegroup(self, mocker, context, ubuntu_driver): + self.mock_cluster_resource.obj = { + "spec": { + "topology": { + "workers": { + "machineDeployments": [], + } + } + }, + } + + mock_mutate_machine_deployment = mocker.patch( + "magnum_cluster_api.resources.mutate_machine_deployment" + ) + + ubuntu_driver.create_nodegroup(context, self.cluster, self.node_group) + + mock_mutate_machine_deployment.assert_called_once_with( + context, self.cluster, self.node_group + ) + + assert self.mock_cluster_resource.obj["spec"]["topology"]["workers"][ + "machineDeployments" + ] == [mock_mutate_machine_deployment.return_value] + + self._assert_node_group_crud_calls() + self._assert_node_group_status(fields.ClusterStatus.CREATE_IN_PROGRESS) + + def test_update_nodegroup(self, mocker, context, ubuntu_driver): + self.mock_cluster_resource.obj = { + "spec": { + "topology": { + "workers": { + "machineDeployments": [ + { + "name": self.node_group.name, + } + ], + } + } + }, + } + + mock_mutate_machine_deployment = mocker.patch( + "magnum_cluster_api.resources.mutate_machine_deployment" + ) + + ubuntu_driver.update_nodegroup(context, self.cluster, self.node_group) + + mock_mutate_machine_deployment.assert_called_once_with( + context, + self.cluster, + self.node_group, + { + "name": self.node_group.name, + }, + ) + + assert self.mock_cluster_resource.obj["spec"]["topology"]["workers"][ + "machineDeployments" + ] == [mock_mutate_machine_deployment.return_value] + + self._assert_node_group_crud_calls() + self._assert_node_group_status(fields.ClusterStatus.UPDATE_IN_PROGRESS) + + def test_update_nodegroup_with_multiple_node_groups( + self, mocker, context, ubuntu_driver + ): + mock_machine_deployment = mocker.MagicMock() + + self.mock_cluster_resource.obj = { + "spec": { + "topology": { + "workers": { + "machineDeployments": [ + mock_machine_deployment, + { + "name": self.node_group.name, + }, + ], + } + } + }, + } + + mock_mutate_machine_deployment = mocker.patch( + "magnum_cluster_api.resources.mutate_machine_deployment" + ) + + ubuntu_driver.update_nodegroup(context, self.cluster, self.node_group) + + assert not mock_machine_deployment.called + + 
mock_mutate_machine_deployment.assert_called_once_with( + context, + self.cluster, + self.node_group, + { + "name": self.node_group.name, + }, + ) + + assert self.mock_cluster_resource.obj["spec"]["topology"]["workers"][ + "machineDeployments" + ] == [ + mock_machine_deployment, + mock_mutate_machine_deployment.return_value, + ] + + self._assert_node_group_crud_calls() + self._assert_node_group_status(fields.ClusterStatus.UPDATE_IN_PROGRESS) + + def test_delete_nodegroup_with_multiple_node_groups( + self, mocker, context, ubuntu_driver + ): + mock_machine_deployment = mocker.MagicMock() + + self.mock_cluster_resource.obj = { + "spec": { + "topology": { + "workers": { + "machineDeployments": [ + mock_machine_deployment, + { + "name": self.node_group.name, + }, + ], + } + } + }, + } + + ubuntu_driver.delete_nodegroup(context, self.cluster, self.node_group) + + assert not mock_machine_deployment.called + + assert self.mock_cluster_resource.obj["spec"]["topology"]["workers"][ + "machineDeployments" + ] == [ + mock_machine_deployment, + ] + + self._assert_node_group_crud_calls() + self._assert_node_group_status(fields.ClusterStatus.DELETE_IN_PROGRESS) diff --git a/magnum_cluster_api/tests/unit/test_resources.py b/magnum_cluster_api/tests/unit/test_resources.py index fec53f6e..b1e149b8 100644 --- a/magnum_cluster_api/tests/unit/test_resources.py +++ b/magnum_cluster_api/tests/unit/test_resources.py @@ -12,7 +12,9 @@ # License for the specific language governing permissions and limitations # under the License. +import pytest from magnum.objects import fields +from magnum.tests.unit.objects import utils from magnum_cluster_api import resources @@ -66,3 +68,80 @@ def test_generate_machine_deployments_for_cluster_with_deleting_node_group( ) assert len(mds) == 2 + + +class TestExistingMutateMachineDeployment: + @pytest.fixture(autouse=True) + def setup(self, mocker, context): + self.cluster = utils.get_test_cluster(context) + self.node_group = utils.get_test_nodegroup(context) + + self.mock_get_node_group_label = mocker.patch( + "magnum_cluster_api.utils.get_node_group_label" + ) + self.mock_auto_scaling = mocker.patch( + "magnum_cluster_api.utils.get_auto_scaling_enabled" + ) + self.mock_get_node_group_min_node_count = mocker.patch( + "magnum_cluster_api.utils.get_node_group_min_node_count" + ) + self.mock_get_node_group_max_node_count = mocker.patch( + "magnum_cluster_api.utils.get_node_group_max_node_count" + ) + + def _assert_no_mutations(self, md): + assert md["name"] == self.node_group.name + assert "class" not in md + self.mock_get_node_group_label.assert_not_called() + + def _assert_common_machine_deployment_values(self, md): + assert md["name"] == self.node_group.name + assert md["metadata"]["labels"] == { + f"node-role.kubernetes.io/{self.node_group.role}": "", + "node.cluster.x-k8s.io/nodegroup": self.node_group.name, + } + assert ( + md["nodeVolumeDetachTimeout"] + == resources.CLUSTER_CLASS_NODE_VOLUME_DETACH_TIMEOUT + ) + + def test_mutate_machine_deployment_without_autoscaling(self, context): + self.mock_auto_scaling.return_value = False + + md = resources.mutate_machine_deployment( + context, + self.cluster, + self.node_group, + { + "name": self.node_group.name, + }, + ) + + self._assert_common_machine_deployment_values(md) + self._assert_no_mutations(md) + + assert md["replicas"] == self.node_group.node_count + assert md["metadata"]["annotations"] == {} + + def test_mutate_machine_deployment_with_autoscaling(self, context): + self.mock_auto_scaling.return_value = True + + md = 
resources.mutate_machine_deployment( + context, + self.cluster, + self.node_group, + { + "name": self.node_group.name, + }, + ) + + self._assert_common_machine_deployment_values(md) + self._assert_no_mutations(md) + + assert md["replicas"] is None + assert md["metadata"]["annotations"][resources.AUTOSCALE_ANNOTATION_MIN] == str( + self.mock_get_node_group_min_node_count.return_value + ) + assert md["metadata"]["annotations"][resources.AUTOSCALE_ANNOTATION_MAX] == str( + self.mock_get_node_group_max_node_count.return_value + )
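To make the `ClusterClass` versioning behaviour described in `docs/developer/cluster-topology.md` concrete, here is a minimal sketch of how a versioned class name and the upgrade-only change to `spec.topology.class` could look. The `DRIVER_VERSION` constant and the helper names below are illustrative assumptions, not part of the driver's actual API.

```python
# Illustrative sketch only: DRIVER_VERSION, clusterclass_name_for and
# apply_upgrade are hypothetical names, not the driver's real API.
DRIVER_VERSION = "0.0.0"  # stand-in for the installed driver version


def clusterclass_name_for(version: str) -> str:
    # ClusterClass resources are immutable, so each driver version gets its own.
    return f"magnum-v{version}"


def apply_upgrade(cluster_spec: dict, new_version: str) -> None:
    # The only operation allowed to change spec.topology.class is an upgrade;
    # pointing the Cluster at a newer ClusterClass may trigger a full rollout.
    cluster_spec["topology"]["class"] = clusterclass_name_for(new_version)


spec = {"topology": {"class": clusterclass_name_for(DRIVER_VERSION), "workers": {}}}
apply_upgrade(spec, "0.1.0")
print(spec["topology"]["class"])  # magnum-v0.1.0
```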
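The node group create, update and delete paths in `driver.py` all follow the same pattern: locate the matching entry in `spec.topology.workers.machineDeployments` by name and touch only that entry. A minimal, self-contained sketch of that pattern, using a plain dict in place of the real `objects.Cluster` resource:

```python
def find_machine_deployment_index(cluster_obj: dict, nodegroup_name: str):
    # Return the index of the machine deployment matching the node group, or None.
    machine_deployments = cluster_obj["spec"]["topology"]["workers"]["machineDeployments"]
    for i, md in enumerate(machine_deployments):
        if md["name"] == nodegroup_name:
            return i
    return None


cluster_obj = {
    "spec": {
        "topology": {
            "workers": {
                "machineDeployments": [
                    {"name": "default-worker", "replicas": 3},
                    {"name": "gpu-workers", "replicas": 1},
                ]
            }
        }
    }
}

index = find_machine_deployment_index(cluster_obj, "gpu-workers")
if index is not None:
    # Only the matching entry is rewritten; the rest of the topology is left
    # untouched, which is what avoids the unexpected full rollouts this patch fixes.
    cluster_obj["spec"]["topology"]["workers"]["machineDeployments"][index]["replicas"] = 2
```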
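The split inside `mutate_machine_deployment` is what avoids rollouts: everything above the early `return` (labels, replicas or autoscaler min/max annotations, `nodeVolumeDetachTimeout`) is safe to refresh on an existing machine deployment, while everything below it (class, failure domain, flavor, image, boot volume) is only filled in for brand-new deployments. A simplified sketch of that branch follows; the autoscaler annotation keys are assumed values, not copied from the driver.

```python
# Simplified sketch of the mutable/immutable split; the annotation keys below are
# assumed stand-ins for AUTOSCALE_ANNOTATION_MIN/MAX, not copied from the driver.
AUTOSCALE_ANNOTATION_MIN = "cluster.x-k8s.io/cluster-api-autoscaler-node-group-min-size"
AUTOSCALE_ANNOTATION_MAX = "cluster.x-k8s.io/cluster-api-autoscaler-node-group-max-size"


def sketch_mutate(md: dict, name: str, node_count: int, autoscale: bool,
                  min_nodes: int = 1, max_nodes: int = 1) -> dict:
    md.setdefault("metadata", {"annotations": {}, "labels": {}})

    # Safe, non-rollout fields: always refreshed, even for an existing deployment.
    if autoscale:
        md["replicas"] = None
        md["metadata"]["annotations"] = {
            AUTOSCALE_ANNOTATION_MIN: str(min_nodes),
            AUTOSCALE_ANNOTATION_MAX: str(max_nodes),
        }
    else:
        md["replicas"] = node_count
        md["metadata"]["annotations"] = {}

    # Existing deployment: stop here so class/flavor/image are never rewritten.
    if md.get("name") == name:
        return md

    # Brand-new deployment: fill in the fields that would trigger a rollout if changed.
    md.update({"class": "default-worker", "name": name})
    return md


existing = sketch_mutate({"name": "gpu-workers"}, "gpu-workers", node_count=0,
                         autoscale=True, min_nodes=1, max_nodes=5)
assert "class" not in existing  # immutable fields untouched on update

fresh = sketch_mutate({}, "new-workers", node_count=2, autoscale=False)
assert fresh["class"] == "default-worker" and fresh["replicas"] == 2
```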