diff --git a/internal/controller/controllers/agent_controller.go b/internal/controller/controllers/agent_controller.go index 339e3f2b4b9..e7b2109b1c5 100644 --- a/internal/controller/controllers/agent_controller.go +++ b/internal/controller/controllers/agent_controller.go @@ -38,6 +38,7 @@ import ( "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -80,6 +81,9 @@ func (r *AgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl // Retrieve clusterDeployment if err = r.Get(ctx, kubeKey, clusterDeployment); err != nil { + if k8serrors.IsNotFound(err) { + return r.deleteAgent(ctx, req.NamespacedName) + } errMsg := fmt.Sprintf("failed to get clusterDeployment with name %s in namespace %s", agent.Spec.ClusterDeploymentName.Name, agent.Spec.ClusterDeploymentName.Namespace) // Update that we failed to retrieve the clusterDeployment @@ -89,6 +93,9 @@ func (r *AgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl // Retrieve cluster for ClusterDeploymentName from the database cluster, err := r.Installer.GetClusterByKubeKey(kubeKey) if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return r.deleteAgent(ctx, req.NamespacedName) + } // Update that we failed to retrieve the cluster from the database return r.updateStatus(ctx, agent, nil, err, !errors.Is(err, gorm.ErrRecordNotFound)) } @@ -96,7 +103,8 @@ func (r *AgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl //Retrieve host from cluster host := getHostFromCluster(cluster, agent.Name) if host == nil { - return r.updateStatus(ctx, agent, nil, errors.New("host not found in cluster"), false) + // Host is not a part of the cluster, which may happen with newly created day2 clusters. + return r.deleteAgent(ctx, req.NamespacedName) } // check for updates from user, compare spec and update if needed @@ -113,6 +121,20 @@ func (r *AgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return r.updateStatus(ctx, agent, host, nil, false) } +func (r *AgentReconciler) deleteAgent(ctx context.Context, agent types.NamespacedName) (ctrl.Result, error) { + agentToDelete := &aiv1beta1.Agent{ + ObjectMeta: metav1.ObjectMeta{ + Name: agent.Name, + Namespace: agent.Namespace, + }, + } + if delErr := r.Client.Delete(ctx, agentToDelete); delErr != nil { + r.Log.WithError(delErr).Errorf("Failed to get delete resource %s %s", agent.Name, agent.Namespace) + return ctrl.Result{Requeue: true}, delErr + } + return ctrl.Result{}, nil +} + // updateStatus is updating all the Agent Conditions. // In case that an error has occured when trying to sync the Spec, the error (syncErr) is presented in SpecSyncedCondition. // Internal bool differentiate between backend server error (internal HTTP 5XX) and user input error (HTTP 4XXX) diff --git a/internal/controller/controllers/agent_controller_test.go b/internal/controller/controllers/agent_controller_test.go index 4416bc29a70..2e9652f2b0f 100644 --- a/internal/controller/controllers/agent_controller_test.go +++ b/internal/controller/controllers/agent_controller_test.go @@ -102,7 +102,7 @@ var _ = Describe("agent reconcile", func() { Expect(c.Get(ctx, key, agent)).To(BeNil()) }) - It("cluster deployment not found", func() { + It("Agent not created when cluster deployment not found", func() { host := newAgent("host", testNamespace, v1beta1.AgentSpec{ClusterDeploymentName: &v1beta1.ClusterReference{Name: "clusterDeployment", Namespace: testNamespace}}) Expect(c.Create(ctx, host)).To(BeNil()) result, err := hr.Reconcile(ctx, newHostRequest(host)) @@ -114,15 +114,10 @@ var _ = Describe("agent reconcile", func() { Namespace: testNamespace, Name: "host", } - Expect(c.Get(ctx, key, agent)).To(BeNil()) - expectedState := fmt.Sprintf("%s failed to get clusterDeployment with name clusterDeployment in namespace test-namespace: clusterdeployments.hive.openshift.io \"clusterDeployment\" not found", InputErrorMsg) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Message).To(Equal(expectedState)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Reason).To(Equal(InputErrorReason)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Status).To(Equal(corev1.ConditionFalse)) - + Expect(c.Get(ctx, key, agent).Error()).To(Equal("agents.agent-install.openshift.io \"host\" not found")) }) - It("cluster not found in database", func() { + It("Agent not created when cluster not found in database", func() { host := newAgent("host", testNamespace, v1beta1.AgentSpec{ClusterDeploymentName: &v1beta1.ClusterReference{Name: "clusterDeployment", Namespace: testNamespace}}) Expect(c.Create(ctx, host)).To(BeNil()) clusterDeployment := newClusterDeployment("clusterDeployment", testNamespace, getDefaultClusterDeploymentSpec("clusterDeployment-test", "pull-secret")) @@ -137,11 +132,7 @@ var _ = Describe("agent reconcile", func() { Namespace: testNamespace, Name: "host", } - Expect(c.Get(ctx, key, agent)).To(BeNil()) - expectedState := fmt.Sprintf("%s record not found", InputErrorMsg) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Message).To(Equal(expectedState)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Reason).To(Equal(InputErrorReason)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Status).To(Equal(corev1.ConditionFalse)) + Expect(c.Get(ctx, key, agent).Error()).To(Equal("agents.agent-install.openshift.io \"host\" not found")) }) It("error getting cluster from database", func() { @@ -168,7 +159,7 @@ var _ = Describe("agent reconcile", func() { Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Status).To(Equal(corev1.ConditionFalse)) }) - It("host not found in cluster", func() { + It("Agent not created when host not found in cluster", func() { host := newAgent("host", testNamespace, v1beta1.AgentSpec{ClusterDeploymentName: &v1beta1.ClusterReference{Name: "clusterDeployment", Namespace: testNamespace}}) clusterDeployment := newClusterDeployment("clusterDeployment", testNamespace, getDefaultClusterDeploymentSpec("clusterDeployment-test", "pull-secret")) Expect(c.Create(ctx, clusterDeployment)).To(BeNil()) @@ -183,11 +174,7 @@ var _ = Describe("agent reconcile", func() { Namespace: testNamespace, Name: "host", } - Expect(c.Get(ctx, key, agent)).To(BeNil()) - expectedState := fmt.Sprintf("%s host not found in cluster", InputErrorMsg) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Message).To(Equal(expectedState)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Reason).To(Equal(InputErrorReason)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Status).To(Equal(corev1.ConditionFalse)) + Expect(c.Get(ctx, key, agent).Error()).To(Equal("agents.agent-install.openshift.io \"host\" not found")) }) It("Agent update", func() { diff --git a/internal/controller/controllers/clusterdeployments_controller.go b/internal/controller/controllers/clusterdeployments_controller.go index c9de116ff31..2d9b191d4c2 100644 --- a/internal/controller/controllers/clusterdeployments_controller.go +++ b/internal/controller/controllers/clusterdeployments_controller.go @@ -138,6 +138,9 @@ func (r *ClusterDeploymentsReconciler) Reconcile(ctx context.Context, req ctrl.R if err != nil { return r.updateStatus(ctx, clusterDeployment, cluster, err) } + if err = r.NotifyClusterDeploymentAgents(req.NamespacedName); err != nil { + return r.updateStatus(ctx, clusterDeployment, cluster, err) + } if !r.isSNO(clusterDeployment) { //Create Day2 cluster return r.createNewDay2Cluster(ctx, req.NamespacedName, clusterDeployment) @@ -581,12 +584,31 @@ func (r *ClusterDeploymentsReconciler) deregisterClusterIfNeeded(ctx context.Con }); err != nil { return buildReply(err) } + if err = r.NotifyClusterDeploymentAgents(key); err != nil { + return buildReply(err) + } r.Log.Infof("Cluster resource deleted, Unregistered cluster: %s", c.ID.String()) return buildReply(nil) } +func (r *ClusterDeploymentsReconciler) NotifyClusterDeploymentAgents(clusterDeployment types.NamespacedName) error { + agents := &v1beta1.AgentList{} + if err := r.List(context.Background(), agents); err != nil { + return err + } + for _, clusterAgent := range agents.Items { + if clusterAgent.Spec.ClusterDeploymentName.Name == clusterDeployment.Name && + clusterAgent.Spec.ClusterDeploymentName.Namespace == clusterDeployment.Namespace { + r.Log.Debugf("clusterDeployment %s namespace %s, notifying agent %s to reconcile for updates.", + clusterDeployment.Name, clusterDeployment.Namespace, clusterAgent.Name) + r.CRDEventsHandler.NotifyAgentUpdates(clusterAgent.Name, clusterAgent.Namespace) + } + } + return nil +} + func (r *ClusterDeploymentsReconciler) SetupWithManager(mgr ctrl.Manager) error { mapSecretToClusterDeployment := func(a client.Object) []reconcile.Request { clusterDeployments := &hivev1.ClusterDeploymentList{} diff --git a/subsystem/kubeapi_test.go b/subsystem/kubeapi_test.go index f67baa90ee8..8af8d974070 100644 --- a/subsystem/kubeapi_test.go +++ b/subsystem/kubeapi_test.go @@ -192,6 +192,20 @@ func getClusterFromDB( return cluster } +func getClusterDeploymentAgents(ctx context.Context, client k8sclient.Client, clusterDeployment types.NamespacedName) *v1beta1.AgentList { + agents := &v1beta1.AgentList{} + clusterAgents := &v1beta1.AgentList{} + err := client.List(ctx, agents) + Expect(err).To(BeNil()) + for _, agent := range agents.Items { + if agent.Spec.ClusterDeploymentName.Name == clusterDeployment.Name && + agent.Spec.ClusterDeploymentName.Namespace == clusterDeployment.Namespace { + clusterAgents.Items = append(clusterAgents.Items, agent) + } + } + return clusterAgents +} + func getClusterDeploymentCRD(ctx context.Context, client k8sclient.Client, key types.NamespacedName) *hivev1.ClusterDeployment { cluster := &hivev1.ClusterDeployment{} err := client.Get(ctx, key, cluster) @@ -405,15 +419,16 @@ var _ = Describe("[kube-api]cluster installation", func() { clearDB() }) - It("deploy clusterDeployment with agents and wait for ready", func() { + It("deploy clusterDeployment with agents and wait for ready, delete and verify agents deleted", func() { secretRef := deployLocalObjectSecretIfNeeded(ctx, kubeClient) spec := getDefaultClusterDeploymentSpec(secretRef) + By("Create cluster") deployClusterDeploymentCRD(ctx, kubeClient, spec) - key := types.NamespacedName{ + clusterKey := types.NamespacedName{ Namespace: Options.Namespace, Name: spec.ClusterName, } - cluster := getClusterFromDB(ctx, kubeClient, db, key, waitForReconcileTimeout) + cluster := getClusterFromDB(ctx, kubeClient, db, clusterKey, waitForReconcileTimeout) configureLocalAgentClient(cluster.ID.String()) hosts := make([]*models.Host, 0) for i := 0; i < 3; i++ { @@ -422,6 +437,7 @@ var _ = Describe("[kube-api]cluster installation", func() { hosts = append(hosts, host) } generateFullMeshConnectivity(ctx, "1.2.3.10", hosts...) + By("Approve Agents") for _, host := range hosts { hostkey := types.NamespacedName{ Namespace: Options.Namespace, @@ -433,7 +449,15 @@ var _ = Describe("[kube-api]cluster installation", func() { return kubeClient.Update(ctx, agent) }, "30s", "10s").Should(BeNil()) } - checkClusterCondition(ctx, key, controllers.ClusterReadyForInstallationCondition, controllers.ClusterAlreadyInstallingReason) + By("Verify ClusterDeployment ReadyForInstallation") + checkClusterCondition(ctx, clusterKey, controllers.ClusterReadyForInstallationCondition, controllers.ClusterAlreadyInstallingReason) + By("Delete ClusterDeployment") + err := kubeClient.Delete(ctx, getClusterDeploymentCRD(ctx, kubeClient, clusterKey)) + Expect(err).To(BeNil()) + By("Verify ClusterDeployment Agents were deleted") + Eventually(func() int { + return len(getClusterDeploymentAgents(ctx, kubeClient, clusterKey).Items) + }, "2m", "2s").Should(Equal(0)) }) It("deploy clusterDeployment with agent and update agent", func() { @@ -1180,6 +1204,10 @@ var _ = Describe("[kube-api]cluster installation", func() { cluster = getClusterFromDB(ctx, kubeClient, db, clusterKey, waitForReconcileTimeout) Expect(*cluster.Kind).Should(Equal(models.ClusterKindAddHostsCluster)) + By("Verify ClusterDeployment Agents were deleted") + Eventually(func() int { + return len(getClusterDeploymentAgents(ctx, kubeClient, clusterKey).Items) + }, "2m", "2s").Should(Equal(0)) By("Verify Cluster Metadata") Eventually(func() bool { return getClusterDeploymentCRD(ctx, kubeClient, clusterKey).Spec.Installed @@ -1266,6 +1294,10 @@ var _ = Describe("[kube-api]cluster installation", func() { cluster = getClusterFromDB(ctx, kubeClient, db, clusterKey, waitForReconcileTimeout) Expect(*cluster.Kind).Should(Equal(models.ClusterKindAddHostsCluster)) + By("Verify ClusterDeployment Agents were deleted") + Eventually(func() int { + return len(getClusterDeploymentAgents(ctx, kubeClient, clusterKey).Items) + }, "2m", "2s").Should(Equal(0)) By("Add Day 2 host and approve agent") configureLocalAgentClient(cluster.ID.String()) host := setupNewHost(ctx, "hostnameday2", *cluster.ID)