diff --git a/internal/cluster/cluster.go b/internal/cluster/cluster.go index 4d3e679899d..dca09c0df40 100644 --- a/internal/cluster/cluster.go +++ b/internal/cluster/cluster.go @@ -233,6 +233,10 @@ func (m *Manager) DeregisterCluster(ctx context.Context, c *common.Cluster) erro } else { m.eventsHandler.AddEvent(ctx, *c.ID, nil, models.EventSeverityInfo, fmt.Sprintf("Deregistered cluster: %s", c.ID.String()), time.Now()) + for _, h := range c.Hosts { + m.eventsHandler.AddEvent(ctx, *c.ID, nil, models.EventSeverityInfo, + fmt.Sprintf("Deregistered cluster: %s host %s", c.ID.String(), h.ID.String()), time.Now()) + } } return err } diff --git a/internal/cluster/cluster_test.go b/internal/cluster/cluster_test.go index 0b5f0cb911a..758320aaa88 100644 --- a/internal/cluster/cluster_test.go +++ b/internal/cluster/cluster_test.go @@ -2753,6 +2753,7 @@ var _ = Describe("Validation metrics and events", func() { mockHost.EXPECT().ReportValidationFailedMetrics(ctx, gomock.Any(), openshiftVersion, emailDomain) mockMetric.EXPECT().ClusterValidationFailed(openshiftVersion, emailDomain, models.ClusterValidationIDSufficientMastersCount) mockEvents.EXPECT().AddEvent(ctx, *c.ID, nil, models.EventSeverityInfo, gomock.Any(), gomock.Any()) + mockEvents.EXPECT().AddEvent(ctx, *c.ID, gomock.Any(), models.EventSeverityInfo, gomock.Any(), gomock.Any()) err := m.DeregisterCluster(ctx, c) Expect(err).ShouldNot(HaveOccurred()) @@ -2764,6 +2765,7 @@ var _ = Describe("Validation metrics and events", func() { mockHost.EXPECT().ReportValidationFailedMetrics(ctx, gomock.Any(), openshiftVersion, emailDomain) mockMetric.EXPECT().ClusterValidationFailed(openshiftVersion, emailDomain, models.ClusterValidationIDSufficientMastersCount) mockEvents.EXPECT().AddEvent(ctx, *c.ID, nil, models.EventSeverityInfo, gomock.Any(), gomock.Any()) + mockEvents.EXPECT().AddEvent(ctx, *c.ID, gomock.Any(), models.EventSeverityInfo, gomock.Any(), gomock.Any()) err := m.DeregisterCluster(ctx, c) Expect(err).ShouldNot(HaveOccurred()) diff --git a/internal/controller/controllers/agent_controller.go b/internal/controller/controllers/agent_controller.go index 339e3f2b4b9..49e6924e7db 100644 --- a/internal/controller/controllers/agent_controller.go +++ b/internal/controller/controllers/agent_controller.go @@ -38,6 +38,7 @@ import ( "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -80,6 +81,9 @@ func (r *AgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl // Retrieve clusterDeployment if err = r.Get(ctx, kubeKey, clusterDeployment); err != nil { + if k8serrors.IsNotFound(err) { + return r.deleteAgent(ctx, req.NamespacedName) + } errMsg := fmt.Sprintf("failed to get clusterDeployment with name %s in namespace %s", agent.Spec.ClusterDeploymentName.Name, agent.Spec.ClusterDeploymentName.Namespace) // Update that we failed to retrieve the clusterDeployment @@ -89,6 +93,9 @@ func (r *AgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl // Retrieve cluster for ClusterDeploymentName from the database cluster, err := r.Installer.GetClusterByKubeKey(kubeKey) if err != nil { + if errors.Is(err, gorm.ErrRecordNotFound) { + return r.deleteAgent(ctx, req.NamespacedName) + } // Update that we failed to retrieve the cluster from the database return r.updateStatus(ctx, agent, nil, err, !errors.Is(err, gorm.ErrRecordNotFound)) } @@ -96,7 +103,8 @@ func (r *AgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl //Retrieve host from cluster host := getHostFromCluster(cluster, agent.Name) if host == nil { - return r.updateStatus(ctx, agent, nil, errors.New("host not found in cluster"), false) + // Host is not a part of the cluster, which may happen with newly created day2 clusters. + return r.deleteAgent(ctx, req.NamespacedName) } // check for updates from user, compare spec and update if needed @@ -113,6 +121,21 @@ func (r *AgentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl return r.updateStatus(ctx, agent, host, nil, false) } +func (r *AgentReconciler) deleteAgent(ctx context.Context, agent types.NamespacedName) (ctrl.Result, error) { + Requeue := false + agentToDelete := &aiv1beta1.Agent{ + ObjectMeta: metav1.ObjectMeta{ + Name: agent.Name, + Namespace: agent.Namespace, + }, + } + if err := r.Client.Delete(ctx, agentToDelete); err != nil { + r.Log.WithError(err).Errorf("Failed to get delete resource %s %s", agent.Name, agent.Namespace) + Requeue = true + } + return ctrl.Result{Requeue: Requeue}, nil +} + // updateStatus is updating all the Agent Conditions. // In case that an error has occured when trying to sync the Spec, the error (syncErr) is presented in SpecSyncedCondition. // Internal bool differentiate between backend server error (internal HTTP 5XX) and user input error (HTTP 4XXX) diff --git a/internal/controller/controllers/agent_controller_test.go b/internal/controller/controllers/agent_controller_test.go index 4416bc29a70..2e9652f2b0f 100644 --- a/internal/controller/controllers/agent_controller_test.go +++ b/internal/controller/controllers/agent_controller_test.go @@ -102,7 +102,7 @@ var _ = Describe("agent reconcile", func() { Expect(c.Get(ctx, key, agent)).To(BeNil()) }) - It("cluster deployment not found", func() { + It("Agent not created when cluster deployment not found", func() { host := newAgent("host", testNamespace, v1beta1.AgentSpec{ClusterDeploymentName: &v1beta1.ClusterReference{Name: "clusterDeployment", Namespace: testNamespace}}) Expect(c.Create(ctx, host)).To(BeNil()) result, err := hr.Reconcile(ctx, newHostRequest(host)) @@ -114,15 +114,10 @@ var _ = Describe("agent reconcile", func() { Namespace: testNamespace, Name: "host", } - Expect(c.Get(ctx, key, agent)).To(BeNil()) - expectedState := fmt.Sprintf("%s failed to get clusterDeployment with name clusterDeployment in namespace test-namespace: clusterdeployments.hive.openshift.io \"clusterDeployment\" not found", InputErrorMsg) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Message).To(Equal(expectedState)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Reason).To(Equal(InputErrorReason)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Status).To(Equal(corev1.ConditionFalse)) - + Expect(c.Get(ctx, key, agent).Error()).To(Equal("agents.agent-install.openshift.io \"host\" not found")) }) - It("cluster not found in database", func() { + It("Agent not created when cluster not found in database", func() { host := newAgent("host", testNamespace, v1beta1.AgentSpec{ClusterDeploymentName: &v1beta1.ClusterReference{Name: "clusterDeployment", Namespace: testNamespace}}) Expect(c.Create(ctx, host)).To(BeNil()) clusterDeployment := newClusterDeployment("clusterDeployment", testNamespace, getDefaultClusterDeploymentSpec("clusterDeployment-test", "pull-secret")) @@ -137,11 +132,7 @@ var _ = Describe("agent reconcile", func() { Namespace: testNamespace, Name: "host", } - Expect(c.Get(ctx, key, agent)).To(BeNil()) - expectedState := fmt.Sprintf("%s record not found", InputErrorMsg) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Message).To(Equal(expectedState)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Reason).To(Equal(InputErrorReason)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Status).To(Equal(corev1.ConditionFalse)) + Expect(c.Get(ctx, key, agent).Error()).To(Equal("agents.agent-install.openshift.io \"host\" not found")) }) It("error getting cluster from database", func() { @@ -168,7 +159,7 @@ var _ = Describe("agent reconcile", func() { Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Status).To(Equal(corev1.ConditionFalse)) }) - It("host not found in cluster", func() { + It("Agent not created when host not found in cluster", func() { host := newAgent("host", testNamespace, v1beta1.AgentSpec{ClusterDeploymentName: &v1beta1.ClusterReference{Name: "clusterDeployment", Namespace: testNamespace}}) clusterDeployment := newClusterDeployment("clusterDeployment", testNamespace, getDefaultClusterDeploymentSpec("clusterDeployment-test", "pull-secret")) Expect(c.Create(ctx, clusterDeployment)).To(BeNil()) @@ -183,11 +174,7 @@ var _ = Describe("agent reconcile", func() { Namespace: testNamespace, Name: "host", } - Expect(c.Get(ctx, key, agent)).To(BeNil()) - expectedState := fmt.Sprintf("%s host not found in cluster", InputErrorMsg) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Message).To(Equal(expectedState)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Reason).To(Equal(InputErrorReason)) - Expect(conditionsv1.FindStatusCondition(agent.Status.Conditions, SpecSyncedCondition).Status).To(Equal(corev1.ConditionFalse)) + Expect(c.Get(ctx, key, agent).Error()).To(Equal("agents.agent-install.openshift.io \"host\" not found")) }) It("Agent update", func() { diff --git a/subsystem/kubeapi_test.go b/subsystem/kubeapi_test.go index f67baa90ee8..8af8d974070 100644 --- a/subsystem/kubeapi_test.go +++ b/subsystem/kubeapi_test.go @@ -192,6 +192,20 @@ func getClusterFromDB( return cluster } +func getClusterDeploymentAgents(ctx context.Context, client k8sclient.Client, clusterDeployment types.NamespacedName) *v1beta1.AgentList { + agents := &v1beta1.AgentList{} + clusterAgents := &v1beta1.AgentList{} + err := client.List(ctx, agents) + Expect(err).To(BeNil()) + for _, agent := range agents.Items { + if agent.Spec.ClusterDeploymentName.Name == clusterDeployment.Name && + agent.Spec.ClusterDeploymentName.Namespace == clusterDeployment.Namespace { + clusterAgents.Items = append(clusterAgents.Items, agent) + } + } + return clusterAgents +} + func getClusterDeploymentCRD(ctx context.Context, client k8sclient.Client, key types.NamespacedName) *hivev1.ClusterDeployment { cluster := &hivev1.ClusterDeployment{} err := client.Get(ctx, key, cluster) @@ -405,15 +419,16 @@ var _ = Describe("[kube-api]cluster installation", func() { clearDB() }) - It("deploy clusterDeployment with agents and wait for ready", func() { + It("deploy clusterDeployment with agents and wait for ready, delete and verify agents deleted", func() { secretRef := deployLocalObjectSecretIfNeeded(ctx, kubeClient) spec := getDefaultClusterDeploymentSpec(secretRef) + By("Create cluster") deployClusterDeploymentCRD(ctx, kubeClient, spec) - key := types.NamespacedName{ + clusterKey := types.NamespacedName{ Namespace: Options.Namespace, Name: spec.ClusterName, } - cluster := getClusterFromDB(ctx, kubeClient, db, key, waitForReconcileTimeout) + cluster := getClusterFromDB(ctx, kubeClient, db, clusterKey, waitForReconcileTimeout) configureLocalAgentClient(cluster.ID.String()) hosts := make([]*models.Host, 0) for i := 0; i < 3; i++ { @@ -422,6 +437,7 @@ var _ = Describe("[kube-api]cluster installation", func() { hosts = append(hosts, host) } generateFullMeshConnectivity(ctx, "1.2.3.10", hosts...) + By("Approve Agents") for _, host := range hosts { hostkey := types.NamespacedName{ Namespace: Options.Namespace, @@ -433,7 +449,15 @@ var _ = Describe("[kube-api]cluster installation", func() { return kubeClient.Update(ctx, agent) }, "30s", "10s").Should(BeNil()) } - checkClusterCondition(ctx, key, controllers.ClusterReadyForInstallationCondition, controllers.ClusterAlreadyInstallingReason) + By("Verify ClusterDeployment ReadyForInstallation") + checkClusterCondition(ctx, clusterKey, controllers.ClusterReadyForInstallationCondition, controllers.ClusterAlreadyInstallingReason) + By("Delete ClusterDeployment") + err := kubeClient.Delete(ctx, getClusterDeploymentCRD(ctx, kubeClient, clusterKey)) + Expect(err).To(BeNil()) + By("Verify ClusterDeployment Agents were deleted") + Eventually(func() int { + return len(getClusterDeploymentAgents(ctx, kubeClient, clusterKey).Items) + }, "2m", "2s").Should(Equal(0)) }) It("deploy clusterDeployment with agent and update agent", func() { @@ -1180,6 +1204,10 @@ var _ = Describe("[kube-api]cluster installation", func() { cluster = getClusterFromDB(ctx, kubeClient, db, clusterKey, waitForReconcileTimeout) Expect(*cluster.Kind).Should(Equal(models.ClusterKindAddHostsCluster)) + By("Verify ClusterDeployment Agents were deleted") + Eventually(func() int { + return len(getClusterDeploymentAgents(ctx, kubeClient, clusterKey).Items) + }, "2m", "2s").Should(Equal(0)) By("Verify Cluster Metadata") Eventually(func() bool { return getClusterDeploymentCRD(ctx, kubeClient, clusterKey).Spec.Installed @@ -1266,6 +1294,10 @@ var _ = Describe("[kube-api]cluster installation", func() { cluster = getClusterFromDB(ctx, kubeClient, db, clusterKey, waitForReconcileTimeout) Expect(*cluster.Kind).Should(Equal(models.ClusterKindAddHostsCluster)) + By("Verify ClusterDeployment Agents were deleted") + Eventually(func() int { + return len(getClusterDeploymentAgents(ctx, kubeClient, clusterKey).Items) + }, "2m", "2s").Should(Equal(0)) By("Add Day 2 host and approve agent") configureLocalAgentClient(cluster.ID.String()) host := setupNewHost(ctx, "hostnameday2", *cluster.ID)