diff --git a/pkg/virt-controller/watch/descheduler/BUILD.bazel b/pkg/virt-controller/watch/descheduler/BUILD.bazel index 476f51330a04..ce094ebb979c 100644 --- a/pkg/virt-controller/watch/descheduler/BUILD.bazel +++ b/pkg/virt-controller/watch/descheduler/BUILD.bazel @@ -5,4 +5,15 @@ go_library( srcs = ["descheduler.go"], importpath = "kubevirt.io/kubevirt/pkg/virt-controller/watch/descheduler", visibility = ["//visibility:public"], + deps = [ + "//pkg/apimachinery/patch:go_default_library", + "//pkg/controller:go_default_library", + "//staging/src/kubevirt.io/api/core/v1:go_default_library", + "//staging/src/kubevirt.io/client-go/kubecli:go_default_library", + "//staging/src/kubevirt.io/client-go/log:go_default_library", + "//vendor/k8s.io/api/core/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", + "//vendor/k8s.io/client-go/tools/cache:go_default_library", + ], ) diff --git a/pkg/virt-controller/watch/descheduler/descheduler.go b/pkg/virt-controller/watch/descheduler/descheduler.go index 41793266d131..84745e584bb1 100644 --- a/pkg/virt-controller/watch/descheduler/descheduler.go +++ b/pkg/virt-controller/watch/descheduler/descheduler.go @@ -19,7 +19,85 @@ package descheduler +import ( + "context" + "fmt" + + "k8s.io/client-go/tools/cache" + virtv1 "kubevirt.io/api/core/v1" + + "kubevirt.io/kubevirt/pkg/controller" + + k8sv1 "k8s.io/api/core/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + "kubevirt.io/client-go/kubecli" + "kubevirt.io/client-go/log" + + "kubevirt.io/kubevirt/pkg/apimachinery/patch" +) + // EvictOnlyAnnotation indicates pods whose eviction is not expected to be completed right away. // Instead, an eviction request is expected to be intercepted by an external component which will initiate the // eviction process for the pod. const EvictOnlyAnnotation = "descheduler.alpha.kubernetes.io/request-evict-only" + +// EvictionInProgressAnnotation indicates pods whose eviction was initiated by an external component. +const EvictionInProgressAnnotation = "descheduler.alpha.kubernetes.io/eviction-in-progress" + +func AddDeschedulerAnnotation(virtClient kubecli.KubevirtClient, sourcePod *k8sv1.Pod) error { + if _, exists := sourcePod.GetAnnotations()[EvictionInProgressAnnotation]; exists { + return nil + } + + patchSet := patch.New( + patch.WithAdd(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(EvictionInProgressAnnotation)), ""), + ) + patchBytes, err := patchSet.GeneratePayload() + if err != nil { + return err + } + + _, err = virtClient.CoreV1().Pods(sourcePod.Namespace).Patch(context.Background(), sourcePod.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) + if err != nil { + log.Log.Object(sourcePod).Errorf("failed to add %s pod annotation: %v", EvictionInProgressAnnotation, err) + return err + } + + return nil +} + +func RemoveDeschedulerAnnotationFromSourcePod(virtClient kubecli.KubevirtClient, migration *virtv1.VirtualMachineInstanceMigration, podIndexer cache.Indexer) error { + if migration.Status.MigrationState == nil || migration.Status.MigrationState.SourcePod == "" { + return nil + } + + podKey := controller.NamespacedKey(migration.Namespace, migration.Status.MigrationState.SourcePod) + obj, exists, err := podIndexer.GetByKey(podKey) + if !exists { + log.Log.Warningf("source pod %s does not exist", migration.Status.MigrationState.SourcePod) + return nil + } + if err != nil { + log.Log.Reason(err).Errorf("Failed to fetch source pod %s for namespace from cache.", migration.Status.MigrationState.SourcePod) + return err + } + + sourcePod := obj.(*k8sv1.Pod) + if _, exists := sourcePod.GetAnnotations()[EvictionInProgressAnnotation]; exists { + patchSet := patch.New(patch.WithRemove(fmt.Sprintf("/metadata/annotations/%s", patch.EscapeJSONPointer(EvictionInProgressAnnotation)))) + patchBytes, err := patchSet.GeneratePayload() + if err != nil { + return err + } + + _, err = virtClient.CoreV1().Pods(sourcePod.Namespace).Patch(context.Background(), sourcePod.Name, types.JSONPatchType, patchBytes, v1.PatchOptions{}) + if err != nil { + log.Log.Object(sourcePod).Errorf("failed to remove %s pod annotation : %v", EvictionInProgressAnnotation, err) + return err + } + } + + return nil +} diff --git a/pkg/virt-controller/watch/migration/BUILD.bazel b/pkg/virt-controller/watch/migration/BUILD.bazel index 7c4ed2f279bc..310f25b2e700 100644 --- a/pkg/virt-controller/watch/migration/BUILD.bazel +++ b/pkg/virt-controller/watch/migration/BUILD.bazel @@ -18,6 +18,7 @@ go_library( "//pkg/util/status:go_default_library", "//pkg/virt-config:go_default_library", "//pkg/virt-controller/services:go_default_library", + "//pkg/virt-controller/watch/descheduler:go_default_library", "//staging/src/kubevirt.io/api/core/v1:go_default_library", "//staging/src/kubevirt.io/api/migrations/v1alpha1:go_default_library", "//staging/src/kubevirt.io/client-go/kubecli:go_default_library", diff --git a/pkg/virt-controller/watch/migration/migration.go b/pkg/virt-controller/watch/migration/migration.go index d48f26e9d04c..5ae1bf12af65 100644 --- a/pkg/virt-controller/watch/migration/migration.go +++ b/pkg/virt-controller/watch/migration/migration.go @@ -62,6 +62,7 @@ import ( "kubevirt.io/kubevirt/pkg/controller" storagetypes "kubevirt.io/kubevirt/pkg/storage/types" "kubevirt.io/kubevirt/pkg/virt-controller/services" + "kubevirt.io/kubevirt/pkg/virt-controller/watch/descheduler" ) const ( @@ -499,6 +500,12 @@ func (c *MigrationController) updateStatus(migration *virtv1.VirtualMachineInsta } } + if migrationCopy.Status.Phase == virtv1.MigrationFailed { + if err := descheduler.RemoveDeschedulerAnnotationFromSourcePod(c.clientset, migrationCopy, c.podIndexer); err != nil { + return err + } + } + controller.SetVMIMigrationPhaseTransitionTimestamp(migration, migrationCopy) controller.SetSourcePod(migrationCopy, vmi, c.podIndexer) @@ -1279,6 +1286,12 @@ func (c *MigrationController) sync(key string, migration *virtv1.VirtualMachineI return nil } + if _, exists := migration.GetAnnotations()[virtv1.EvacuationMigrationAnnotation]; exists { + if err = descheduler.AddDeschedulerAnnotation(c.clientset, sourcePod); err != nil { + return err + } + } + // patch VMI annotations and set RuntimeUser in preparation for target pod creation patches := c.setupVMIRuntimeUser(vmi) if !patches.IsEmpty() { @@ -1334,8 +1347,17 @@ func (c *MigrationController) sync(key string, migration *virtv1.VirtualMachineI !vmi.Status.MigrationState.Failed && !vmi.Status.MigrationState.Completed { - return c.handleMarkMigrationFailedOnVMI(migration, vmi) + err = c.handleMarkMigrationFailedOnVMI(migration, vmi) + if err != nil { + return err + } } + + if migration.Status.Phase != virtv1.MigrationFailed { + return nil + } + + return descheduler.RemoveDeschedulerAnnotationFromSourcePod(c.clientset, migration, c.podIndexer) case virtv1.MigrationRunning: if migration.DeletionTimestamp != nil && vmi.Status.MigrationState != nil { err = c.markMigrationAbortInVmiStatus(migration, vmi)