From 97ceef6938603e315c4e1c8d2bb697aabc3dd7f8 Mon Sep 17 00:00:00 2001 From: RealAnna <89971034+RealAnna@users.noreply.github.com> Date: Mon, 10 Oct 2022 16:16:04 +0200 Subject: [PATCH] feat(scheduler): Background check for pod status in permit plugin (#124) --- scheduler/cmd/scheduler/main.go | 2 +- scheduler/manifests/install/base/rbac.yaml | 3 ++ scheduler/pkg/klcpermit/permit.go | 43 +++++++++++++++------- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/scheduler/cmd/scheduler/main.go b/scheduler/cmd/scheduler/main.go index 4d40664d2a..8a9e4ddde7 100644 --- a/scheduler/cmd/scheduler/main.go +++ b/scheduler/cmd/scheduler/main.go @@ -40,7 +40,7 @@ func main() { rand.Seed(time.Now().UnixNano()) command := app.NewSchedulerCommand( - app.WithPlugin(klcpermit.Name, klcpermit.New), + app.WithPlugin(klcpermit.PluginName, klcpermit.New), ) code := cli.Run(command) diff --git a/scheduler/manifests/install/base/rbac.yaml b/scheduler/manifests/install/base/rbac.yaml index 7f5a098004..c72eae11c1 100644 --- a/scheduler/manifests/install/base/rbac.yaml +++ b/scheduler/manifests/install/base/rbac.yaml @@ -69,6 +69,9 @@ rules: - apiGroups: ["lifecycle.keptn.sh"] resources: ["keptnworkloadinstances"] verbs: ["get", "list", "watch"] + - apiGroups: [ "" ] + resources: [ "configmaps" ] + verbs: [ "list", "watch" ] --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 diff --git a/scheduler/pkg/klcpermit/permit.go b/scheduler/pkg/klcpermit/permit.go index 9cf7cc21b5..a6585479da 100644 --- a/scheduler/pkg/klcpermit/permit.go +++ b/scheduler/pkg/klcpermit/permit.go @@ -12,12 +12,12 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework" ) -// Name is the name of the plugin used in the plugin registry and configurations. +// PluginName is the name of the plugin used in the plugin registry and configurations. 
const ( - Name = "KLCPermit" + PluginName = "KLCPermit" ) -// Permit is a plugin that implements a wait for pre-deployment checks +// Permit is a plugin that waits for pre-deployment checks to be successfully finished type Permit struct { handler framework.Handle workloadManager *WorkloadManager @@ -25,29 +25,46 @@ type Permit struct { var _ framework.PermitPlugin = &Permit{} -// Name returns name of the plugin. +// Name returns the name of the plugin. func (pl *Permit) Name() string { - return Name + return PluginName } func (pl *Permit) Permit(ctx context.Context, state *framework.CycleState, p *v1.Pod, nodeName string) (*framework.Status, time.Duration) { - klog.InfoS("[Keptn Permit Plugin] waiting for pre-deployment checks on", p.GetObjectMeta().GetName()) + klog.Infof("[Keptn Permit Plugin] waiting for pre-deployment checks on %s", p.GetObjectMeta().GetName()) + // check the permit immediately, to fail early in case the pod cannot be queued switch pl.workloadManager.Permit(ctx, p) { - case Wait: - klog.Infof("[Keptn Permit Plugin] waiting for pre-deployment checks on", p.GetObjectMeta().GetName()) - return framework.NewStatus(framework.Wait), 30 * time.Second case Failure: - klog.Infof("[Keptn Permit Plugin] failed pre-deployment checks on", p.GetObjectMeta().GetName()) + klog.Infof("[Keptn Permit Plugin] failed pre-deployment checks on %s", p.GetObjectMeta().GetName()) return framework.NewStatus(framework.Error), 0 * time.Second case Success: - klog.Infof("[Keptn Permit Plugin] passed pre-deployment checks on", p.GetObjectMeta().GetName()) + klog.Infof("[Keptn Permit Plugin] passed pre-deployment checks on %s", p.GetObjectMeta().GetName()) return framework.NewStatus(framework.Success), 0 * time.Second default: - klog.Infof("[Keptn Permit Plugin] unknown status of pre-deployment checks for", p.GetObjectMeta().GetName()) - return framework.NewStatus(framework.Wait), 30 * time.Second + klog.Infof("[Keptn Permit Plugin] waiting for pre-deployment checks on %s", 
p.GetObjectMeta().GetName()) + go pl.monitorPod(ctx, p) + return framework.NewStatus(framework.Wait), 5 * time.Minute + } + +} + +func (pl *Permit) monitorPod(ctx context.Context, p *v1.Pod) { + waitingPodHandler := pl.handler.GetWaitingPod(p.UID) + + for { + switch pl.workloadManager.Permit(ctx, p) { + case Failure: + waitingPodHandler.Reject(PluginName, "Pre Deployment Check failed") + return + case Success: + waitingPodHandler.Allow(PluginName) + return + default: + time.Sleep(10 * time.Second) + } } }