Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add a webhook to prevent eviction of pods on kosmos NotReady nodes #342

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ TARGETS := clusterlink-controller-manager \
clusterlink-proxy \
clustertree-cluster-manager \
scheduler \
webhook

CTL_TARGETS := kosmosctl

Expand Down
63 changes: 63 additions & 0 deletions cmd/webhook/app/options/options.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package options

import (
"github.com/spf13/pflag"
"k8s.io/client-go/tools/leaderelection/resourcelock"
componentbaseconfig "k8s.io/component-base/config"

"github.com/kosmos.io/kosmos/pkg/utils"
"github.com/kosmos.io/kosmos/pkg/webhook"
)

type Options struct {
LeaderElection componentbaseconfig.LeaderElectionConfiguration
KubernetesOptions KubernetesOptions
WebhookServerOptions WebhookServerOptions
PodValidatorOptions webhook.PodValidatorOptions
}

type KubernetesOptions struct {
KubeConfig string `json:"kubeconfig" yaml:"kubeconfig"`
Master string `json:"master,omitempty" yaml:"master,omitempty"`
QPS float32 `json:"qps,omitempty" yaml:"qps,omitempty"`
Burst int `json:"burst,omitempty" yaml:"burst,omitempty"`
}

type WebhookServerOptions struct {
Host string
Port int
CertDir string
CertName string
KeyName string
}

func NewOptions() *Options {
return &Options{
LeaderElection: componentbaseconfig.LeaderElectionConfiguration{
LeaderElect: true,
ResourceLock: resourcelock.LeasesResourceLock,
ResourceNamespace: utils.DefaultNamespace,
ResourceName: "network-manager",
},
}
}

func (o *Options) AddFlags(flags *pflag.FlagSet) {
if o == nil {
return
}

flags.BoolVar(&o.LeaderElection.LeaderElect, "leader-elect", true, "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.")
flags.StringVar(&o.LeaderElection.ResourceName, "leader-elect-resource-name", "kosmos-webhook", "The name of resource object that is used for locking during leader election.")
flags.StringVar(&o.LeaderElection.ResourceNamespace, "leader-elect-resource-namespace", utils.DefaultNamespace, "The namespace of resource object that is used for locking during leader election.")
flags.Float32Var(&o.KubernetesOptions.QPS, "kube-qps", 40.0, "QPS to use while talking with kube-apiserver.")
flags.IntVar(&o.KubernetesOptions.Burst, "kube-burst", 60, "Burst to use while talking with kube-apiserver.")
flags.StringVar(&o.KubernetesOptions.KubeConfig, "kubeconfig", "", "Path for kubernetes kubeconfig file, if left blank, will use in cluster way.")
flags.StringVar(&o.KubernetesOptions.Master, "master", "", "Used to generate kubeconfig for downloading, if not specified, will use host in kubeconfig.")
flags.StringVar(&o.WebhookServerOptions.Host, "bind-address", "0.0.0.0", "The IP address on which to listen for the --secure-port port.")
flags.IntVar(&o.WebhookServerOptions.Port, "secure-port", 9443, "The secure port on which to serve HTTPS.")
flags.StringVar(&o.WebhookServerOptions.CertDir, "cert-dir", "/etc/certs", "The directory that contains the server key and certificate.")
flags.StringVar(&o.WebhookServerOptions.CertName, "tls-cert-file-name", "tls.crt", "The name of server certificate.")
flags.StringVar(&o.WebhookServerOptions.KeyName, "tls-private-key-file-name", "tls.key", "The name of server key.")
flags.StringArrayVar(&o.PodValidatorOptions.UsernamesNeedToPrevent, "usernames-need-to-prevent", []string{"system:serviceaccount:kube-system:node-controller"}, "Usernames that need to prevent deleting pods on NotReady kosmos nodes.")
}
10 changes: 10 additions & 0 deletions cmd/webhook/app/options/validation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package options

import "k8s.io/apimachinery/pkg/util/validation/field"

// Validate checks Options and return a slice of found errs.
func (o *Options) Validate() field.ErrorList {
errs := field.ErrorList{}

return errs
}
102 changes: 102 additions & 0 deletions cmd/webhook/app/webhook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package app

import (
"context"
"fmt"
"net/http"

"github.com/spf13/cobra"
"k8s.io/client-go/tools/clientcmd"
cliflag "k8s.io/component-base/cli/flag"
"k8s.io/klog/v2"
controllerruntime "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/webhook"

"github.com/kosmos.io/kosmos/cmd/webhook/app/options"
"github.com/kosmos.io/kosmos/pkg/scheme"
"github.com/kosmos.io/kosmos/pkg/sharedcli/klogflag"
kosmoswebhook "github.com/kosmos.io/kosmos/pkg/webhook"
)

func NewWebhookCommand(ctx context.Context) *cobra.Command {
opts := options.NewOptions()

cmd := &cobra.Command{
Use: "kosmos-webhook",
Long: `TODO`,
RunE: func(cmd *cobra.Command, args []string) error {
if errs := opts.Validate(); len(errs) != 0 {
return errs.ToAggregate()
}
if err := Run(ctx, opts); err != nil {
return err
}
return nil
},
Args: func(cmd *cobra.Command, args []string) error {
for _, arg := range args {
if len(arg) > 0 {
return fmt.Errorf("%q does not take any arguments, got %q", cmd.CommandPath(), args)
}
}
return nil
},
}

fss := cliflag.NamedFlagSets{}

genericFlagSet := fss.FlagSet("generic")
opts.AddFlags(genericFlagSet)

logsFlagSet := fss.FlagSet("logs")
klogflag.Add(logsFlagSet)

cmd.Flags().AddFlagSet(genericFlagSet)
cmd.Flags().AddFlagSet(logsFlagSet)

return cmd
}

func Run(ctx context.Context, opts *options.Options) error {
config, err := clientcmd.BuildConfigFromFlags(opts.KubernetesOptions.Master, opts.KubernetesOptions.KubeConfig)
if err != nil {
panic(err)
}
config.QPS, config.Burst = opts.KubernetesOptions.QPS, opts.KubernetesOptions.Burst

mgr, err := controllerruntime.NewManager(config, controllerruntime.Options{
Logger: klog.Background(),
Scheme: scheme.NewSchema(),
WebhookServer: &webhook.Server{
Host: opts.WebhookServerOptions.Host,
Port: opts.WebhookServerOptions.Port,
CertDir: opts.WebhookServerOptions.CertDir,
CertName: opts.WebhookServerOptions.CertName,
KeyName: opts.WebhookServerOptions.KeyName,
},
MetricsBindAddress: "0",
HealthProbeBindAddress: "0",
LeaderElection: opts.LeaderElection.LeaderElect,
LeaderElectionID: opts.LeaderElection.ResourceName,
LeaderElectionNamespace: opts.LeaderElection.ResourceNamespace,
})
if err != nil {
klog.Errorf("failed to build webhook server: %v", err)
return err
}

hookServer := mgr.GetWebhookServer()
hookServer.Register("/validate-delete-pod", &webhook.Admission{Handler: &kosmoswebhook.PodValidator{
Client: mgr.GetClient(),
Options: opts.PodValidatorOptions,
}})
hookServer.WebhookMux.Handle("/health", http.StripPrefix("/health", &healthz.Handler{}))

if err := mgr.Start(ctx); err != nil {
klog.Errorf("failed to start webhook manager: %v", err)
return err
}

return nil
}
17 changes: 17 additions & 0 deletions cmd/webhook/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package main

import (
"os"

apiserver "k8s.io/apiserver/pkg/server"
"k8s.io/component-base/cli"

"github.com/kosmos.io/kosmos/cmd/webhook/app"
)

func main() {
ctx := apiserver.SetupSignalContext()
cmd := app.NewWebhookCommand(ctx)
code := cli.Run(cmd)
os.Exit(code)
}
53 changes: 53 additions & 0 deletions deploy/webhook/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: kosmos-webhook
namespace: kosmos-system
spec:
selector:
matchLabels:
app: kosmos-webhook
template:
metadata:
labels:
app: kosmos-webhook
spec:
serviceAccountName: kosmos-webhook
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kosmos.io/node
operator: DoesNotExist
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- kosmos-webhook
namespaces:
- kosmos-system
topologyKey: kubernetes.io/hostname
containers:
- image: ghcr.io/kosmos-io/webhook:__VERSION__
name: kosmos-webhook
volumeMounts:
- name: tls
mountPath: "/etc/certs"
command:
- webhook
- --v=4
resources:
limits:
memory: 500Mi
cpu: 500m
requests:
cpu: 500m
memory: 500Mi
volumes:
- name: tls
secret:
secretName: kosmos-webhook-tls
27 changes: 27 additions & 0 deletions deploy/webhook/rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: kosmos-webhook
namespace: kosmos-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kosmos-webhook
rules:
- apiGroups: ['*']
resources: ["nodes", "pods"]
verbs: ["*"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kosmos-webhook
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kosmos-webhook
subjects:
- kind: ServiceAccount
name: kosmos-webhook
namespace: kosmos-system
12 changes: 12 additions & 0 deletions deploy/webhook/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: v1
kind: Service
metadata:
name: kosmos-webhook
namespace: kosmos-system
spec:
ports:
- port: 9443
protocol: TCP
targetPort: 9443
selector:
app: kosmos-webhook
9 changes: 9 additions & 0 deletions deploy/webhook/tls-secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
apiVersion: v1
data:
tls.crt: __BASE64_SERVER_CRT__
tls.key: __BASE64_SERVER_KEY__
kind: Secret
metadata:
name: kosmos-webhook-tls
namespace: kosmos-system
type: kubernetes.io/tls
26 changes: 26 additions & 0 deletions deploy/webhook/validate-delete-pod-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
name: "validate-delete-pod.kosmos.io"
webhooks:
- name: "validate-delete-pod.kosmos.io"
rules:
- apiGroups: [""]
apiVersions: ["v1"]
operations: ["DELETE"]
resources: ["pods"]
scope: "*"
admissionReviewVersions: ["v1"]
# FailurePolicy defines how unrecognized errors from the admission endpoint are handled - allowed values are
# Ignore or Fail. Defaults to Fail.
failurePolicy: Ignore
sideEffects: None
timeoutSeconds: 3
clientConfig:
service:
namespace: kosmos-system
name: kosmos-webhook
path: /validate-delete-pod
port: 9443
caBundle: |
__BASE64_CA_CRT__
36 changes: 36 additions & 0 deletions hack/gen-webhook-certs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash

WEBHOOK_NAME="kosmos-webhook"
NAMESPACE="kosmos-system"
DAYS="36500"

openssl genrsa -out ca.key 2048

openssl req -new -x509 -days ${DAYS} -key ca.key \
-subj "/C=CN/CN=${WEBHOOK_NAME}"\
-out ca.crt

openssl req -newkey rsa:2048 -nodes -keyout server.key \
-subj "/C=CN/CN=${WEBHOOK_NAME}" \
-out server.csr

openssl x509 -req \
-extfile <(printf "subjectAltName=DNS:${WEBHOOK_NAME}.${NAMESPACE}.svc") \
-days ${DAYS} \
-in server.csr \
-CA ca.crt -CAkey ca.key -CAcreateserial \
-out server.crt

echo
echo ">> Generating kube secrets..."
kubectl create secret tls ${WEBHOOK_NAME}-tls \
--cert=server.crt \
--key=server.key \
--dry-run=client -o yaml \
> tls-secret.yaml

echo
echo ">> MutatingWebhookConfiguration caBundle:"
cat ca.crt | base64 | fold

rm ca.crt ca.key ca.srl server.crt server.csr server.key
1 change: 1 addition & 0 deletions hack/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ CLUSTERLINK_TARGET_SOURCE=(
clusterlink-controller-manager=cmd/clusterlink/controller-manager
clustertree-cluster-manager=cmd/clustertree/cluster-manager
kosmosctl=cmd/kosmosctl
webhook=cmd/webhook
)

#https://textkool.com/en/ascii-art-generator?hl=default&vl=default&font=DOS%20Rebel&text=KOSMOS
Expand Down
9 changes: 9 additions & 0 deletions pkg/utils/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,3 +342,12 @@ func ListResourceClusters(anno map[string]string) []string {
owners := strings.Split(anno[KosmosResourceOwnersAnnotations], ",")
return owners
}

func IsNotReady(node *corev1.Node) bool {
for _, condition := range node.Status.Conditions {
if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionTrue {
return false
}
}
return true
}
Loading
Loading