From 6656d1a2fd9cdec1fe495c28dd3fbac9617341f6 Mon Sep 17 00:00:00 2001 From: eldondevat <31245140+eldondevat@users.noreply.github.com> Date: Thu, 4 Jun 2020 11:37:49 -0400 Subject: [PATCH] KO-384 Refactor out webhook and improve validation (#103) * KO-384 Refactor out webhook * Add documentation about the configuration of the validating webhook * Include System Certs if available We should include the certificates in the standard system certificates location if available. If we have customers who want to generically deploy a specific internal set of certificates, being able to replace them in the standard location may be preferable. * Fix spelling Co-authored-by: Jim Dickinson Co-authored-by: Jim Dickinson --- docs/developer/validating_webhook.md | 36 ++++++ operator/cmd/manager/crypto.go | 8 +- operator/cmd/manager/main.go | 112 ------------------ operator/cmd/manager/webhook.go | 165 +++++++++++++++++++++++++++ 4 files changed, 205 insertions(+), 116 deletions(-) create mode 100644 docs/developer/validating_webhook.md create mode 100644 operator/cmd/manager/webhook.go diff --git a/docs/developer/validating_webhook.md b/docs/developer/validating_webhook.md new file mode 100644 index 000000000..bbec69bf7 --- /dev/null +++ b/docs/developer/validating_webhook.md @@ -0,0 +1,36 @@ +## The validating webhook. + +The operator offers, and installs when possible, a validating webhook for +related CRDs. The webhook is intended to provide checks of the validity of an +update or create request, where there might be CRD-specific guardrails that are +not readily checked by implicit CRD validity. Such checks include preventing +renaming certain elements of the deployment, such as the the cassandra cluster +or the racks, which are core to the identity of a cassandra cluster. + +Validating webhooks have specific requirements in kubernetes: +* They must be served over TLS +* The TLS service name where they are reached must match the subject of the certificate +* The CA signing the certificate must be either installed in the kube apiserver filesystem, or +explicitly configured in the kubernetes validatingwebhookconfiguration object. + +The operator takes a progressive-enhancement approach to enabling this webhook, +which is described as follows: + +The operator will look for, and if present, use, the certificates in the +default location that the controller-manager expects the certificates. If the +files there don't exist, or the certificate does not appear to be valid, then +the operator will generate a self-signed CA, and attempt to update the various +kubernetes references to that certificate, specifically: +* The CA defined in the webhook +* The cert and key stored in the relevant secret in the cass-operator namespace. + +If the cert and key are regenerated, then they will also be written to an +alternative location on disk, so that they can be consumed by the +controller-manager. Because the operator root filesystem is recommended to be +deployed read-only, and secret mount points are typically read-only as well, an +alternative location to host the certificate and key is chosen in a +memory-backed temporary kubernetes volume. + +To avoid a prohibitive user experience, the webhook is configured to fail open. +This means that errors encountered in the above process will generate log +messages, but will not wholly prevent the operation of the cass-operator. diff --git a/operator/cmd/manager/crypto.go b/operator/cmd/manager/crypto.go index 62cb6e0ce..439c2117b 100644 --- a/operator/cmd/manager/crypto.go +++ b/operator/cmd/manager/crypto.go @@ -1,13 +1,13 @@ package main import ( - "fmt" "bytes" "crypto/rand" "crypto/rsa" "crypto/x509" "crypto/x509/pkix" "encoding/pem" + "fmt" "math/big" "time" ) @@ -29,11 +29,11 @@ func getNewCertAndKey(namespace string) (keypem, certpem string, err error) { NotBefore: notBefore, NotAfter: notAfter, - IsCA: true, + IsCA: true, KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, BasicConstraintsValid: true, - DNSNames: []string{fmt.Sprintf("cassandradatacenter-webhook-service.%s.svc",namespace) }, + DNSNames: []string{fmt.Sprintf("cassandradatacenter-webhook-service.%s.svc", namespace)}, } var derBytes []byte if derBytes, err = x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv); err == nil { @@ -50,5 +50,5 @@ func getNewCertAndKey(namespace string) (keypem, certpem string, err error) { } } } - return "","", err + return "", "", err } diff --git a/operator/cmd/manager/main.go b/operator/cmd/manager/main.go index bda0014c7..7efecdda0 100644 --- a/operator/cmd/manager/main.go +++ b/operator/cmd/manager/main.go @@ -5,13 +5,9 @@ package main import ( "context" - "crypto/x509" - "encoding/base64" - "encoding/pem" "errors" "flag" "fmt" - "io/ioutil" "os" "path/filepath" "runtime" @@ -33,12 +29,9 @@ import ( sdkVersion "github.com/operator-framework/operator-sdk/version" "github.com/spf13/pflag" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/intstr" controllerRuntime "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" - crclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -276,108 +269,3 @@ func serveCRMetrics(cfg *rest.Config, operatorNs string) error { } return nil } - -func ensureWebhookCertificate(cfg *rest.Config, namespace string) (err error) { - var contents []byte - if contents, err = ioutil.ReadFile(serverCertFile); err == nil && len(contents) > 0 { - certpool := x509.NewCertPool() - var block *pem.Block - if block, _ = pem.Decode(contents); err == nil && block != nil { - var cert *x509.Certificate - if cert, err = x509.ParseCertificate(block.Bytes); err == nil { - certpool.AddCert(cert) - log.Info("Attempting to validate operator CA") - verify_opts := x509.VerifyOptions{ - DNSName: fmt.Sprintf("cassandradatacenter-webhook-service.%s.svc", namespace), - Roots: certpool, - } - if _, err = cert.Verify(verify_opts); err == nil { - log.Info("Found valid certificate for webhook") - return nil - } - } - } - } - return updateSecretAndWebhook(cfg, namespace) -} - -func updateSecretAndWebhook(cfg *rest.Config, namespace string) (err error) { - var key, cert string - var client crclient.Client - if key, cert, err = getNewCertAndKey(namespace); err == nil { - if client, err = crclient.New(cfg, crclient.Options{}); err == nil { - secret := &v1.Secret{} - err = client.Get(context.Background(), crclient.ObjectKey{ - Namespace: namespace, - Name: "cass-operator-webhook-config", - }, secret) - if err == nil { - secret.StringData = make(map[string]string) - secret.StringData["tls.key"] = key - secret.StringData["tls.crt"] = cert - if err = client.Update(context.Background(), secret); err == nil { - log.Info("TLS secret for webhook updated") - if err = ioutil.WriteFile(altServerCertFile, []byte(cert), 0600); err == nil { - if err = ioutil.WriteFile(altServerKeyFile, []byte(key), 0600); err == nil { - certDir = altCertDir - log.Info("TLS secret updated in pod mount") - return updateWebhook(client, cert, namespace) - } - } - } - - } - } - } - log.Error(err, "Failed to update certificates") - return err -} - -func updateWebhook(client crclient.Client, cert, namespace string) (err error) { - u := &unstructured.Unstructured{} - u.SetGroupVersionKind(schema.GroupVersionKind{ - Group: "admissionregistration.k8s.io", - Kind: "ValidatingWebhookConfiguration", - Version: "v1beta1", - }) - err = client.Get(context.Background(), crclient.ObjectKey{ - Name: "cassandradatacenter-webhook-registration", - }, u) - if err == nil { - var webhook_slice []interface{} - var webhook map[string]interface{} - var ok, present bool - webhook_slice, present, err = unstructured.NestedSlice(u.Object, "webhooks") - webhook, ok = webhook_slice[0].(map[string]interface{}) - if !ok || !present || err != nil { - log.Info(fmt.Sprintf("Error loading webhook for modification: %+v %+v %+v", ok, present, err)) - return err - } - if err = unstructured.SetNestedField(webhook, namespace, "clientConfig", "service", "namespace"); err == nil { - if err = unstructured.SetNestedField(webhook, base64.StdEncoding.EncodeToString([]byte(cert)), "clientConfig", "caBundle"); err == nil { - webhook_slice[0] = webhook - if err = unstructured.SetNestedSlice(u.Object, webhook_slice, "webhooks"); err == nil { - err = client.Update(context.Background(), u) - } - } - } - } - return err -} - -func ensureWebhookConfigVolume(cfg *rest.Config, namespace string) (err error) { - var pod *v1.Pod - var client crclient.Client - if client, err = crclient.New(cfg, crclient.Options{}); err == nil { - if pod, err = k8sutil.GetPod(context.Background(), client, namespace); err == nil { - for _, volume := range pod.Spec.Volumes { - if "cass-operator-certs-volume" == volume.Name { - return nil - } - } - log.Error(fmt.Errorf("Secrets volume not found, unable to start webhook"), "") - os.Exit(1) - } - } - return err -} diff --git a/operator/cmd/manager/webhook.go b/operator/cmd/manager/webhook.go new file mode 100644 index 000000000..de9aee502 --- /dev/null +++ b/operator/cmd/manager/webhook.go @@ -0,0 +1,165 @@ +package main + +import ( + "context" + "crypto/x509" + "encoding/base64" + "encoding/pem" + "errors" + "fmt" + "io/ioutil" + "os" + + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) + _ "k8s.io/client-go/plugin/pkg/client/auth" + "k8s.io/client-go/rest" + + "github.com/operator-framework/operator-sdk/pkg/k8sutil" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + crclient "sigs.k8s.io/controller-runtime/pkg/client" +) + +func ensureWebhookCertificate(cfg *rest.Config, namespace string) (err error) { + var contents []byte + var webhook map[string]interface{} + var bundled string + var client crclient.Client + var certpool *x509.CertPool + if contents, err = ioutil.ReadFile(serverCertFile); err == nil && len(contents) > 0 { + if client, err = crclient.New(cfg, crclient.Options{}); err == nil { + if err, _, webhook, _ = fetchWebhookForNamespace(client, namespace); err == nil { + if bundled, _, err = unstructured.NestedString(webhook, "clientConfig", "caBundle"); err == nil { + if base64.StdEncoding.EncodeToString([]byte(contents)) == bundled { + certpool, err = x509.SystemCertPool() + if err != nil { + certpool = x509.NewCertPool() + } + var block *pem.Block + if block, _ = pem.Decode(contents); err == nil && block != nil { + var cert *x509.Certificate + if cert, err = x509.ParseCertificate(block.Bytes); err == nil { + certpool.AddCert(cert) + log.Info("Attempting to validate operator CA") + verify_opts := x509.VerifyOptions{ + DNSName: fmt.Sprintf("cassandradatacenter-webhook-service.%s.svc", namespace), + Roots: certpool, + } + if _, err = cert.Verify(verify_opts); err == nil { + log.Info("Found valid certificate for webhook") + return nil + } + } + } + } + } + } + } + } + return updateSecretAndWebhook(cfg, namespace) +} + +func updateSecretAndWebhook(cfg *rest.Config, namespace string) (err error) { + var key, cert string + var client crclient.Client + if key, cert, err = getNewCertAndKey(namespace); err == nil { + if client, err = crclient.New(cfg, crclient.Options{}); err == nil { + secret := &v1.Secret{} + err = client.Get(context.Background(), crclient.ObjectKey{ + Namespace: namespace, + Name: "cass-operator-webhook-config", + }, secret) + if err == nil { + secret.StringData = make(map[string]string) + secret.StringData["tls.key"] = key + secret.StringData["tls.crt"] = cert + if err = client.Update(context.Background(), secret); err == nil { + log.Info("TLS secret for webhook updated") + if err = ioutil.WriteFile(altServerCertFile, []byte(cert), 0600); err == nil { + if err = ioutil.WriteFile(altServerKeyFile, []byte(key), 0600); err == nil { + certDir = altCertDir + log.Info("TLS secret updated in pod mount") + return updateWebhook(client, cert, namespace) + } + } + } + + } + } + } + log.Error(err, "Failed to update certificates") + return err +} + +func fetchWebhookForNamespace(client crclient.Client, namespace string) (err error, webhook_config *unstructured.Unstructured, webhook map[string]interface{}, unstructured_index int) { + + webhook_config = &unstructured.Unstructured{} + webhook_config.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "admissionregistration.k8s.io", + Kind: "ValidatingWebhookConfiguration", + Version: "v1beta1", + }) + err = client.Get(context.Background(), crclient.ObjectKey{ + Name: "cassandradatacenter-webhook-registration", + }, webhook_config) + if err != nil { + return err, webhook_config, webhook, 0 + } + var ok, present bool + var found_namespace string + var webhook_list []interface{} + if webhook_list, present, err = unstructured.NestedSlice(webhook_config.Object, "webhooks"); err == nil { + if present { + for webhook_index, webhook_untypped := range webhook_list { + webhook, ok = webhook_untypped.(map[string]interface{}) + if ok { + if found_namespace, _, err = unstructured.NestedString(webhook, "clientConfig", "service", "namespace"); found_namespace == namespace { + return nil, webhook_config, webhook, webhook_index + } + } + } + } + return errors.New("Webhook not found for namespace"), webhook_config, webhook, 0 + } + return err, webhook_config, webhook, 0 +} + +func updateWebhook(client crclient.Client, cert, namespace string) (err error) { + var webhook_slice []interface{} + var webhook map[string]interface{} + var present bool + var webhook_index int + var webhook_config *unstructured.Unstructured + err, webhook_config, webhook, webhook_index = fetchWebhookForNamespace(client, namespace) + if err == nil { + if err = unstructured.SetNestedField(webhook, namespace, "clientConfig", "service", "namespace"); err == nil { + if err = unstructured.SetNestedField(webhook, base64.StdEncoding.EncodeToString([]byte(cert)), "clientConfig", "caBundle"); err == nil { + if webhook_slice, present, err = unstructured.NestedSlice(webhook_config.Object, "webhooks"); present && err == nil { + webhook_slice[webhook_index] = webhook + if err = unstructured.SetNestedSlice(webhook_config.Object, webhook_slice, "webhooks"); err == nil { + err = client.Update(context.Background(), webhook_config) + } + } + } + } + } + return err +} + +func ensureWebhookConfigVolume(cfg *rest.Config, namespace string) (err error) { + var pod *v1.Pod + var client crclient.Client + if client, err = crclient.New(cfg, crclient.Options{}); err == nil { + if pod, err = k8sutil.GetPod(context.Background(), client, namespace); err == nil { + for _, volume := range pod.Spec.Volumes { + if "cass-operator-certs-volume" == volume.Name { + return nil + } + } + log.Error(fmt.Errorf("Secrets volume not found, unable to start webhook"), "") + os.Exit(1) + } + } + return err +}