From b070055945a118c88fcbcf2dfc09b1432885b94b Mon Sep 17 00:00:00 2001 From: Adrian Ludwin Date: Wed, 1 Jul 2020 17:45:45 -0400 Subject: [PATCH] Restart the pod if certs change See #765. If a mounted secret changes _after_ a pod is started, it can take a fairly long time (~60s) for the kubelet to notice the change and project the new secret to the pod. Since our internal cert manager writes a secret but then needs to wait for it to become available as a file, this leads to a poor onboarding experience with HNC. This change introduces a flag that makes the process exit as soon as the internal cert manager changes a secret, which should only occur on initial installation of HNC or every ten years (!). The restart takes <5s, so this is overall a much better experience. Tested: without changing the flags in the default manifest, observed no change when HNC is installed for the first time (i.e. the time from the first log message to when the HNCConfiguration is first reconciled is 103s, and there are no restarts). When the flag is added, the startup time decreases to 10s with the one expected restart. Further restarts of HNC (e.g. deleting and recreating the deployment but not the secret) do not result in an additional restart and complete in 4s. 
--- incubator/hnc/config/manager/manager.yaml | 3 ++- .../open-policy-agent/gatekeeper/pkg/webhook/certs.go | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/incubator/hnc/config/manager/manager.yaml b/incubator/hnc/config/manager/manager.yaml index 2c51ef4e9..e24e00f18 100644 --- a/incubator/hnc/config/manager/manager.yaml +++ b/incubator/hnc/config/manager/manager.yaml @@ -42,7 +42,8 @@ spec: - "--metrics-addr=127.0.0.1:8080" - "--max-reconciles=10" - "--apiserver-qps-throttle=50" - - "--enable-internal-cert-management=true" + - "--enable-internal-cert-management" + - "--cert-restart-on-secret-refresh" image: controller:latest name: manager resources: diff --git a/incubator/hnc/third_party/open-policy-agent/gatekeeper/pkg/webhook/certs.go b/incubator/hnc/third_party/open-policy-agent/gatekeeper/pkg/webhook/certs.go index 95e40af2d..838da4bf0 100644 --- a/incubator/hnc/third_party/open-policy-agent/gatekeeper/pkg/webhook/certs.go +++ b/incubator/hnc/third_party/open-policy-agent/gatekeeper/pkg/webhook/certs.go @@ -10,6 +10,7 @@ import ( "crypto/x509/pkix" "encoding/base64" "encoding/pem" + "flag" "fmt" "math/big" "os" @@ -47,6 +48,12 @@ var vwhGVK = schema.GroupVersionKind{Group: "admissionregistration.k8s.io", Vers var _ manager.Runnable = &CertRotator{} +var restartOnSecretRefresh = false + +func init() { + flag.BoolVar(&restartOnSecretRefresh, "cert-restart-on-secret-refresh", false, "Kills the process when secrets are refreshed so that the pod can be restarted (secrets take up to 60s to be updated by running pods)") +} + // AddRotator adds the CertRotator and ReconcileVWH to the manager. 
func AddRotator(mgr manager.Manager, cr *CertRotator, vwhName string) error { cr.client = mgr.GetClient() @@ -129,6 +136,10 @@ func (cr *CertRotator) refreshCertIfNeeded() error { return false, nil } crLog.Info("server certs refreshed") + if restartOnSecretRefresh { + crLog.Info("Secrets have been updated; exiting so pod can be restarted (omit --cert-restart-on-secret-refresh to wait instead of restarting") + os.Exit(0) + } return true, nil } // make sure our reconciler is initialized on startup (either this or the above refreshCerts() will call this)