From c75f06f7b2ecc9d84be23ff1a5fcc4b8f7f93970 Mon Sep 17 00:00:00 2001 From: Stephen Kitt Date: Tue, 24 Oct 2023 17:01:34 +0200 Subject: [PATCH] Allow halting (and restarting) pods on certificate errors General practice in Kubernetes is to restart pods relying on certificates when those certificates are changed. Because Kubernetes isn't aware of cross-cluster certificate changes, this doesn't happen automatically when the broker certificate (or trust chain) changes; this produces certificate errors and ultimately results in a broken setup. To avoid this, provide a setting to set up gateway and Lighthouse agent pods to halt on certificate errors. Signed-off-by: Stephen Kitt --- api/v1alpha1/servicediscovery_types.go | 1 + api/v1alpha1/submariner_types.go | 5 +++++ config/crd/bases/submariner.io_servicediscoveries.yaml | 2 ++ config/crd/bases/submariner.io_submariners.yaml | 3 +++ .../manifests/bases/submariner.clusterserviceversion.yaml | 5 +++++ controllers/servicediscovery/servicediscovery_controller.go | 1 + controllers/submariner/gateway_resources.go | 1 + controllers/submariner/servicediscovery_resources.go | 1 + pkg/embeddedyamls/yamls.go | 6 ++++++ 9 files changed, 25 insertions(+) diff --git a/api/v1alpha1/servicediscovery_types.go b/api/v1alpha1/servicediscovery_types.go index d347e6782..31e2deacc 100644 --- a/api/v1alpha1/servicediscovery_types.go +++ b/api/v1alpha1/servicediscovery_types.go @@ -44,6 +44,7 @@ type ServiceDiscoverySpec struct { Debug bool `json:"debug"` GlobalnetEnabled bool `json:"globalnetEnabled,omitempty"` BrokerK8sInsecure bool `json:"brokerK8sInsecure,omitempty"` + HaltOnCertificateError bool `json:"haltOnCertificateError,omitempty"` CoreDNSCustomConfig *CoreDNSCustomConfig `json:"coreDNSCustomConfig,omitempty"` // +listType=set CustomDomains []string `json:"customDomains,omitempty"` diff --git a/api/v1alpha1/submariner_types.go b/api/v1alpha1/submariner_types.go index c25946945..c8cef5cd2 100644 --- a/api/v1alpha1/submariner_types.go 
+++ b/api/v1alpha1/submariner_types.go @@ -169,6 +169,11 @@ type SubmarinerSpec struct { BrokerK8sInsecure bool `json:"brokerK8sInsecure,omitempty"` + // Halt on certificate error (so the pod gets restarted). + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Halt (and restart) on certificate error" + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors={"urn:alm:descriptor:com.tectonic.ui:booleanSwitch"} + HaltOnCertificateError bool `json:"haltOnCertificateError"` + // Name of the custom CoreDNS configmap to configure forwarding to Lighthouse. // It should be in <namespace>/<name> format where <namespace> is optional and defaults to kube-system. // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="CoreDNS Custom Config" diff --git a/config/crd/bases/submariner.io_servicediscoveries.yaml b/config/crd/bases/submariner.io_servicediscoveries.yaml index 44aa4ec90..dd5e4df53 100644 --- a/config/crd/bases/submariner.io_servicediscoveries.yaml +++ b/config/crd/bases/submariner.io_servicediscoveries.yaml @@ -66,6 +66,8 @@ spec: type: boolean globalnetEnabled: type: boolean + haltOnCertificateError: + type: boolean imageOverrides: additionalProperties: type: string diff --git a/config/crd/bases/submariner.io_submariners.yaml b/config/crd/bases/submariner.io_submariners.yaml index 224126740..2b047c864 100644 --- a/config/crd/bases/submariner.io_submariners.yaml +++ b/config/crd/bases/submariner.io_submariners.yaml @@ -130,6 +130,9 @@ spec: description: The Global CIDR super-net range for allocating GlobalCIDRs to each cluster. type: string + haltOnCertificateError: + description: Halt on certificate error (so the pod gets restarted). 
+ type: boolean imageOverrides: additionalProperties: type: string diff --git a/config/manifests/bases/submariner.clusterserviceversion.yaml b/config/manifests/bases/submariner.clusterserviceversion.yaml index ef1787255..43a05115c 100644 --- a/config/manifests/bases/submariner.clusterserviceversion.yaml +++ b/config/manifests/bases/submariner.clusterserviceversion.yaml @@ -222,6 +222,11 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:text - urn:alm:descriptor:com.tectonic.ui:advanced + - description: Halt on certificate error (so the pod gets restarted). + displayName: Halt (and restart) on certificate error + path: haltOnCertificateError + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: Override component images. displayName: Image Overrides path: imageOverrides diff --git a/controllers/servicediscovery/servicediscovery_controller.go b/controllers/servicediscovery/servicediscovery_controller.go index a8aee038a..c4d40a8a2 100644 --- a/controllers/servicediscovery/servicediscovery_controller.go +++ b/controllers/servicediscovery/servicediscovery_controller.go @@ -246,6 +246,7 @@ func newLighthouseAgent(cr *submarinerv1alpha1.ServiceDiscovery, name string) *a {Name: "SUBMARINER_CLUSTERID", Value: cr.Spec.ClusterID}, {Name: "SUBMARINER_DEBUG", Value: strconv.FormatBool(cr.Spec.Debug)}, {Name: "SUBMARINER_GLOBALNET_ENABLED", Value: strconv.FormatBool(cr.Spec.GlobalnetEnabled)}, + {Name: "SUBMARINER_HALT_ON_CERT_ERROR", Value: strconv.FormatBool(cr.Spec.HaltOnCertificateError)}, {Name: broker.EnvironmentVariable("ApiServer"), Value: cr.Spec.BrokerK8sApiServer}, {Name: broker.EnvironmentVariable("ApiServerToken"), Value: cr.Spec.BrokerK8sApiServerToken}, {Name: broker.EnvironmentVariable("RemoteNamespace"), Value: cr.Spec.BrokerK8sRemoteNamespace}, diff --git a/controllers/submariner/gateway_resources.go b/controllers/submariner/gateway_resources.go index f04a681a2..fbc77767b 100644 --- 
a/controllers/submariner/gateway_resources.go +++ b/controllers/submariner/gateway_resources.go @@ -203,6 +203,7 @@ func newGatewayPodTemplate(cr *v1alpha1.Submariner, name string, podSelectorLabe {Name: "SUBMARINER_HEALTHCHECKINTERVAL", Value: strconv.FormatUint(healthCheckInterval, 10)}, {Name: "SUBMARINER_HEALTHCHECKMAXPACKETLOSSCOUNT", Value: strconv.FormatUint(healthCheckMaxPacketLossCount, 10)}, {Name: "SUBMARINER_METRICSPORT", Value: gatewayMetricsServerPort}, + {Name: "SUBMARINER_HALT_ON_CERT_ERROR", Value: strconv.FormatBool(cr.Spec.HaltOnCertificateError)}, {Name: "NODE_NAME", ValueFrom: &corev1.EnvVarSource{ FieldRef: &corev1.ObjectFieldSelector{ FieldPath: "spec.nodeName", diff --git a/controllers/submariner/servicediscovery_resources.go b/controllers/submariner/servicediscovery_resources.go index 04c0fe480..279fbbf1c 100644 --- a/controllers/submariner/servicediscovery_resources.go +++ b/controllers/submariner/servicediscovery_resources.go @@ -47,6 +47,7 @@ func (r *Reconciler) serviceDiscoveryReconciler(ctx context.Context, submariner BrokerK8sApiServerToken: submariner.Spec.BrokerK8sApiServerToken, BrokerK8sApiServer: submariner.Spec.BrokerK8sApiServer, BrokerK8sInsecure: submariner.Spec.BrokerK8sInsecure, + HaltOnCertificateError: submariner.Spec.HaltOnCertificateError, Debug: submariner.Spec.Debug, ClusterID: submariner.Spec.ClusterID, Namespace: submariner.Spec.Namespace, diff --git a/pkg/embeddedyamls/yamls.go b/pkg/embeddedyamls/yamls.go index a395619bf..8af9723f3 100644 --- a/pkg/embeddedyamls/yamls.go +++ b/pkg/embeddedyamls/yamls.go @@ -220,6 +220,9 @@ spec: description: The Global CIDR super-net range for allocating GlobalCIDRs to each cluster. type: string + haltOnCertificateError: + description: Halt on certificate error (so the pod gets restarted). 
+ type: boolean imageOverrides: additionalProperties: type: string @@ -297,6 +300,7 @@ spec: - clusterCIDR - clusterID - debug + - haltOnCertificateError - namespace - natEnabled - serviceCIDR @@ -1113,6 +1117,8 @@ spec: type: boolean globalnetEnabled: type: boolean + haltOnCertificateError: + type: boolean imageOverrides: additionalProperties: type: string