Skip to content

Commit

Permalink
Persist TLS certificate and key of antrea-controller (antrea-io#5955)
Browse files Browse the repository at this point in the history
In clusters where upgrade is performed with rolling update of Nodes and
images of new versions are only available on new Nodes, the deployment
strategy of antrea-controller is set to RollingUpdate. This prevents the
antrea-controller Pod from being deleted immediately when the Deployment
is updated, which would otherwise lead to a period in which no
antrea-controller is running. However, it also causes two instances of
antrea-controller to run simultaneously for a short time, making it
possible for the old instance to override the CA bundle stored in the
antrea-ca ConfigMap, APIServices, and Webhooks, while the new instance
won't update them again.

The commit makes two changes to fix the problem:

1. CACertController will periodically sync the CA cert to improve
   robustness, so that a CA bundle overridden by a stale instance or
   another deployment tool is eventually restored.

2. The self-signed TLS certificate and key of antrea-controller will be
   stored in a Secret and reused after the controller restarts.
   This makes it possible to run multiple antrea-controller instances
   simultaneously and makes restarts of antrea-controller smoother, as
   antrea-agents don't need to retrieve a new CA bundle most of the
   time.

In addition, the change paves the way for implementing high availability
of antrea-controller in the future.

Signed-off-by: Quan Tian <[email protected]>
  • Loading branch information
tnqn committed Mar 7, 2024
1 parent 25bf7cb commit 0884ea8
Show file tree
Hide file tree
Showing 18 changed files with 708 additions and 158 deletions.
2 changes: 2 additions & 0 deletions build/charts/antrea/templates/controller/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,13 @@ rules:
resources:
- secrets
resourceNames:
- antrea-controller-tls
- antrea-ipsec-ca
verbs:
- get
- update
- watch
- list
- apiGroups:
- ""
resources:
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea-aks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6407,11 +6407,13 @@ rules:
resources:
- secrets
resourceNames:
- antrea-controller-tls
- antrea-ipsec-ca
verbs:
- get
- update
- watch
- list
- apiGroups:
- ""
resources:
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea-eks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6407,11 +6407,13 @@ rules:
resources:
- secrets
resourceNames:
- antrea-controller-tls
- antrea-ipsec-ca
verbs:
- get
- update
- watch
- list
- apiGroups:
- ""
resources:
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea-gke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6407,11 +6407,13 @@ rules:
resources:
- secrets
resourceNames:
- antrea-controller-tls
- antrea-ipsec-ca
verbs:
- get
- update
- watch
- list
- apiGroups:
- ""
resources:
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea-ipsec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6420,11 +6420,13 @@ rules:
resources:
- secrets
resourceNames:
- antrea-controller-tls
- antrea-ipsec-ca
verbs:
- get
- update
- watch
- list
- apiGroups:
- ""
resources:
Expand Down
2 changes: 2 additions & 0 deletions build/yamls/antrea.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6407,11 +6407,13 @@ rules:
resources:
- secrets
resourceNames:
- antrea-controller-tls
- antrea-ipsec-ca
verbs:
- get
- update
- watch
- list
- apiGroups:
- ""
resources:
Expand Down
2 changes: 1 addition & 1 deletion multicluster/cmd/multicluster-controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ func getCaConfig(isLeader bool, controllerNs string) *certificate.CAConfig {
MutationWebhookSelector: getWebhookLabel(isLeader, controllerNs),
ValidatingWebhookSelector: getWebhookLabel(isLeader, controllerNs),
CertReadyTimeout: 2 * time.Minute,
MaxRotateDuration: time.Hour * (24 * 365),
MinValidDuration: time.Hour * 24 * 90, // Rotate the certificate 90 days in advance.
}
}

Expand Down
4 changes: 2 additions & 2 deletions multicluster/cmd/multicluster-controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func TestGetCAConfig(t *testing.T) {
MutationWebhookSelector: getWebhookLabel(false, ""),
ValidatingWebhookSelector: getWebhookLabel(false, ""),
CertReadyTimeout: 2 * time.Minute,
MaxRotateDuration: time.Hour * (24 * 365),
MinValidDuration: time.Hour * (24 * 90),
},
},
{
Expand All @@ -86,7 +86,7 @@ func TestGetCAConfig(t *testing.T) {
MutationWebhookSelector: getWebhookLabel(true, "testNS"),
ValidatingWebhookSelector: getWebhookLabel(true, "testNS"),
CertReadyTimeout: 2 * time.Minute,
MaxRotateDuration: time.Hour * (24 * 365),
MinValidDuration: time.Hour * (24 * 90),
},
},
}
Expand Down
41 changes: 18 additions & 23 deletions pkg/apiserver/apiserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,12 @@ var (
// #nosec G101: false positive triggered by variable name which includes "token"
TokenPath = "/var/run/antrea/apiserver/loopback-client-token"

// antreaServedLabel includes the labels used to select resources served by antrea-controller
antreaServedLabel = map[string]string{
"app": "antrea",
"served-by": "antrea-controller",
// antreaServedLabelSelector selects resources served by antrea-controller.
antreaServedLabelSelector = &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "antrea",
"served-by": "antrea-controller",
},
}
)

Expand Down Expand Up @@ -338,24 +340,17 @@ func installHandlers(c *ExtraConfig, s *genericapiserver.GenericAPIServer) {

// DefaultCAConfig returns a pointer to a certificate.CAConfig literal whose
// PairName is "antrea-controller". The CA ConfigMap, TLS Secret and Service
// names are taken from constants in the certificate package, CertDir and
// SelfSignedCertDir point under /var/run/antrea, and CertReadyTimeout is two
// minutes.
//
// NOTE(review): this listing appears to come from a diff view, so the fields
// removed by the commit (the per-field LabelSelector literals built from
// antreaServedLabel, and MaxRotateDuration) are shown alongside the fields
// that replace them (antreaServedLabelSelector, TLSSecretName,
// MinValidDuration) — confirm against the actual file before relying on it.
func DefaultCAConfig() *certificate.CAConfig {
return &certificate.CAConfig{
CAConfigMapName: certificate.AntreaCAConfigMapName,
APIServiceSelector: &metav1.LabelSelector{
MatchLabels: antreaServedLabel,
},
ValidatingWebhookSelector: &metav1.LabelSelector{
MatchLabels: antreaServedLabel,
},
MutationWebhookSelector: &metav1.LabelSelector{
MatchLabels: antreaServedLabel,
},
CRDConversionWebhookSelector: &metav1.LabelSelector{
MatchLabels: antreaServedLabel,
},
CertDir: "/var/run/antrea/antrea-controller-tls",
SelfSignedCertDir: "/var/run/antrea/antrea-controller-self-signed",
CertReadyTimeout: 2 * time.Minute,
MaxRotateDuration: time.Hour * (24 * 365),
ServiceName: certificate.AntreaServiceName,
PairName: "antrea-controller",
CAConfigMapName: certificate.AntreaCAConfigMapName,
TLSSecretName: certificate.AntreaControllerTLSSecretName,
APIServiceSelector: antreaServedLabelSelector,
ValidatingWebhookSelector: antreaServedLabelSelector,
MutationWebhookSelector: antreaServedLabelSelector,
CRDConversionWebhookSelector: antreaServedLabelSelector,
CertDir: "/var/run/antrea/antrea-controller-tls",
SelfSignedCertDir: "/var/run/antrea/antrea-controller-self-signed",
CertReadyTimeout: 2 * time.Minute,
MinValidDuration: time.Hour * 24 * 90, // Rotate the certificate 90 days in advance.
ServiceName: certificate.AntreaServiceName,
PairName: "antrea-controller",
}
}
13 changes: 8 additions & 5 deletions pkg/apiserver/certificate/cacert_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ func (c *CACertController) syncCACert() error {

// syncMutatingWebhooks updates the CABundle of the MutatingWebhookConfiguration backed by antrea-controller.
func (c *CACertController) syncMutatingWebhooks(caCert []byte) error {
klog.Info("Syncing CA certificate with MutatingWebhookConfigurations")
if c.caConfig.MutationWebhookSelector == nil {
return nil
}
Expand All @@ -150,7 +149,6 @@ func (c *CACertController) syncMutatingWebhooks(caCert []byte) error {
}

func (c *CACertController) syncConversionWebhooks(caCert []byte) error {
klog.Info("Syncing CA certificate with CRDs that have conversion webhooks")
if c.caConfig.CRDConversionWebhookSelector == nil {
return nil
}
Expand All @@ -171,6 +169,7 @@ func (c *CACertController) syncConversionWebhooks(caCert []byte) error {
crdDef.Spec.Conversion.Webhook.ClientConfig.CABundle = caCert
}
if updated {
klog.InfoS("Syncing CA certificate with CRD that have conversion webhooks", "name", crdDef.Name)
if _, err := c.apiExtensionClient.ApiextensionsV1().CustomResourceDefinitions().Update(context.TODO(), &crdDef, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("error updating Antrea CA cert of CustomResourceDefinition %s: %v", name, err)
}
Expand All @@ -190,6 +189,7 @@ func (c *CACertController) patchWebhookWithCACert(webhookCfg *v1.MutatingWebhook
webhookCfg.Webhooks[idx] = webhook
}
if updated {
klog.InfoS("Syncing CA certificate with MutatingWebhookConfiguration", "name", webhookCfg.Name)
if _, err := c.client.AdmissionregistrationV1().MutatingWebhookConfigurations().Update(context.TODO(), webhookCfg, metav1.UpdateOptions{}); err != nil {
return err
}
Expand All @@ -199,7 +199,6 @@ func (c *CACertController) patchWebhookWithCACert(webhookCfg *v1.MutatingWebhook

// syncValidatingWebhooks updates the CABundle of the ValidatingWebhookConfiguration backed by antrea-controller.
func (c *CACertController) syncValidatingWebhooks(caCert []byte) error {
klog.Info("Syncing CA certificate with ValidatingWebhookConfigurations")
if c.caConfig.ValidatingWebhookSelector == nil {
return nil
}
Expand All @@ -223,6 +222,7 @@ func (c *CACertController) syncValidatingWebhooks(caCert []byte) error {
vWebhook.Webhooks[idx] = webhook
}
if updated {
klog.InfoS("Syncing CA certificate with ValidatingWebhookConfiguration", "name", vWebhook.Name)
if _, err := c.client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Update(context.TODO(), &vWebhook, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("error updating Antrea CA cert of ValidatingWebhookConfiguration %s: %v", name, err)
}
Expand All @@ -233,7 +233,6 @@ func (c *CACertController) syncValidatingWebhooks(caCert []byte) error {

// syncAPIServices updates the CABundle of the APIServices backed by antrea-controller.
func (c *CACertController) syncAPIServices(caCert []byte) error {
klog.Info("Syncing CA certificate with APIServices")
if c.caConfig.APIServiceSelector == nil {
return nil
}
Expand All @@ -250,6 +249,7 @@ func (c *CACertController) syncAPIServices(caCert []byte) error {
if bytes.Equal(apiService.Spec.CABundle, caCert) {
continue
}
klog.InfoS("Syncing CA certificate with APIService", "name", apiService.Name)
apiService.Spec.CABundle = caCert
if _, err := c.aggregatorClient.ApiregistrationV1().APIServices().Update(context.TODO(), &apiService, metav1.UpdateOptions{}); err != nil {
return fmt.Errorf("error updating Antrea CA cert of APIService %s: %v", name, err)
Expand All @@ -260,7 +260,6 @@ func (c *CACertController) syncAPIServices(caCert []byte) error {

// syncConfigMap updates the ConfigMap that holds the CA bundle, which will be read by API clients, e.g. antrea-agent.
func (c *CACertController) syncConfigMap(caCert []byte) error {
klog.Info("Syncing CA certificate with ConfigMap")
// Use the Antrea Pod Namespace for the CA cert ConfigMap.
caConfigMapNamespace := GetCAConfigMapNamespace()
caConfigMap, err := c.client.CoreV1().ConfigMaps(caConfigMapNamespace).Get(context.TODO(), c.caConfig.CAConfigMapName, metav1.GetOptions{})
Expand All @@ -283,6 +282,7 @@ func (c *CACertController) syncConfigMap(caCert []byte) error {
if caConfigMap.Data != nil && caConfigMap.Data[CAConfigMapKey] == string(caCert) {
return nil
}
klog.InfoS("Syncing CA certificate with ConfigMap", "name", klog.KObj(caConfigMap))
caConfigMap.Data = map[string]string{
CAConfigMapKey: string(caCert),
}
Expand Down Expand Up @@ -329,6 +329,9 @@ func (c *CACertController) Run(ctx context.Context, workers int) {

// doesn't matter what workers say, only start one.
go wait.Until(c.runWorker, time.Second, ctx.Done())
// Periodically sync the CA cert to improve the robustness.
// In some cases the CA cert may be overridden by a stale instance or other deployment tools.
go wait.Until(c.Enqueue, 2*time.Minute, ctx.Done())

<-ctx.Done()
}
Expand Down
12 changes: 6 additions & 6 deletions pkg/apiserver/certificate/cacert_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func TestSyncConfigMap(t *testing.T) {
}
aggregatorClientset := fakeaggregatorclientset.NewSimpleClientset()
apiExtensionClient := fakeapiextensionclientset.NewSimpleClientset()
caContentProvider, _ := generateSelfSignedCertificate(secureServing, caConfig)
caContentProvider, _ := newSelfSignedCertProvider(clientset, secureServing, caConfig)
tt.prepareReactor(clientset)

controller := newCACertController(caContentProvider, clientset, aggregatorClientset, apiExtensionClient, caConfig)
Expand Down Expand Up @@ -194,7 +194,7 @@ func TestSyncAPIServices(t *testing.T) {
clientset := fakeclientset.NewSimpleClientset()
aggregatorClientset := fakeaggregatorclientset.NewSimpleClientset()
apiExtensionClient := fakeapiextensionclientset.NewSimpleClientset()
caContentProvider, _ := generateSelfSignedCertificate(secureServing, caConfig)
caContentProvider, _ := newSelfSignedCertProvider(clientset, secureServing, caConfig)

if tt.existingAPIService != nil {
aggregatorClientset = fakeaggregatorclientset.NewSimpleClientset(tt.existingAPIService)
Expand Down Expand Up @@ -283,7 +283,7 @@ func TestSyncValidatingWebhooks(t *testing.T) {
clientset := fakeclientset.NewSimpleClientset()
aggregatorClientset := fakeaggregatorclientset.NewSimpleClientset()
apiExtensionClient := fakeapiextensionclientset.NewSimpleClientset()
caContentProvider, _ := generateSelfSignedCertificate(secureServing, caConfig)
caContentProvider, _ := newSelfSignedCertProvider(clientset, secureServing, caConfig)

if tt.existingWebhook != nil {
clientset = fakeclientset.NewSimpleClientset(tt.existingWebhook)
Expand Down Expand Up @@ -404,14 +404,14 @@ func TestSyncMutatingWebhooks(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
aggregatorClientset := fakeaggregatorclientset.NewSimpleClientset()
apiExtensionClient := fakeapiextensionclientset.NewSimpleClientset()
caContentProvider, _ := generateSelfSignedCertificate(secureServing, caConfig)

var objects []runtime.Object
for _, webhook := range tt.existingWebhooks {
objects = append(objects, webhook)
}
clientset := fakeclientset.NewSimpleClientset(objects...)
tt.prepareReactor(clientset)

caContentProvider, _ := newSelfSignedCertProvider(clientset, secureServing, caConfig)
controller := newCACertController(caContentProvider, clientset, aggregatorClientset, apiExtensionClient, caConfig)
caBundle := []byte("abc")
err = controller.syncMutatingWebhooks(caBundle)
Expand Down Expand Up @@ -518,7 +518,7 @@ func TestSyncConversionWebhooks(t *testing.T) {
clientset := fakeclientset.NewSimpleClientset()
aggregatorClientset := fakeaggregatorclientset.NewSimpleClientset()
apiExtensionClient := fakeapiextensionclientset.NewSimpleClientset()
caContentProvider, _ := generateSelfSignedCertificate(secureServing, caConfig)
caContentProvider, _ := newSelfSignedCertProvider(clientset, secureServing, caConfig)

if tt.existingCRD != nil {
apiExtensionClient = fakeapiextensionclientset.NewSimpleClientset(tt.existingCRD)
Expand Down
Loading

0 comments on commit 0884ea8

Please sign in to comment.