From 93f2e1572a761769025f47c9fada8c7d1ab07b2b Mon Sep 17 00:00:00 2001 From: Jonathan King Date: Tue, 22 Aug 2023 13:05:10 -0400 Subject: [PATCH 1/2] configmaps: Track resource versions of cluster provided CA certs (PROJQUAY-5174) - The config.openshift.io/inject-trusted-cabundle: 'true' annotation will inject content into the config map after kustomize has generated its resources. This means that kustomize will not be aware of changes made by Openshift to these config maps - In order to redeploy QuayRegistry resources when cluster CA are rotated, this PR implements a mechanism to track the resource version of the CA as an annotation on the Quay resources --- apis/quay/v1/quayregistry_types.go | 2 + controllers/quay/features.go | 56 +++++++++++++++ controllers/quay/quayregistry_controller.go | 11 +++ e2e/ca_rotation/00-assert.yaml | 71 ++++++++++++++++++++ e2e/ca_rotation/00-create-quay-registry.yaml | 16 +++++ e2e/ca_rotation/01-assert.yaml | 71 ++++++++++++++++++++ e2e/ca_rotation/01-manual-rotate.yaml | 13 ++++ kuttl-test.yaml | 2 +- pkg/context/context.go | 4 ++ pkg/kustomize/kustomize.go | 2 +- pkg/middleware/middleware.go | 9 ++- pkg/middleware/middleware_test.go | 4 +- 12 files changed, 257 insertions(+), 4 deletions(-) create mode 100644 e2e/ca_rotation/00-assert.yaml create mode 100644 e2e/ca_rotation/00-create-quay-registry.yaml create mode 100644 e2e/ca_rotation/01-assert.yaml create mode 100644 e2e/ca_rotation/01-manual-rotate.yaml diff --git a/apis/quay/v1/quayregistry_types.go b/apis/quay/v1/quayregistry_types.go index 9fdf234eb..62c58aa14 100644 --- a/apis/quay/v1/quayregistry_types.go +++ b/apis/quay/v1/quayregistry_types.go @@ -110,6 +110,8 @@ const ( QuayUpgradeJobName = "quay-app-upgrade" PostgresUpgradeJobName = "quay-postgres-upgrade" ClairPostgresUpgradeJobName = "clair-postgres-upgrade" + ClusterServiceCAName = "cluster-service-ca" + ClusterTrustedCAName = "cluster-trusted-ca" ) // QuayRegistrySpec defines the desired state of QuayRegistry. diff --git a/controllers/quay/features.go b/controllers/quay/features.go index bf0fba2bf..1d49c2075 100644 --- a/controllers/quay/features.go +++ b/controllers/quay/features.go @@ -3,7 +3,9 @@ package controllers import ( "bytes" "context" + "crypto/sha256" "crypto/tls" + "encoding/hex" "encoding/pem" "fmt" "strings" @@ -108,6 +110,60 @@ func (r *QuayRegistryReconciler) checkManagedKeys( return nil } +// checkClusterCAHash populates the provided QuayRegistryContext with revision version of the cluster provided CA configmaps. +// We must track these revisions so that we can force a restart of the QuayRegistry pods when the CA configmaps are updated. +func (r *QuayRegistryReconciler) checkClusterCAHash( + ctx context.Context, qctx *quaycontext.QuayRegistryContext, quay *v1.QuayRegistry, +) error { + + hashConfigMapContents := func(data map[string]string, key string) string { + certData, exists := data[key] + if !exists { + return "" + } + hash := sha256.Sum256([]byte(certData)) + hashStr := hex.EncodeToString(hash[:]) + return hashStr[len(hashStr)-8:] + } + + // Get cluster-service-ca hash + clusterServiceCAnsn := types.NamespacedName{ + Name: fmt.Sprintf("%s-%s", quay.Name, v1.ClusterServiceCAName), + Namespace: quay.Namespace, + } + var clusterServiceCA corev1.ConfigMap + if err := r.Get(ctx, clusterServiceCAnsn, &clusterServiceCA); err == nil { + qctx.ClusterServiceCAHash = hashConfigMapContents(clusterServiceCA.Data, "service-ca.crt") + if currentHash, exists := clusterServiceCA.Annotations[v1.ClusterServiceCAName]; !exists || currentHash != qctx.ClusterServiceCAHash { + r.Log.Info("Detected change in cluster-service-ca configmap, updating annotation to trigger restart") + clusterServiceCA.Annotations[v1.ClusterServiceCAName] = qctx.ClusterServiceCAHash + if err := r.Update(ctx, &clusterServiceCA); err != nil { + r.Log.Error(err, "unable to update cluster-service-ca configmap annotations") + return err + } + } + } + + clusterTrustedCAnsn := types.NamespacedName{ + Name: fmt.Sprintf("%s-%s", quay.Name, v1.ClusterTrustedCAName), + Namespace: quay.Namespace, + } + var clusterTrustedCA corev1.ConfigMap + if err := r.Get(ctx, clusterTrustedCAnsn, &clusterTrustedCA); err == nil { + qctx.ClusterTrustedCAHash = hashConfigMapContents(clusterTrustedCA.Data, "ca-bundle.crt") + if currentHash, exists := clusterTrustedCA.Annotations[v1.ClusterTrustedCAName]; !exists || currentHash != qctx.ClusterTrustedCAHash { + r.Log.Info("Detected change in cluster-trusted-ca configmap, updating annotation to trigger restart") + clusterTrustedCA.Annotations[v1.ClusterTrustedCAName] = qctx.ClusterTrustedCAHash + if err := r.Update(ctx, &clusterTrustedCA); err != nil { + r.Log.Error(err, "unable to update cluster-trusted-ca configmap annotations") + return err + } + } + } + + return nil +} + // checkManagedTLS verifies if provided bundle contains entries for ssl key and cert, // populate the data in provided QuayRegistryContext if found. func (r *QuayRegistryReconciler) checkManagedTLS( diff --git a/controllers/quay/quayregistry_controller.go b/controllers/quay/quayregistry_controller.go index 120456e39..df379b289 100644 --- a/controllers/quay/quayregistry_controller.go +++ b/controllers/quay/quayregistry_controller.go @@ -475,6 +475,17 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request quayContext := quaycontext.NewQuayRegistryContext() r.checkManagedTLS(quayContext, cbundle) + if err := r.checkClusterCAHash(ctx, quayContext, updatedQuay); err != nil { + return r.reconcileWithCondition( + ctx, + &quay, + v1.ConditionTypeRolloutBlocked, + metav1.ConditionTrue, + v1.ConditionReasonConfigInvalid, + fmt.Sprintf("unable to check cluster CA certs: %s", err), + ) + } + if err := r.checkManagedKeys(ctx, quayContext, updatedQuay); err != nil { return r.reconcileWithCondition( ctx, diff --git a/e2e/ca_rotation/00-assert.yaml b/e2e/ca_rotation/00-assert.yaml new file mode 100644 index 000000000..10a4c3509 --- /dev/null +++ b/e2e/ca_rotation/00-assert.yaml @@ -0,0 +1,71 @@ +apiVersion: quay.redhat.com/v1 +kind: QuayRegistry +metadata: + name: ca-rotation +spec: + components: + - kind: monitoring + managed: false + - kind: mirror + managed: false + - kind: horizontalpodautoscaler + managed: false + - kind: clair + managed: false + - kind: clairpostgres + managed: false + - kind: quay + managed: true + - kind: postgres + managed: true + - kind: redis + managed: true + - kind: objectstorage + managed: true + - kind: route + managed: true + - kind: tls + managed: true +status: + conditions: + - type: ComponentHPAReady + reason: ComponentNotManaged + status: "True" + - type: ComponentRouteReady + reason: ComponentReady + status: "True" + - type: ComponentMonitoringReady + reason: ComponentNotManaged + status: "True" + - type: ComponentPostgresReady + reason: ComponentReady + status: "True" + - type: ComponentObjectStorageReady + reason: ComponentReady + status: "True" + - type: ComponentClairReady + reason: ComponentNotManaged + status: "True" + - type: ComponentClairPostgresReady + reason: ComponentNotManaged + status: "True" + - type: ComponentTLSReady + reason: ComponentReady + status: "True" + - type: ComponentRedisReady + reason: ComponentReady + status: "True" + - type: ComponentQuayReady + reason: ComponentReady + status: "True" + - type: ComponentMirrorReady + reason: ComponentNotManaged + status: "True" + - type: Available + reason: HealthChecksPassing + status: "True" + - type: ComponentsCreated + reason: ComponentsCreationSuccess + status: "True" + - type: RolloutBlocked + status: "False" diff --git a/e2e/ca_rotation/00-create-quay-registry.yaml b/e2e/ca_rotation/00-create-quay-registry.yaml new file mode 100644 index 000000000..49985ec9a --- /dev/null +++ b/e2e/ca_rotation/00-create-quay-registry.yaml @@ -0,0 +1,16 @@ +apiVersion: quay.redhat.com/v1 +kind: QuayRegistry +metadata: + name: ca-rotation +spec: + components: + - kind: monitoring + managed: false + - kind: mirror + managed: false + - kind: horizontalpodautoscaler + managed: false + - kind: clair + managed: false + - kind: clairpostgres + managed: false diff --git a/e2e/ca_rotation/01-assert.yaml b/e2e/ca_rotation/01-assert.yaml new file mode 100644 index 000000000..10a4c3509 --- /dev/null +++ b/e2e/ca_rotation/01-assert.yaml @@ -0,0 +1,71 @@ +apiVersion: quay.redhat.com/v1 +kind: QuayRegistry +metadata: + name: ca-rotation +spec: + components: + - kind: monitoring + managed: false + - kind: mirror + managed: false + - kind: horizontalpodautoscaler + managed: false + - kind: clair + managed: false + - kind: clairpostgres + managed: false + - kind: quay + managed: true + - kind: postgres + managed: true + - kind: redis + managed: true + - kind: objectstorage + managed: true + - kind: route + managed: true + - kind: tls + managed: true +status: + conditions: + - type: ComponentHPAReady + reason: ComponentNotManaged + status: "True" + - type: ComponentRouteReady + reason: ComponentReady + status: "True" + - type: ComponentMonitoringReady + reason: ComponentNotManaged + status: "True" + - type: ComponentPostgresReady + reason: ComponentReady + status: "True" + - type: ComponentObjectStorageReady + reason: ComponentReady + status: "True" + - type: ComponentClairReady + reason: ComponentNotManaged + status: "True" + - type: ComponentClairPostgresReady + reason: ComponentNotManaged + status: "True" + - type: ComponentTLSReady + reason: ComponentReady + status: "True" + - type: ComponentRedisReady + reason: ComponentReady + status: "True" + - type: ComponentQuayReady + reason: ComponentReady + status: "True" + - type: ComponentMirrorReady + reason: ComponentNotManaged + status: "True" + - type: Available + reason: HealthChecksPassing + status: "True" + - type: ComponentsCreated + reason: ComponentsCreationSuccess + status: "True" + - type: RolloutBlocked + status: "False" diff --git a/e2e/ca_rotation/01-manual-rotate.yaml b/e2e/ca_rotation/01-manual-rotate.yaml new file mode 100644 index 000000000..cafd675a8 --- /dev/null +++ b/e2e/ca_rotation/01-manual-rotate.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +assert: + - 01-assert.yaml +commands: + # This is how we manually rotate the cert as per https://docs.openshift.com/container-platform/4.13/security/certificates/service-serving-certificate.html#manually-rotate-service-ca_service-serving-certificate +- script: | + kubectl delete secret/signing-key -n openshift-service-ca; + for I in $(oc get ns -o jsonpath='{range .items[*]} {.metadata.name}{"\n"} {end}'); \ + do oc delete pods --all -n $I; \ + sleep 1; \ + done + timeout: 3000 diff --git a/kuttl-test.yaml b/kuttl-test.yaml index 961d6b461..0815c859a 100644 --- a/kuttl-test.yaml +++ b/kuttl-test.yaml @@ -2,7 +2,7 @@ apiVersion: kuttl.dev/v1beta1 kind: TestSuite testDirs: - ./e2e -timeout: 500 +timeout: 600 parallel: 1 suppress: - "events" diff --git a/pkg/context/context.go b/pkg/context/context.go index 517bcbfc8..1c7000663 100644 --- a/pkg/context/context.go +++ b/pkg/context/context.go @@ -8,6 +8,10 @@ type QuayRegistryContext struct { ServerHostname string BuildManagerHostname string + // Cluster CA Resource Versions + ClusterServiceCAHash string + ClusterTrustedCAHash string + // TLS ClusterWildcardCert []byte TLSCert []byte diff --git a/pkg/kustomize/kustomize.go b/pkg/kustomize/kustomize.go index 3337a692a..b836caaa9 100644 --- a/pkg/kustomize/kustomize.go +++ b/pkg/kustomize/kustomize.go @@ -651,7 +651,7 @@ func Inflate( } for index, resource := range resources { - obj, err := middleware.Process(quay, resource, skipres) + obj, err := middleware.Process(quay, ctx, resource, skipres) if err != nil { return nil, err } diff --git a/pkg/middleware/middleware.go b/pkg/middleware/middleware.go index fd9853fde..05b116ae8 100644 --- a/pkg/middleware/middleware.go +++ b/pkg/middleware/middleware.go @@ -5,6 +5,7 @@ import ( "strings" route "github.com/openshift/api/route/v1" + quaycontext "github.com/quay/quay-operator/pkg/context" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -27,7 +28,7 @@ const ( // Process applies any additional middleware steps to a managed k8s object that cannot be // accomplished using the Kustomize toolchain. if skipres is set all resource requests are // trimmed from the objects thus deploying quay with a much smaller footprint. -func Process(quay *v1.QuayRegistry, obj client.Object, skipres bool) (client.Object, error) { +func Process(quay *v1.QuayRegistry, qctx *quaycontext.QuayRegistryContext, obj client.Object, skipres bool) (client.Object, error) { objectMeta, err := meta.Accessor(obj) if err != nil { return nil, err @@ -89,6 +90,12 @@ func Process(quay *v1.QuayRegistry, obj client.Object, skipres bool) (client.Obj dep.Spec.Template.Spec.Affinity = oaff } + // Add annotations to track the hash of the cluster service CA. This is to ensure that we redeploy when the cluster service CA changes. + dep.Annotations[v1.ClusterServiceCAName] = qctx.ClusterServiceCAHash + dep.Annotations[v1.ClusterTrustedCAName] = qctx.ClusterTrustedCAHash + dep.Spec.Template.Annotations[v1.ClusterServiceCAName] = qctx.ClusterServiceCAHash + dep.Spec.Template.Annotations[v1.ClusterTrustedCAName] = qctx.ClusterTrustedCAHash + // here we do an attempt to setting the default or overwriten number of replicas // for clair, quay and mirror. we can't do that if horizontal pod autoscaler is // in managed state as we would be stomping in the values defined by the hpa diff --git a/pkg/middleware/middleware_test.go b/pkg/middleware/middleware_test.go index 6597c7fd4..db22698af 100644 --- a/pkg/middleware/middleware_test.go +++ b/pkg/middleware/middleware_test.go @@ -5,6 +5,7 @@ import ( "testing" route "github.com/openshift/api/route/v1" + quaycontext "github.com/quay/quay-operator/pkg/context" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -230,7 +231,8 @@ func TestProcess(t *testing.T) { for _, test := range processTests { t.Run(test.name, func(t *testing.T) { - processedObj, err := Process(test.quay, test.obj, false) + quayContext := quaycontext.NewQuayRegistryContext() + processedObj, err := Process(test.quay, quayContext, test.obj, false) if test.expectedError != nil { assert.Error(err, test.name) } else { From 7a40e35c92ce05c3bbeda5eb865269a499369024 Mon Sep 17 00:00:00 2001 From: bcaton Date: Tue, 29 Aug 2023 11:47:37 -0400 Subject: [PATCH 2/2] route: increasing route timeout to 5min (PROJQUAY-2679) - Increasing route timeout --- kustomize/components/route/quay.route.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kustomize/components/route/quay.route.yaml b/kustomize/components/route/quay.route.yaml index dc77c16dd..acde28c5c 100644 --- a/kustomize/components/route/quay.route.yaml +++ b/kustomize/components/route/quay.route.yaml @@ -6,7 +6,7 @@ metadata: quay-component: quay-app-route annotations: quay-component: route - haproxy.router.openshift.io/timeout: 60s + haproxy.router.openshift.io/timeout: 5m spec: host: $(SERVER_HOSTNAME) to: