diff --git a/apis/quay/v1/quayregistry_types.go b/apis/quay/v1/quayregistry_types.go index 9fdf234eb..62c58aa14 100644 --- a/apis/quay/v1/quayregistry_types.go +++ b/apis/quay/v1/quayregistry_types.go @@ -110,6 +110,8 @@ const ( QuayUpgradeJobName = "quay-app-upgrade" PostgresUpgradeJobName = "quay-postgres-upgrade" ClairPostgresUpgradeJobName = "clair-postgres-upgrade" + ClusterServiceCAName = "cluster-service-ca" + ClusterTrustedCAName = "cluster-trusted-ca" ) // QuayRegistrySpec defines the desired state of QuayRegistry. diff --git a/controllers/quay/features.go b/controllers/quay/features.go index bf0fba2bf..1d49c2075 100644 --- a/controllers/quay/features.go +++ b/controllers/quay/features.go @@ -3,7 +3,9 @@ package controllers import ( "bytes" "context" + "crypto/sha256" "crypto/tls" + "encoding/hex" "encoding/pem" "fmt" "strings" @@ -108,6 +110,60 @@ func (r *QuayRegistryReconciler) checkManagedKeys( return nil } +// checkClusterCAHash populates the provided QuayRegistryContext with revision version of the cluster provided CA configmaps. +// We must track these revisions so that we can force a restart of the QuayRegistry pods when the CA configmaps are updated. 
+func (r *QuayRegistryReconciler) checkClusterCAHash(
+	ctx context.Context, qctx *quaycontext.QuayRegistryContext, quay *v1.QuayRegistry,
+) error {
+
+	hashConfigMapContents := func(data map[string]string, key string) string {
+		certData, exists := data[key]
+		if !exists {
+			return ""
+		}
+		hash := sha256.Sum256([]byte(certData))
+		hashStr := hex.EncodeToString(hash[:])
+		return hashStr[len(hashStr)-8:]
+	}
+
+	// Get cluster-service-ca hash
+	clusterServiceCAnsn := types.NamespacedName{
+		Name:      fmt.Sprintf("%s-%s", quay.Name, v1.ClusterServiceCAName),
+		Namespace: quay.Namespace,
+	}
+	var clusterServiceCA corev1.ConfigMap
+	if err := r.Get(ctx, clusterServiceCAnsn, &clusterServiceCA); err == nil {
+		qctx.ClusterServiceCAHash = hashConfigMapContents(clusterServiceCA.Data, "service-ca.crt")
+		if currentHash, exists := clusterServiceCA.Annotations[v1.ClusterServiceCAName]; !exists || currentHash != qctx.ClusterServiceCAHash {
+			r.Log.Info("Detected change in cluster-service-ca configmap, updating annotation to trigger restart")
+			// A configmap created without annotations has a nil Annotations
+			// map; assigning into it would panic.
+			if clusterServiceCA.Annotations == nil {
+				clusterServiceCA.Annotations = map[string]string{}
+			}
+			clusterServiceCA.Annotations[v1.ClusterServiceCAName] = qctx.ClusterServiceCAHash
+			if err := r.Update(ctx, &clusterServiceCA); err != nil {
+				r.Log.Error(err, "unable to update cluster-service-ca configmap annotations")
+				return err
+			}
+		}
+	}
+
+	clusterTrustedCAnsn := types.NamespacedName{
+		Name:      fmt.Sprintf("%s-%s", quay.Name, v1.ClusterTrustedCAName),
+		Namespace: quay.Namespace,
+	}
+	var clusterTrustedCA corev1.ConfigMap
+	if err := r.Get(ctx, clusterTrustedCAnsn, &clusterTrustedCA); err == nil {
+		qctx.ClusterTrustedCAHash = hashConfigMapContents(clusterTrustedCA.Data, "ca-bundle.crt")
+		if currentHash, exists := clusterTrustedCA.Annotations[v1.ClusterTrustedCAName]; !exists || currentHash != qctx.ClusterTrustedCAHash {
+			r.Log.Info("Detected change in cluster-trusted-ca configmap, updating annotation to trigger restart")
+			// Same nil-map guard as above.
+			if clusterTrustedCA.Annotations == nil {
+				clusterTrustedCA.Annotations = map[string]string{}
+			}
+			clusterTrustedCA.Annotations[v1.ClusterTrustedCAName] = qctx.ClusterTrustedCAHash
+			if err :=
r.Update(ctx, &clusterTrustedCA); err != nil { + r.Log.Error(err, "unable to update cluster-trusted-ca configmap annotations") + return err + } + } + } + + return nil +} + // checkManagedTLS verifies if provided bundle contains entries for ssl key and cert, // populate the data in provided QuayRegistryContext if found. func (r *QuayRegistryReconciler) checkManagedTLS( diff --git a/controllers/quay/quayregistry_controller.go b/controllers/quay/quayregistry_controller.go index 120456e39..df379b289 100644 --- a/controllers/quay/quayregistry_controller.go +++ b/controllers/quay/quayregistry_controller.go @@ -475,6 +475,17 @@ func (r *QuayRegistryReconciler) Reconcile(ctx context.Context, req ctrl.Request quayContext := quaycontext.NewQuayRegistryContext() r.checkManagedTLS(quayContext, cbundle) + if err := r.checkClusterCAHash(ctx, quayContext, updatedQuay); err != nil { + return r.reconcileWithCondition( + ctx, + &quay, + v1.ConditionTypeRolloutBlocked, + metav1.ConditionTrue, + v1.ConditionReasonConfigInvalid, + fmt.Sprintf("unable to check cluster CA certs: %s", err), + ) + } + if err := r.checkManagedKeys(ctx, quayContext, updatedQuay); err != nil { return r.reconcileWithCondition( ctx, diff --git a/e2e/ca_rotation/00-assert.yaml b/e2e/ca_rotation/00-assert.yaml new file mode 100644 index 000000000..10a4c3509 --- /dev/null +++ b/e2e/ca_rotation/00-assert.yaml @@ -0,0 +1,71 @@ +apiVersion: quay.redhat.com/v1 +kind: QuayRegistry +metadata: + name: ca-rotation +spec: + components: + - kind: monitoring + managed: false + - kind: mirror + managed: false + - kind: horizontalpodautoscaler + managed: false + - kind: clair + managed: false + - kind: clairpostgres + managed: false + - kind: quay + managed: true + - kind: postgres + managed: true + - kind: redis + managed: true + - kind: objectstorage + managed: true + - kind: route + managed: true + - kind: tls + managed: true +status: + conditions: + - type: ComponentHPAReady + reason: ComponentNotManaged + status: "True" + 
- type: ComponentRouteReady + reason: ComponentReady + status: "True" + - type: ComponentMonitoringReady + reason: ComponentNotManaged + status: "True" + - type: ComponentPostgresReady + reason: ComponentReady + status: "True" + - type: ComponentObjectStorageReady + reason: ComponentReady + status: "True" + - type: ComponentClairReady + reason: ComponentNotManaged + status: "True" + - type: ComponentClairPostgresReady + reason: ComponentNotManaged + status: "True" + - type: ComponentTLSReady + reason: ComponentReady + status: "True" + - type: ComponentRedisReady + reason: ComponentReady + status: "True" + - type: ComponentQuayReady + reason: ComponentReady + status: "True" + - type: ComponentMirrorReady + reason: ComponentNotManaged + status: "True" + - type: Available + reason: HealthChecksPassing + status: "True" + - type: ComponentsCreated + reason: ComponentsCreationSuccess + status: "True" + - type: RolloutBlocked + status: "False" diff --git a/e2e/ca_rotation/00-create-quay-registry.yaml b/e2e/ca_rotation/00-create-quay-registry.yaml new file mode 100644 index 000000000..49985ec9a --- /dev/null +++ b/e2e/ca_rotation/00-create-quay-registry.yaml @@ -0,0 +1,16 @@ +apiVersion: quay.redhat.com/v1 +kind: QuayRegistry +metadata: + name: ca-rotation +spec: + components: + - kind: monitoring + managed: false + - kind: mirror + managed: false + - kind: horizontalpodautoscaler + managed: false + - kind: clair + managed: false + - kind: clairpostgres + managed: false diff --git a/e2e/ca_rotation/01-assert.yaml b/e2e/ca_rotation/01-assert.yaml new file mode 100644 index 000000000..10a4c3509 --- /dev/null +++ b/e2e/ca_rotation/01-assert.yaml @@ -0,0 +1,71 @@ +apiVersion: quay.redhat.com/v1 +kind: QuayRegistry +metadata: + name: ca-rotation +spec: + components: + - kind: monitoring + managed: false + - kind: mirror + managed: false + - kind: horizontalpodautoscaler + managed: false + - kind: clair + managed: false + - kind: clairpostgres + managed: false + - kind: quay + 
managed: true + - kind: postgres + managed: true + - kind: redis + managed: true + - kind: objectstorage + managed: true + - kind: route + managed: true + - kind: tls + managed: true +status: + conditions: + - type: ComponentHPAReady + reason: ComponentNotManaged + status: "True" + - type: ComponentRouteReady + reason: ComponentReady + status: "True" + - type: ComponentMonitoringReady + reason: ComponentNotManaged + status: "True" + - type: ComponentPostgresReady + reason: ComponentReady + status: "True" + - type: ComponentObjectStorageReady + reason: ComponentReady + status: "True" + - type: ComponentClairReady + reason: ComponentNotManaged + status: "True" + - type: ComponentClairPostgresReady + reason: ComponentNotManaged + status: "True" + - type: ComponentTLSReady + reason: ComponentReady + status: "True" + - type: ComponentRedisReady + reason: ComponentReady + status: "True" + - type: ComponentQuayReady + reason: ComponentReady + status: "True" + - type: ComponentMirrorReady + reason: ComponentNotManaged + status: "True" + - type: Available + reason: HealthChecksPassing + status: "True" + - type: ComponentsCreated + reason: ComponentsCreationSuccess + status: "True" + - type: RolloutBlocked + status: "False" diff --git a/e2e/ca_rotation/01-manual-rotate.yaml b/e2e/ca_rotation/01-manual-rotate.yaml new file mode 100644 index 000000000..cafd675a8 --- /dev/null +++ b/e2e/ca_rotation/01-manual-rotate.yaml @@ -0,0 +1,13 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +assert: + - 01-assert.yaml +commands: + # This is how we manually rotate the cert as per https://docs.openshift.com/container-platform/4.13/security/certificates/service-serving-certificate.html#manually-rotate-service-ca_service-serving-certificate +- script: | + kubectl delete secret/signing-key -n openshift-service-ca; + for I in $(oc get ns -o jsonpath='{range .items[*]} {.metadata.name}{"\n"} {end}'); \ + do oc delete pods --all -n $I; \ + sleep 1; \ + done + timeout: 3000 diff --git 
a/kustomize/components/route/quay.route.yaml b/kustomize/components/route/quay.route.yaml index dc77c16dd..acde28c5c 100644 --- a/kustomize/components/route/quay.route.yaml +++ b/kustomize/components/route/quay.route.yaml @@ -6,7 +6,7 @@ metadata: quay-component: quay-app-route annotations: quay-component: route - haproxy.router.openshift.io/timeout: 60s + haproxy.router.openshift.io/timeout: 5m spec: host: $(SERVER_HOSTNAME) to: diff --git a/kuttl-test.yaml b/kuttl-test.yaml index 961d6b461..0815c859a 100644 --- a/kuttl-test.yaml +++ b/kuttl-test.yaml @@ -2,7 +2,7 @@ apiVersion: kuttl.dev/v1beta1 kind: TestSuite testDirs: - ./e2e -timeout: 500 +timeout: 600 parallel: 1 suppress: - "events" diff --git a/pkg/context/context.go b/pkg/context/context.go index 517bcbfc8..1c7000663 100644 --- a/pkg/context/context.go +++ b/pkg/context/context.go @@ -8,6 +8,10 @@ type QuayRegistryContext struct { ServerHostname string BuildManagerHostname string + // Cluster CA Resource Versions + ClusterServiceCAHash string + ClusterTrustedCAHash string + // TLS ClusterWildcardCert []byte TLSCert []byte diff --git a/pkg/kustomize/kustomize.go b/pkg/kustomize/kustomize.go index 3337a692a..b836caaa9 100644 --- a/pkg/kustomize/kustomize.go +++ b/pkg/kustomize/kustomize.go @@ -651,7 +651,7 @@ func Inflate( } for index, resource := range resources { - obj, err := middleware.Process(quay, resource, skipres) + obj, err := middleware.Process(quay, ctx, resource, skipres) if err != nil { return nil, err } diff --git a/pkg/middleware/middleware.go b/pkg/middleware/middleware.go index fd9853fde..05b116ae8 100644 --- a/pkg/middleware/middleware.go +++ b/pkg/middleware/middleware.go @@ -5,6 +5,7 @@ import ( "strings" route "github.com/openshift/api/route/v1" + quaycontext "github.com/quay/quay-operator/pkg/context" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -27,7 +28,7 @@ const ( // Process applies any additional middleware steps to a managed k8s object 
that cannot be
 // accomplished using the Kustomize toolchain. if skipres is set all resource requests are
 // trimmed from the objects thus deploying quay with a much smaller footprint.
-func Process(quay *v1.QuayRegistry, obj client.Object, skipres bool) (client.Object, error) {
+func Process(quay *v1.QuayRegistry, qctx *quaycontext.QuayRegistryContext, obj client.Object, skipres bool) (client.Object, error) {
 	objectMeta, err := meta.Accessor(obj)
 	if err != nil {
 		return nil, err
 	}
@@ -89,6 +90,18 @@ func Process(quay *v1.QuayRegistry, obj client.Object, skipres bool) (client.Obj
 		dep.Spec.Template.Spec.Affinity = oaff
 	}
 
+	// Add annotations to track the hash of the cluster service CA. This is to ensure that we redeploy when the cluster service CA changes.
+	// Deployments rendered without annotations carry nil maps; guard before
+	// assigning to avoid a panic.
+	if dep.Annotations == nil {
+		dep.Annotations = map[string]string{}
+	}
+	dep.Annotations[v1.ClusterServiceCAName] = qctx.ClusterServiceCAHash
+	dep.Annotations[v1.ClusterTrustedCAName] = qctx.ClusterTrustedCAHash
+	if dep.Spec.Template.Annotations == nil {
+		dep.Spec.Template.Annotations = map[string]string{}
+	}
+	dep.Spec.Template.Annotations[v1.ClusterServiceCAName] = qctx.ClusterServiceCAHash
+	dep.Spec.Template.Annotations[v1.ClusterTrustedCAName] = qctx.ClusterTrustedCAHash
+
 	// here we do an attempt to setting the default or overwriten number of replicas
 	// for clair, quay and mirror.
we can't do that if horizontal pod autoscaler is // in managed state as we would be stomping in the values defined by the hpa diff --git a/pkg/middleware/middleware_test.go b/pkg/middleware/middleware_test.go index 6597c7fd4..db22698af 100644 --- a/pkg/middleware/middleware_test.go +++ b/pkg/middleware/middleware_test.go @@ -5,6 +5,7 @@ import ( "testing" route "github.com/openshift/api/route/v1" + quaycontext "github.com/quay/quay-operator/pkg/context" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -230,7 +231,8 @@ func TestProcess(t *testing.T) { for _, test := range processTests { t.Run(test.name, func(t *testing.T) { - processedObj, err := Process(test.quay, test.obj, false) + quayContext := quaycontext.NewQuayRegistryContext() + processedObj, err := Process(test.quay, quayContext, test.obj, false) if test.expectedError != nil { assert.Error(err, test.name) } else {