diff --git a/cluster/cluster.yaml b/cluster/cluster.yaml index ba5244105e..e9b1b631ca 100644 --- a/cluster/cluster.yaml +++ b/cluster/cluster.yaml @@ -551,14 +551,14 @@ Resources: PolicyName: root RoleName: "{{.Cluster.LocalID}}-app-autoscaler" Type: 'AWS::IAM::Role' -{{- if eq .Cluster.ConfigItems.karpenter_pools_enabled "true"}} - KarpenterNodeInstanceProfile: + KarpenterNodeInstanceProfile: # instance profile for worker nodes spawned by the karpenter controller Type: "AWS::IAM::InstanceProfile" Properties: InstanceProfileName: "{{ .Cluster.ID | awsValidID }}-WorkerKarpenter-InstanceProfile" Path: "/" Roles: - !Ref WorkerIAMRole +{{- if eq .Cluster.ConfigItems.karpenter_pools_enabled "true"}} KarpenterIAMRole: # role for the karpenter controller Properties: AssumeRolePolicyDocument: diff --git a/cluster/config-defaults.yaml b/cluster/config-defaults.yaml index 077e49fa47..0719cfdf8f 100644 --- a/cluster/config-defaults.yaml +++ b/cluster/config-defaults.yaml @@ -28,6 +28,10 @@ cluster_autoscaler_max_graceful_termination_sec: "1209600" # 2 weeks cluster_autoscaler_max_usnchedulable_pods_considered: "1000" # karpenter settings +# DO NOT SET TO FALSE IF THE CLUSTER HAS KARPENTER POOLS OR NODES. 
REFER TO TEAPOT DOCS FOR HOW TO ROLL BACK KARPENTER +# https://teapot.docs.zalando.net/howtos/karpenter-operations/ +karpenter_pools_enabled: "false" + karpenter_controller_cpu: "25m" karpenter_controller_memory: "256Mi" # set log level of karpenter: error|debug @@ -1005,8 +1009,6 @@ config_provider_service: "false" # enable SizeMemoryBackedVolumes feature flag enable_size_memory_backed_volumes: "true" -karpenter_pools_enabled: "false" - # enable StatefulSetAutoDeletePVC feature flag # https://kubernetes.io/blog/2021/12/16/kubernetes-1-23-statefulset-pvc-auto-deletion/ enable_statefulset_autodelete_pvc: "true" diff --git a/cluster/manifests/deletions.yaml b/cluster/manifests/deletions.yaml index a619062c8a..2cbd802c44 100644 --- a/cluster/manifests/deletions.yaml +++ b/cluster/manifests/deletions.yaml @@ -312,3 +312,10 @@ post_apply: - name: system:cloud-controller-manager kind: ClusterRoleBinding {{- end }} +# TODO(sszuecs) clean up skipper-ingress roles after successful change +# - name: skipper-ingress +# kind: ClusterRole +# namespace: kube-system +# - name: skipper-ingress +# kind: ClusterRoleBinding +# namespace: kube-system diff --git a/cluster/manifests/skipper/deployment.yaml b/cluster/manifests/skipper/deployment.yaml index bfa8cba598..6527c74c3f 100644 --- a/cluster/manifests/skipper/deployment.yaml +++ b/cluster/manifests/skipper/deployment.yaml @@ -81,9 +81,7 @@ spec: parent-resource-hash: 71556441059f2d033fb06b1e73df03598c7ecaa6 {{- end }} priorityClassName: "{{ .Cluster.ConfigItems.system_priority_class }}" -{{ if ne .Cluster.ConfigItems.skipper_routesrv_enabled "exec" }} serviceAccountName: skipper-ingress -{{ end }} terminationGracePeriodSeconds: {{ .Cluster.ConfigItems.skipper_termination_grace_period }} dnsPolicy: ClusterFirstWithHostNet hostNetwork: true diff --git a/cluster/manifests/skipper/rbac.yaml b/cluster/manifests/skipper/rbac.yaml index 3c7e23c57e..02b4f7efd8 100644 --- a/cluster/manifests/skipper/rbac.yaml +++ 
b/cluster/manifests/skipper/rbac.yaml @@ -5,12 +5,27 @@ metadata: namespace: kube-system labels: application: skipper-ingress + component: ingress {{ if eq .Cluster.ConfigItems.skipper_open_policy_agent_enabled "true" }} # Note: if the role extends beyond OPA use, this condition can be removed annotations: iam.amazonaws.com/role: "{{ .Cluster.LocalID }}-app-skipper-ingress" {{ end }} --- +apiVersion: v1 +kind: ServiceAccount + +metadata: + name: skipper-ingress-routesrv + namespace: kube-system + labels: + application: skipper-ingress + component: routesrv +--- +# TODO(sszuecs) after successful rollout we can delete all permissions +# (not the ClusterRole -> we need PSP for hostnetwork), because +# component=ingress does not need kubernetes RBAC permissions for the +# apiserver apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -45,6 +60,51 @@ rules: - list --- apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: skipper-ingress-routesrv +rules: +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list +- apiGroups: [""] + resources: ["namespaces", "services", "endpoints", "pods"] + verbs: ["get", "list"] +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - get + - list +- apiGroups: + - zalando.org + resources: + - routegroups + verbs: + - get + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: skipper-ingress-routesrv +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: skipper-ingress-routesrv +subjects: +- kind: ServiceAccount + name: skipper-ingress-routesrv + namespace: kube-system +--- +# TODO(sszuecs) after successful rollout we can delete this, because +# ingress does not have access to kube-apiserver +apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: name: skipper-ingress diff --git a/cluster/manifests/z-karpenter/deployment.yaml 
b/cluster/manifests/z-karpenter/deployment.yaml index de3c14bf88..c56287b34d 100644 --- a/cluster/manifests/z-karpenter/deployment.yaml +++ b/cluster/manifests/z-karpenter/deployment.yaml @@ -77,7 +77,7 @@ spec: - name: KUBERNETES_MIN_VERSION value: 1.22.0-0 - name: LOG_LEVEL - value: info + value: "{{ .Cluster.ConfigItems.karpenter_log_level }}" # quoted so the rendered env value is always a YAML string, even if the config item is empty - name: MEMORY_LIMIT valueFrom: resourceFieldRef: