From 59fdc634e4412f6db26883670f14f2963cbcd097 Mon Sep 17 00:00:00 2001 From: naoki-take Date: Tue, 3 Sep 2024 02:21:52 +0000 Subject: [PATCH 1/4] Update cilium to v1.14.14 Signed-off-by: naoki-take --- Makefile | 2 +- artifacts.go | 8 +- artifacts_ignore.yaml | 8 - cilium/base/aggregate_cluster_role.yaml | 3 + cilium/pre/upstream.yaml | 303 +++++++++++++++++------- cilium/pre/values.yaml | 5 + cilium/prod/upstream.yaml | 303 +++++++++++++++++------- cilium/prod/values.yaml | 5 + etc/cilium-pre.yaml | 195 +++++++++++---- etc/cilium.yaml | 195 +++++++++++---- 10 files changed, 742 insertions(+), 285 deletions(-) diff --git a/Makefile b/Makefile index 9994d48e2..db0fd9954 100644 --- a/Makefile +++ b/Makefile @@ -82,7 +82,7 @@ update-coil: update-cilium: helm rm -rf /tmp/work-cilium mkdir -p /tmp/work-cilium - git clone --depth 1 --branch v$(shell echo $(CILIUM_TAG) | cut -d \. -f 1,2,3)-lb-dsr-patch https://github.com/cybozu-go/cilium /tmp/work-cilium + git clone --depth 1 --branch v$(shell echo $(CILIUM_TAG) | cut -d \. -f 1,2,3) https://github.com/cilium/cilium /tmp/work-cilium cd /tmp/work-cilium $(HELM) template /tmp/work-cilium/install/kubernetes/cilium/ \ --namespace=kube-system \ diff --git a/artifacts.go b/artifacts.go index 28c6a01c0..9bc01a625 100644 --- a/artifacts.go +++ b/artifacts.go @@ -16,10 +16,10 @@ var CurrentArtifacts = ArtifactSet{ {Name: "squid", Repository: "ghcr.io/cybozu/squid", Tag: "6.10.0.1", Private: false}, {Name: "squid-exporter", Repository: "ghcr.io/cybozu/squid-exporter", Tag: "1.0.6", Private: false}, {Name: "vault", Repository: "ghcr.io/cybozu/vault", Tag: "1.17.5.1", Private: false}, - {Name: "cilium", Repository: "ghcr.io/cybozu/cilium", Tag: "1.13.16.4", Private: false}, - {Name: "cilium-operator-generic", Repository: "ghcr.io/cybozu/cilium-operator-generic", Tag: "1.13.16.1", Private: false}, - {Name: "hubble-relay", Repository: "ghcr.io/cybozu/hubble-relay", Tag: "1.13.16.1", Private: false}, - {Name: "cilium-certgen", Repository: "ghcr.io/cybozu/cilium-certgen", Tag: "0.1.11.1", Private: false}, + {Name: "cilium", Repository: "ghcr.io/cybozu/cilium", Tag: "1.14.14.1", Private: false}, + {Name: "cilium-operator-generic", Repository: "ghcr.io/cybozu/cilium-operator-generic", Tag: "1.14.14.1", Private: false}, + {Name: "hubble-relay", Repository: "ghcr.io/cybozu/hubble-relay", Tag: "1.14.14.1", Private: false}, + {Name: "cilium-certgen", Repository: "ghcr.io/cybozu/cilium-certgen", Tag: "0.1.14.1", Private: false}, }, Debs: []DebianPackage{ {Name: "etcdpasswd", Owner: "cybozu-go", Repository: "etcdpasswd", Release: "v1.4.7"}, diff --git a/artifacts_ignore.yaml b/artifacts_ignore.yaml index c7ddbe875..e8ec5fc65 100644 --- a/artifacts_ignore.yaml +++ b/artifacts_ignore.yaml @@ -1,12 +1,4 @@ images: -- repository: ghcr.io/cybozu/cilium - versions: ["1.14.13.1", "1.14.13.2", "1.14.14.1"] -- repository: ghcr.io/cybozu/cilium-operator-generic - versions: ["1.14.13.1", "1.14.14.1"] -- repository: ghcr.io/cybozu/hubble-relay - versions: ["1.14.13.1", "1.14.13.2", "1.14.14.1"] -- repository: ghcr.io/cybozu/cilium-certgen - versions: ["0.1.14.1"] - repository: ghcr.io/cybozu/etcd versions: ["3.5.15.1"] osImage: diff --git a/cilium/base/aggregate_cluster_role.yaml b/cilium/base/aggregate_cluster_role.yaml index 5bfd4a6d1..9b1bc0b8f 100644 --- a/cilium/base/aggregate_cluster_role.yaml +++ b/cilium/base/aggregate_cluster_role.yaml @@ -50,6 +50,9 @@ rules: - ciliumexternalworkloads - ciliumexternalworkloads/finalizers - ciliumexternalworkloads/status + - 
ciliumcidrgroups + - ciliumcidrgroups/finalizers + - ciliumcidrgroups/status verbs: - "get" - "list" diff --git a/cilium/pre/upstream.yaml b/cilium/pre/upstream.yaml index a3062e605..60c4ec92e 100644 --- a/cilium/pre/upstream.yaml +++ b/cilium/pre/upstream.yaml @@ -51,8 +51,6 @@ data: cilium-endpoint-gc-interval: "5m0s" nodes-gc-interval: "5m0s" skip-cnp-status-startup-clean: "false" - # Disable the usage of CiliumEndpoint CRD - disable-endpoint-crd: "false" # To include or exclude matched resources from cilium identity evaluation labels: " k8s:app k8s:io\\.cilium\\.k8s\\.namespace\\.labels\\.team k8s:io\\.kubernetes\\.pod\\.namespace k8s:k8s-app io\\.cilium\\.k8s\\.policy cybozu\\.io/family app\\.cybozu\\.io neco\\.cybozu\\.io\\/registry identity\\.neco\\.cybozu\\.io " @@ -70,14 +68,14 @@ data: # NOTE that this will open the port on ALL nodes where Cilium pods are # scheduled. prometheus-serve-addr: ":9962" - # Port to expose Envoy metrics (e.g. "9964"). Envoy metrics listener will be disabled if this - # field is not set. - proxy-prometheus-port: "9964" # Metrics that should be enabled or disabled from the default metric # list. (+metric_foo to enable metric_foo , -metric_bar to disable # metric_bar). metrics: +cilium_bpf_map_pressure + # Port to expose Envoy metrics (e.g. "9964"). Envoy metrics listener will be disabled if this + # field is not set. + proxy-prometheus-port: "9964" # If you want metrics enabled in cilium-operator, set the port for # which the Cilium Operator will have their metrics exposed. # NOTE that this will open the port on the nodes where Cilium operator pod @@ -156,7 +154,7 @@ data: # - disabled # - vxlan (default) # - geneve - tunnel: "disabled" + routing-mode: "native" # Enables L7 proxy for L7 policy enforcement and visibility @@ -177,6 +175,7 @@ data: enable-local-node-route: "false" enable-ipv4-masquerade: "false" + enable-ipv4-big-tcp: "false" enable-ipv6-big-tcp: "false" enable-ipv6-masquerade: "true" @@ -212,7 +211,7 @@ data: pprof: "true" pprof-address: "0.0.0.0" pprof-port: "6060" - cni-uninstall: "true" + enable-k8s-networkpolicy: "true" # Disable health checking, when chaining mode is not set to portmap or none enable-endpoint-health-checking: "false" enable-health-checking: "true" @@ -232,9 +231,12 @@ data: hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt ipam: "cluster-pool" + ipam-cilium-node-update-rate: "15s" cluster-pool-ipv4-cidr: "10.0.0.0/8" cluster-pool-ipv4-mask-size: "24" disable-cnp-status-updates: "true" + cnp-node-status-gc-interval: "0s" + egress-gateway-reconciliation-trigger-interval: "1s" enable-vtep: "false" vtep-endpoint: "" vtep-cidr: "" @@ -247,22 +249,37 @@ data: cgroup-root: "/sys/fs/cgroup" enable-k8s-terminating-endpoint: "true" enable-sctp: "false" + k8s-client-qps: "5" + k8s-client-burst: "10" remove-cilium-node-taints: "true" + set-cilium-node-taints: "true" set-cilium-is-up-condition: "true" unmanaged-pod-watcher-interval: "15" - # default DNS proxy to transparent mode - dnsproxy-enable-transparent-mode: "true" + dnsproxy-socket-linger-timeout: "10" tofqdns-dns-reject-response-code: "refused" tofqdns-enable-dns-compression: "true" tofqdns-endpoint-max-ip-per-hostname: "50" tofqdns-idle-connection-grace-period: "0s" tofqdns-max-deferred-connection-deletes: "10000" - tofqdns-min-ttl: "3600" tofqdns-proxy-response-max-delay: "100ms" bpf-ct-timeout-regular-any: 1h0m0s bpf-ct-timeout-service-any: 1h0m0s agent-not-ready-taint-key: 
"node.cilium.io/agent-not-ready" + + mesh-auth-enabled: "true" + mesh-auth-queue-size: "1024" + mesh-auth-rotated-identities-queue-size: "1024" + mesh-auth-gc-interval: "5m0s" + + proxy-xff-num-trusted-hops-ingress: "0" + proxy-xff-num-trusted-hops-egress: "0" + proxy-connect-timeout: "2" + proxy-max-requests-per-connection: "0" + proxy-max-connection-duration-seconds: "0" + proxy-idle-timeout-seconds: "60" + + external-envoy-proxy: "false" --- # Source: cilium/templates/hubble-relay/configmap.yaml apiVersion: v1 @@ -275,15 +292,17 @@ data: cluster-name: default peer-service: "hubble-peer.kube-system.svc.cluster.local:443" listen-address: :4245 + gops: true + gops-port: "9893" dial-timeout: retry-timeout: sort-buffer-len-max: sort-buffer-drain-timeout: - tls-client-cert-file: /var/lib/hubble-relay/tls/client.crt - tls-client-key-file: /var/lib/hubble-relay/tls/client.key + tls-hubble-client-cert-file: /var/lib/hubble-relay/tls/client.crt + tls-hubble-client-key-file: /var/lib/hubble-relay/tls/client.key tls-hubble-server-ca-files: /var/lib/hubble-relay/tls/hubble-server-ca.crt - tls-server-cert-file: /var/lib/hubble-relay/tls/server.crt - tls-server-key-file: /var/lib/hubble-relay/tls/server.key + tls-relay-server-cert-file: /var/lib/hubble-relay/tls/server.crt + tls-relay-server-key-file: /var/lib/hubble-relay/tls/server.key --- # Source: cilium/templates/cilium-agent/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1 @@ -348,6 +367,9 @@ rules: - ciliumnetworkpolicies - ciliumnodes - ciliumnodeconfigs + - ciliumcidrgroups + - ciliuml2announcementpolicies + - ciliumpodippools verbs: - list - watch @@ -388,6 +410,7 @@ rules: - ciliumclusterwidenetworkpolicies/status - ciliumendpoints/status - ciliumendpoints + - ciliuml2announcementpolicies/status verbs: - patch --- @@ -563,14 +586,24 @@ rules: - ciliumnetworkpolicies.cilium.io - ciliumnodes.cilium.io - ciliumnodeconfigs.cilium.io + - ciliumcidrgroups.cilium.io + - ciliuml2announcementpolicies.cilium.io + - ciliumpodippools.cilium.io - apiGroups: - cilium.io resources: - ciliumloadbalancerippools + - ciliumpodippools verbs: - get - list - watch +- apiGroups: + - cilium.io + resources: + - ciliumpodippools + verbs: + - create - apiGroups: - cilium.io resources: @@ -622,7 +655,6 @@ rules: - secrets resourceNames: - cilium-ca - - hubble-ca-secret verbs: - get - update @@ -795,7 +827,7 @@ spec: prometheus.io/port: "9962" prometheus.io/scrape: "true" # ensure pods roll when configmap updates - cilium.io/cilium-configmap-checksum: "89029740a4242a661efaec2ce058760e9d1e323c603534509c54902ef6891b1e" + cilium.io/cilium-configmap-checksum: "d4bf08bf4c6ee946280b8e7bcb2586f8833a3a3e46137f6979d77b3020e1f546" # Set app AppArmor's profile to "unconfined". The value of this annotation # can be modified as long users know which profiles they have available # in AppArmor. 
@@ -808,7 +840,7 @@ spec: spec: containers: - name: cilium-agent - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent command: - cilium-agent @@ -865,18 +897,6 @@ spec: fieldPath: metadata.namespace - name: CILIUM_CLUSTERMESH_CONFIG value: /var/lib/cilium/clustermesh/ - - name: CILIUM_CNI_CHAINING_MODE - valueFrom: - configMapKeyRef: - name: cilium-config - key: cni-chaining-mode - optional: true - - name: CILIUM_CUSTOM_CNI_CONF - valueFrom: - configMapKeyRef: - name: cilium-config - key: custom-cni-conf - optional: true - name: KUBERNETES_SERVICE_HOST value: "127.0.0.1" - name: KUBERNETES_SERVICE_PORT @@ -888,7 +908,26 @@ spec: - "bash" - "-c" - | - /cni-install.sh --enable-debug=false --cni-exclusive=true --log-file=/var/run/cilium/cilium-cni.log + set -o errexit + set -o pipefail + set -o nounset + + # When running in AWS ENI mode, it's likely that 'aws-node' has + # had a chance to install SNAT iptables rules. These can result + # in dropped traffic, so we should attempt to remove them. + # We do it using a 'postStart' hook since this may need to run + # for nodes which might have already been init'ed but may still + # have dangling rules. This is safe because there are no + # dependencies on anything that is part of the startup script + # itself, and can be safely run multiple times per node (e.g. in + # case of a restart). + if [[ "$(iptables-save | grep -E -c 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN')" != "0" ]]; + then + echo 'Deleting iptables rules created by the AWS CNI VPC plugin' + iptables-save | grep -E -v 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN' | iptables-restore + fi + echo 'Done!' + preStop: exec: command: @@ -973,7 +1012,7 @@ spec: mountPath: /tmp initContainers: - name: config - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent command: - cilium @@ -997,11 +1036,46 @@ spec: - name: tmp mountPath: /tmp terminationMessagePolicy: FallbackToLogsOnError + - name: apply-sysctl-overwrites + image: "ghcr.io/cybozu/cilium:1.14.14.1" + imagePullPolicy: IfNotPresent + env: + - name: BIN_PATH + value: /opt/cni/bin + command: + - sh + - -ec + # The statically linked Go program binary is invoked to avoid any + # dependency on utilities like sh that can be missing on certain + # distros installed on the underlying host. Copy the binary to the + # same directory where we install cilium cni plugin so that exec permissions + # are available. + - | + cp /usr/bin/cilium-sysctlfix /hostbin/cilium-sysctlfix; + nsenter --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-sysctlfix"; + rm /hostbin/cilium-sysctlfix + volumeMounts: + - name: hostproc + mountPath: /hostproc + - name: cni-path + mountPath: /hostbin + terminationMessagePolicy: FallbackToLogsOnError + securityContext: + seLinuxOptions: + level: s0 + type: spc_t + capabilities: + add: + - SYS_ADMIN + - SYS_CHROOT + - SYS_PTRACE + drop: + - ALL # Mount the bpf fs if it is not mounted. We will perform this task # from a privileged container because the mount propagation bidirectional # only works from privileged containers. 
- name: mount-bpf-fs - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent args: - 'mount | grep "/sys/fs/bpf type bpf" || mount -t bpf bpf /sys/fs/bpf' @@ -1017,7 +1091,7 @@ spec: mountPath: /sys/fs/bpf mountPropagation: Bidirectional - name: clean-cilium-state - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent command: - /init-container.sh @@ -1062,7 +1136,7 @@ spec: mountPath: /var/run/cilium # wait-for-kube-proxy # Install the CNI binaries in an InitContainer so we don't have a writable host mount in the agent - name: install-cni-binaries - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent command: - "/install-plugin.sh" @@ -1113,6 +1187,11 @@ spec: hostPath: path: /sys/fs/bpf type: DirectoryOrCreate + # To mount cgroup2 filesystem on the host or apply sysctlfix + - name: hostproc + hostPath: + path: /proc + type: Directory # To keep state between restarts / upgrades for cgroup2 filesystem - name: cilium-cgroup hostPath: @@ -1139,11 +1218,27 @@ spec: type: FileOrCreate # To read the clustermesh configuration - name: clustermesh-secrets - secret: - secretName: cilium-clustermesh + projected: # note: the leading zero means this number is in octal representation: do not remove it defaultMode: 0400 - optional: true + sources: + - secret: + name: cilium-clustermesh + optional: true + # note: items are not explicitly listed here, since the entries of this secret + # depend on the peers configured, and that would cause a restart of all agents + # at every addition/removal. Leaving the field empty makes each secret entry + # to be automatically projected into the volume as a file whose name is the key. + - secret: + name: clustermesh-apiserver-remote-cert + optional: true + items: + - key: tls.key + path: common-etcd-client.key + - key: tls.crt + path: common-etcd-client.crt + - key: ca.crt + path: common-etcd-client-ca.crt - name: bgp-config-path configMap: name: bgp-config @@ -1164,12 +1259,12 @@ spec: name: hubble-server-certs optional: true items: - - key: ca.crt - path: client-ca.crt - key: tls.crt path: server.crt - key: tls.key path: server.key + - key: ca.crt + path: client-ca.crt --- # Source: cilium/templates/cilium-operator/deployment.yaml apiVersion: apps/v1 @@ -1190,16 +1285,20 @@ spec: matchLabels: io.cilium/app: operator name: cilium-operator + # ensure operator update on single node k8s clusters, by using rolling update with maxUnavailable=100% in case + # of one replica and no user configured Recreate strategy. + # otherwise an update might get stuck due to the default maxUnavailable=50% in combination with the + # podAntiAffinity which prevents deployments of multiple operator replicas on the same node. 
strategy: rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 + maxSurge: 25% + maxUnavailable: 50% type: RollingUpdate template: metadata: annotations: # ensure pods roll when configmap updates - cilium.io/cilium-configmap-checksum: "89029740a4242a661efaec2ce058760e9d1e323c603534509c54902ef6891b1e" + cilium.io/cilium-configmap-checksum: "d4bf08bf4c6ee946280b8e7bcb2586f8833a3a3e46137f6979d77b3020e1f546" prometheus.io/port: "9963" prometheus.io/scrape: "true" labels: @@ -1210,7 +1309,7 @@ spec: spec: containers: - name: cilium-operator - image: "ghcr.io/cybozu/cilium-operator-generic:1.13.16.1" + image: "ghcr.io/cybozu/cilium-operator-generic:1.14.14.1" imagePullPolicy: IfNotPresent command: - cilium-operator-generic @@ -1252,6 +1351,16 @@ spec: initialDelaySeconds: 60 periodSeconds: 10 timeoutSeconds: 3 + readinessProbe: + httpGet: + host: "127.0.0.1" + path: /healthz + port: 9234 + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 5 volumeMounts: - name: cilium-config-path mountPath: /tmp/cilium/config-map @@ -1318,15 +1427,24 @@ spec: metadata: annotations: # ensure pods roll when configmap updates - cilium.io/hubble-relay-configmap-checksum: "121d3ca340f3623a68297728e72f60908cf197df412eb4bb266f449c1794a5a7" + cilium.io/hubble-relay-configmap-checksum: "021b54fa697399fbce31d464cf934ae4b921370cdcdcf3f98ca0a3d8a3201b76" labels: k8s-app: hubble-relay app.kubernetes.io/name: hubble-relay app.kubernetes.io/part-of: cilium spec: + securityContext: + fsGroup: 10000 containers: - name: hubble-relay - image: "ghcr.io/cybozu/hubble-relay:1.13.16.1" + securityContext: + capabilities: + drop: + - ALL + runAsGroup: 10000 + runAsNonRoot: true + runAsUser: 10000 + image: "ghcr.io/cybozu/hubble-relay:1.14.14.1" imagePullPolicy: IfNotPresent command: - hubble-relay @@ -1386,12 +1504,12 @@ spec: - secret: name: hubble-relay-client-certs items: - - key: ca.crt - path: hubble-server-ca.crt - key: tls.crt path: client.crt - key: tls.key path: client.key + - key: ca.crt + path: hubble-server-ca.crt - secret: name: hubble-relay-server-certs items: @@ -1400,49 +1518,6 @@ spec: - key: tls.key path: server.key --- -# Source: cilium/templates/hubble/tls-cronjob/job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: hubble-generate-certs-4b23ed05ea - namespace: kube-system - labels: - k8s-app: hubble-generate-certs - app.kubernetes.io/name: hubble-generate-certs - app.kubernetes.io/part-of: cilium -spec: - template: - metadata: - labels: - k8s-app: hubble-generate-certs - spec: - containers: - - name: certgen - image: "ghcr.io/cybozu/cilium-certgen:0.1.11.1" - imagePullPolicy: IfNotPresent - command: - - "/usr/bin/cilium-certgen" - # Because this is executed as a job, we pass the values as command - # line args instead of via config map. This allows users to inspect - # the values used in past runs by inspecting the completed pod. 
- args: - - "--cilium-namespace=kube-system" - - "--ca-generate" - - "--ca-reuse-secret" - - "--hubble-server-cert-generate" - - "--hubble-server-cert-common-name=*.default.hubble-grpc.cilium.io" - - "--hubble-server-cert-validity-duration=94608000s" - - "--hubble-relay-client-cert-generate" - - "--hubble-relay-client-cert-validity-duration=94608000s" - - "--hubble-relay-server-cert-generate" - - "--hubble-relay-server-cert-validity-duration=94608000s" - hostNetwork: true - serviceAccount: "hubble-generate-certs" - serviceAccountName: "hubble-generate-certs" - automountServiceAccountToken: true - restartPolicy: OnFailure - ttlSecondsAfterFinished: 1800 ---- # Source: cilium/templates/hubble/tls-cronjob/cronjob.yaml apiVersion: batch/v1 kind: CronJob @@ -1453,6 +1528,7 @@ metadata: k8s-app: hubble-generate-certs app.kubernetes.io/name: hubble-generate-certs app.kubernetes.io/part-of: cilium + annotations: spec: schedule: "0 0 1 */4 *" concurrencyPolicy: Forbid @@ -1465,7 +1541,7 @@ spec: spec: containers: - name: certgen - image: "ghcr.io/cybozu/cilium-certgen:0.1.11.1" + image: "ghcr.io/cybozu/cilium-certgen:0.1.14.1" imagePullPolicy: IfNotPresent command: - "/usr/bin/cilium-certgen" @@ -1494,3 +1570,48 @@ spec: # Only create the namespace if it's different from Ingress secret namespace or Ingress is not enabled. # Only create the namespace if it's different from Ingress and Gateway API secret namespaces (if enabled). +--- +# Source: cilium/templates/hubble/tls-cronjob/job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: hubble-generate-certs + namespace: kube-system + labels: + k8s-app: hubble-generate-certs + app.kubernetes.io/name: hubble-generate-certs + app.kubernetes.io/part-of: cilium + annotations: + "helm.sh/hook": post-install,post-upgrade +spec: + template: + metadata: + labels: + k8s-app: hubble-generate-certs + spec: + containers: + - name: certgen + image: "ghcr.io/cybozu/cilium-certgen:0.1.14.1" + imagePullPolicy: IfNotPresent + command: + - "/usr/bin/cilium-certgen" + # Because this is executed as a job, we pass the values as command + # line args instead of via config map. This allows users to inspect + # the values used in past runs by inspecting the completed pod. 
+ args: + - "--cilium-namespace=kube-system" + - "--ca-generate" + - "--ca-reuse-secret" + - "--hubble-server-cert-generate" + - "--hubble-server-cert-common-name=*.default.hubble-grpc.cilium.io" + - "--hubble-server-cert-validity-duration=94608000s" + - "--hubble-relay-client-cert-generate" + - "--hubble-relay-client-cert-validity-duration=94608000s" + - "--hubble-relay-server-cert-generate" + - "--hubble-relay-server-cert-validity-duration=94608000s" + hostNetwork: true + serviceAccount: "hubble-generate-certs" + serviceAccountName: "hubble-generate-certs" + automountServiceAccountToken: true + restartPolicy: OnFailure + ttlSecondsAfterFinished: 1800 diff --git a/cilium/pre/values.yaml b/cilium/pre/values.yaml index a03a95f9d..cb1f75dd7 100644 --- a/cilium/pre/values.yaml +++ b/cilium/pre/values.yaml @@ -38,6 +38,11 @@ hubble: requests: cpu: 210m memory: 120Mi + podSecurityContext: + fsGroup: 10000 + securityContext: + runAsUser: 10000 + runAsGroup: 10000 tls: auto: method: "cronJob" diff --git a/cilium/prod/upstream.yaml b/cilium/prod/upstream.yaml index c2c3b003c..509db46c4 100644 --- a/cilium/prod/upstream.yaml +++ b/cilium/prod/upstream.yaml @@ -51,8 +51,6 @@ data: cilium-endpoint-gc-interval: "5m0s" nodes-gc-interval: "5m0s" skip-cnp-status-startup-clean: "false" - # Disable the usage of CiliumEndpoint CRD - disable-endpoint-crd: "false" # To include or exclude matched resources from cilium identity evaluation labels: " k8s:app k8s:io\\.cilium\\.k8s\\.namespace\\.labels\\.team k8s:io\\.kubernetes\\.pod\\.namespace k8s:k8s-app io\\.cilium\\.k8s\\.policy cybozu\\.io/family app\\.cybozu\\.io neco\\.cybozu\\.io\\/registry identity\\.neco\\.cybozu\\.io " @@ -70,14 +68,14 @@ data: # NOTE that this will open the port on ALL nodes where Cilium pods are # scheduled. prometheus-serve-addr: ":9962" - # Port to expose Envoy metrics (e.g. "9964"). Envoy metrics listener will be disabled if this - # field is not set. - proxy-prometheus-port: "9964" # Metrics that should be enabled or disabled from the default metric # list. (+metric_foo to enable metric_foo , -metric_bar to disable # metric_bar). metrics: +cilium_bpf_map_pressure + # Port to expose Envoy metrics (e.g. "9964"). Envoy metrics listener will be disabled if this + # field is not set. + proxy-prometheus-port: "9964" # If you want metrics enabled in cilium-operator, set the port for # which the Cilium Operator will have their metrics exposed. 
# NOTE that this will open the port on the nodes where Cilium operator pod @@ -156,7 +154,7 @@ data: # - disabled # - vxlan (default) # - geneve - tunnel: "disabled" + routing-mode: "native" # Enables L7 proxy for L7 policy enforcement and visibility @@ -177,6 +175,7 @@ data: enable-local-node-route: "false" enable-ipv4-masquerade: "false" + enable-ipv4-big-tcp: "false" enable-ipv6-big-tcp: "false" enable-ipv6-masquerade: "true" @@ -209,7 +208,7 @@ data: enable-svc-source-range-check: "true" enable-l2-neigh-discovery: "true" arping-refresh-period: "30s" - cni-uninstall: "true" + enable-k8s-networkpolicy: "true" # Disable health checking, when chaining mode is not set to portmap or none enable-endpoint-health-checking: "false" enable-health-checking: "true" @@ -229,9 +228,12 @@ data: hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt ipam: "cluster-pool" + ipam-cilium-node-update-rate: "15s" cluster-pool-ipv4-cidr: "10.0.0.0/8" cluster-pool-ipv4-mask-size: "24" disable-cnp-status-updates: "true" + cnp-node-status-gc-interval: "0s" + egress-gateway-reconciliation-trigger-interval: "1s" enable-vtep: "false" vtep-endpoint: "" vtep-cidr: "" @@ -244,22 +246,37 @@ data: cgroup-root: "/sys/fs/cgroup" enable-k8s-terminating-endpoint: "true" enable-sctp: "false" + k8s-client-qps: "5" + k8s-client-burst: "10" remove-cilium-node-taints: "true" + set-cilium-node-taints: "true" set-cilium-is-up-condition: "true" unmanaged-pod-watcher-interval: "15" - # default DNS proxy to transparent mode - dnsproxy-enable-transparent-mode: "true" + dnsproxy-socket-linger-timeout: "10" tofqdns-dns-reject-response-code: "refused" tofqdns-enable-dns-compression: "true" tofqdns-endpoint-max-ip-per-hostname: "50" tofqdns-idle-connection-grace-period: "0s" tofqdns-max-deferred-connection-deletes: "10000" - tofqdns-min-ttl: "3600" tofqdns-proxy-response-max-delay: "100ms" bpf-ct-timeout-regular-any: 1h0m0s bpf-ct-timeout-service-any: 1h0m0s agent-not-ready-taint-key: "node.cilium.io/agent-not-ready" + + mesh-auth-enabled: "true" + mesh-auth-queue-size: "1024" + mesh-auth-rotated-identities-queue-size: "1024" + mesh-auth-gc-interval: "5m0s" + + proxy-xff-num-trusted-hops-ingress: "0" + proxy-xff-num-trusted-hops-egress: "0" + proxy-connect-timeout: "2" + proxy-max-requests-per-connection: "0" + proxy-max-connection-duration-seconds: "0" + proxy-idle-timeout-seconds: "60" + + external-envoy-proxy: "false" --- # Source: cilium/templates/hubble-relay/configmap.yaml apiVersion: v1 @@ -272,15 +289,17 @@ data: cluster-name: default peer-service: "hubble-peer.kube-system.svc.cluster.local:443" listen-address: :4245 + gops: true + gops-port: "9893" dial-timeout: retry-timeout: sort-buffer-len-max: sort-buffer-drain-timeout: - tls-client-cert-file: /var/lib/hubble-relay/tls/client.crt - tls-client-key-file: /var/lib/hubble-relay/tls/client.key + tls-hubble-client-cert-file: /var/lib/hubble-relay/tls/client.crt + tls-hubble-client-key-file: /var/lib/hubble-relay/tls/client.key tls-hubble-server-ca-files: /var/lib/hubble-relay/tls/hubble-server-ca.crt - tls-server-cert-file: /var/lib/hubble-relay/tls/server.crt - tls-server-key-file: /var/lib/hubble-relay/tls/server.key + tls-relay-server-cert-file: /var/lib/hubble-relay/tls/server.crt + tls-relay-server-key-file: /var/lib/hubble-relay/tls/server.key --- # Source: cilium/templates/cilium-agent/clusterrole.yaml apiVersion: rbac.authorization.k8s.io/v1 @@ -345,6 +364,9 @@ rules: - ciliumnetworkpolicies - 
ciliumnodes - ciliumnodeconfigs + - ciliumcidrgroups + - ciliuml2announcementpolicies + - ciliumpodippools verbs: - list - watch @@ -385,6 +407,7 @@ rules: - ciliumclusterwidenetworkpolicies/status - ciliumendpoints/status - ciliumendpoints + - ciliuml2announcementpolicies/status verbs: - patch --- @@ -560,14 +583,24 @@ rules: - ciliumnetworkpolicies.cilium.io - ciliumnodes.cilium.io - ciliumnodeconfigs.cilium.io + - ciliumcidrgroups.cilium.io + - ciliuml2announcementpolicies.cilium.io + - ciliumpodippools.cilium.io - apiGroups: - cilium.io resources: - ciliumloadbalancerippools + - ciliumpodippools verbs: - get - list - watch +- apiGroups: + - cilium.io + resources: + - ciliumpodippools + verbs: + - create - apiGroups: - cilium.io resources: @@ -619,7 +652,6 @@ rules: - secrets resourceNames: - cilium-ca - - hubble-ca-secret verbs: - get - update @@ -792,7 +824,7 @@ spec: prometheus.io/port: "9962" prometheus.io/scrape: "true" # ensure pods roll when configmap updates - cilium.io/cilium-configmap-checksum: "6ce5254ae5e45c178f019621aa0bca076d336d1231fd90ddb8df2f77e2ebc667" + cilium.io/cilium-configmap-checksum: "d5a6358f3358cdc61bf73eddd0be4f8a5b8909d0f95d0236cd095e308678a1a0" # Set app AppArmor's profile to "unconfined". The value of this annotation # can be modified as long users know which profiles they have available # in AppArmor. @@ -805,7 +837,7 @@ spec: spec: containers: - name: cilium-agent - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent command: - cilium-agent @@ -862,18 +894,6 @@ spec: fieldPath: metadata.namespace - name: CILIUM_CLUSTERMESH_CONFIG value: /var/lib/cilium/clustermesh/ - - name: CILIUM_CNI_CHAINING_MODE - valueFrom: - configMapKeyRef: - name: cilium-config - key: cni-chaining-mode - optional: true - - name: CILIUM_CUSTOM_CNI_CONF - valueFrom: - configMapKeyRef: - name: cilium-config - key: custom-cni-conf - optional: true - name: KUBERNETES_SERVICE_HOST value: "127.0.0.1" - name: KUBERNETES_SERVICE_PORT @@ -885,7 +905,26 @@ spec: - "bash" - "-c" - | - /cni-install.sh --enable-debug=false --cni-exclusive=true --log-file=/var/run/cilium/cilium-cni.log + set -o errexit + set -o pipefail + set -o nounset + + # When running in AWS ENI mode, it's likely that 'aws-node' has + # had a chance to install SNAT iptables rules. These can result + # in dropped traffic, so we should attempt to remove them. + # We do it using a 'postStart' hook since this may need to run + # for nodes which might have already been init'ed but may still + # have dangling rules. This is safe because there are no + # dependencies on anything that is part of the startup script + # itself, and can be safely run multiple times per node (e.g. in + # case of a restart). + if [[ "$(iptables-save | grep -E -c 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN')" != "0" ]]; + then + echo 'Deleting iptables rules created by the AWS CNI VPC plugin' + iptables-save | grep -E -v 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN' | iptables-restore + fi + echo 'Done!' 
+ preStop: exec: command: @@ -970,7 +1009,7 @@ spec: mountPath: /tmp initContainers: - name: config - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent command: - cilium @@ -994,11 +1033,46 @@ spec: - name: tmp mountPath: /tmp terminationMessagePolicy: FallbackToLogsOnError + - name: apply-sysctl-overwrites + image: "ghcr.io/cybozu/cilium:1.14.14.1" + imagePullPolicy: IfNotPresent + env: + - name: BIN_PATH + value: /opt/cni/bin + command: + - sh + - -ec + # The statically linked Go program binary is invoked to avoid any + # dependency on utilities like sh that can be missing on certain + # distros installed on the underlying host. Copy the binary to the + # same directory where we install cilium cni plugin so that exec permissions + # are available. + - | + cp /usr/bin/cilium-sysctlfix /hostbin/cilium-sysctlfix; + nsenter --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-sysctlfix"; + rm /hostbin/cilium-sysctlfix + volumeMounts: + - name: hostproc + mountPath: /hostproc + - name: cni-path + mountPath: /hostbin + terminationMessagePolicy: FallbackToLogsOnError + securityContext: + seLinuxOptions: + level: s0 + type: spc_t + capabilities: + add: + - SYS_ADMIN + - SYS_CHROOT + - SYS_PTRACE + drop: + - ALL # Mount the bpf fs if it is not mounted. We will perform this task # from a privileged container because the mount propagation bidirectional # only works from privileged containers. - name: mount-bpf-fs - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent args: - 'mount | grep "/sys/fs/bpf type bpf" || mount -t bpf bpf /sys/fs/bpf' @@ -1014,7 +1088,7 @@ spec: mountPath: /sys/fs/bpf mountPropagation: Bidirectional - name: clean-cilium-state - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent command: - /init-container.sh @@ -1059,7 +1133,7 @@ spec: mountPath: /var/run/cilium # wait-for-kube-proxy # Install the CNI binaries in an InitContainer so we don't have a writable host mount in the agent - name: install-cni-binaries - image: "ghcr.io/cybozu/cilium:1.13.16.4" + image: "ghcr.io/cybozu/cilium:1.14.14.1" imagePullPolicy: IfNotPresent command: - "/install-plugin.sh" @@ -1110,6 +1184,11 @@ spec: hostPath: path: /sys/fs/bpf type: DirectoryOrCreate + # To mount cgroup2 filesystem on the host or apply sysctlfix + - name: hostproc + hostPath: + path: /proc + type: Directory # To keep state between restarts / upgrades for cgroup2 filesystem - name: cilium-cgroup hostPath: @@ -1136,11 +1215,27 @@ spec: type: FileOrCreate # To read the clustermesh configuration - name: clustermesh-secrets - secret: - secretName: cilium-clustermesh + projected: # note: the leading zero means this number is in octal representation: do not remove it defaultMode: 0400 - optional: true + sources: + - secret: + name: cilium-clustermesh + optional: true + # note: items are not explicitly listed here, since the entries of this secret + # depend on the peers configured, and that would cause a restart of all agents + # at every addition/removal. Leaving the field empty makes each secret entry + # to be automatically projected into the volume as a file whose name is the key. 
+ - secret: + name: clustermesh-apiserver-remote-cert + optional: true + items: + - key: tls.key + path: common-etcd-client.key + - key: tls.crt + path: common-etcd-client.crt + - key: ca.crt + path: common-etcd-client-ca.crt - name: bgp-config-path configMap: name: bgp-config @@ -1161,12 +1256,12 @@ spec: name: hubble-server-certs optional: true items: - - key: ca.crt - path: client-ca.crt - key: tls.crt path: server.crt - key: tls.key path: server.key + - key: ca.crt + path: client-ca.crt --- # Source: cilium/templates/cilium-operator/deployment.yaml apiVersion: apps/v1 @@ -1187,16 +1282,20 @@ spec: matchLabels: io.cilium/app: operator name: cilium-operator + # ensure operator update on single node k8s clusters, by using rolling update with maxUnavailable=100% in case + # of one replica and no user configured Recreate strategy. + # otherwise an update might get stuck due to the default maxUnavailable=50% in combination with the + # podAntiAffinity which prevents deployments of multiple operator replicas on the same node. strategy: rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 + maxSurge: 25% + maxUnavailable: 50% type: RollingUpdate template: metadata: annotations: # ensure pods roll when configmap updates - cilium.io/cilium-configmap-checksum: "6ce5254ae5e45c178f019621aa0bca076d336d1231fd90ddb8df2f77e2ebc667" + cilium.io/cilium-configmap-checksum: "d5a6358f3358cdc61bf73eddd0be4f8a5b8909d0f95d0236cd095e308678a1a0" prometheus.io/port: "9963" prometheus.io/scrape: "true" labels: @@ -1207,7 +1306,7 @@ spec: spec: containers: - name: cilium-operator - image: "ghcr.io/cybozu/cilium-operator-generic:1.13.16.1" + image: "ghcr.io/cybozu/cilium-operator-generic:1.14.14.1" imagePullPolicy: IfNotPresent command: - cilium-operator-generic @@ -1249,6 +1348,16 @@ spec: initialDelaySeconds: 60 periodSeconds: 10 timeoutSeconds: 3 + readinessProbe: + httpGet: + host: "127.0.0.1" + path: /healthz + port: 9234 + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 5 volumeMounts: - name: cilium-config-path mountPath: /tmp/cilium/config-map @@ -1312,15 +1421,24 @@ spec: metadata: annotations: # ensure pods roll when configmap updates - cilium.io/hubble-relay-configmap-checksum: "121d3ca340f3623a68297728e72f60908cf197df412eb4bb266f449c1794a5a7" + cilium.io/hubble-relay-configmap-checksum: "021b54fa697399fbce31d464cf934ae4b921370cdcdcf3f98ca0a3d8a3201b76" labels: k8s-app: hubble-relay app.kubernetes.io/name: hubble-relay app.kubernetes.io/part-of: cilium spec: + securityContext: + fsGroup: 10000 containers: - name: hubble-relay - image: "ghcr.io/cybozu/hubble-relay:1.13.16.1" + securityContext: + capabilities: + drop: + - ALL + runAsGroup: 10000 + runAsNonRoot: true + runAsUser: 10000 + image: "ghcr.io/cybozu/hubble-relay:1.14.14.1" imagePullPolicy: IfNotPresent command: - hubble-relay @@ -1377,12 +1495,12 @@ spec: - secret: name: hubble-relay-client-certs items: - - key: ca.crt - path: hubble-server-ca.crt - key: tls.crt path: client.crt - key: tls.key path: client.key + - key: ca.crt + path: hubble-server-ca.crt - secret: name: hubble-relay-server-certs items: @@ -1391,49 +1509,6 @@ spec: - key: tls.key path: server.key --- -# Source: cilium/templates/hubble/tls-cronjob/job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: hubble-generate-certs-4b23ed05ea - namespace: kube-system - labels: - k8s-app: hubble-generate-certs - app.kubernetes.io/name: hubble-generate-certs - app.kubernetes.io/part-of: cilium -spec: - template: - metadata: - labels: - 
k8s-app: hubble-generate-certs - spec: - containers: - - name: certgen - image: "ghcr.io/cybozu/cilium-certgen:0.1.11.1" - imagePullPolicy: IfNotPresent - command: - - "/usr/bin/cilium-certgen" - # Because this is executed as a job, we pass the values as command - # line args instead of via config map. This allows users to inspect - # the values used in past runs by inspecting the completed pod. - args: - - "--cilium-namespace=kube-system" - - "--ca-generate" - - "--ca-reuse-secret" - - "--hubble-server-cert-generate" - - "--hubble-server-cert-common-name=*.default.hubble-grpc.cilium.io" - - "--hubble-server-cert-validity-duration=94608000s" - - "--hubble-relay-client-cert-generate" - - "--hubble-relay-client-cert-validity-duration=94608000s" - - "--hubble-relay-server-cert-generate" - - "--hubble-relay-server-cert-validity-duration=94608000s" - hostNetwork: true - serviceAccount: "hubble-generate-certs" - serviceAccountName: "hubble-generate-certs" - automountServiceAccountToken: true - restartPolicy: OnFailure - ttlSecondsAfterFinished: 1800 ---- # Source: cilium/templates/hubble/tls-cronjob/cronjob.yaml apiVersion: batch/v1 kind: CronJob @@ -1444,6 +1519,7 @@ metadata: k8s-app: hubble-generate-certs app.kubernetes.io/name: hubble-generate-certs app.kubernetes.io/part-of: cilium + annotations: spec: schedule: "0 0 1 */4 *" concurrencyPolicy: Forbid @@ -1456,7 +1532,7 @@ spec: spec: containers: - name: certgen - image: "ghcr.io/cybozu/cilium-certgen:0.1.11.1" + image: "ghcr.io/cybozu/cilium-certgen:0.1.14.1" imagePullPolicy: IfNotPresent command: - "/usr/bin/cilium-certgen" @@ -1485,3 +1561,48 @@ spec: # Only create the namespace if it's different from Ingress secret namespace or Ingress is not enabled. # Only create the namespace if it's different from Ingress and Gateway API secret namespaces (if enabled). +--- +# Source: cilium/templates/hubble/tls-cronjob/job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: hubble-generate-certs + namespace: kube-system + labels: + k8s-app: hubble-generate-certs + app.kubernetes.io/name: hubble-generate-certs + app.kubernetes.io/part-of: cilium + annotations: + "helm.sh/hook": post-install,post-upgrade +spec: + template: + metadata: + labels: + k8s-app: hubble-generate-certs + spec: + containers: + - name: certgen + image: "ghcr.io/cybozu/cilium-certgen:0.1.14.1" + imagePullPolicy: IfNotPresent + command: + - "/usr/bin/cilium-certgen" + # Because this is executed as a job, we pass the values as command + # line args instead of via config map. This allows users to inspect + # the values used in past runs by inspecting the completed pod. 
+ args: + - "--cilium-namespace=kube-system" + - "--ca-generate" + - "--ca-reuse-secret" + - "--hubble-server-cert-generate" + - "--hubble-server-cert-common-name=*.default.hubble-grpc.cilium.io" + - "--hubble-server-cert-validity-duration=94608000s" + - "--hubble-relay-client-cert-generate" + - "--hubble-relay-client-cert-validity-duration=94608000s" + - "--hubble-relay-server-cert-generate" + - "--hubble-relay-server-cert-validity-duration=94608000s" + hostNetwork: true + serviceAccount: "hubble-generate-certs" + serviceAccountName: "hubble-generate-certs" + automountServiceAccountToken: true + restartPolicy: OnFailure + ttlSecondsAfterFinished: 1800 diff --git a/cilium/prod/values.yaml b/cilium/prod/values.yaml index ad673a00c..19f53eede 100644 --- a/cilium/prod/values.yaml +++ b/cilium/prod/values.yaml @@ -35,6 +35,11 @@ hubble: requests: cpu: 100m memory: 200Mi + podSecurityContext: + fsGroup: 10000 + securityContext: + runAsUser: 10000 + runAsGroup: 10000 tls: auto: method: "cronJob" diff --git a/etc/cilium-pre.yaml b/etc/cilium-pre.yaml index ae02b6925..41b209bd9 100644 --- a/etc/cilium-pre.yaml +++ b/etc/cilium-pre.yaml @@ -91,6 +91,9 @@ rules: - ciliumexternalworkloads - ciliumexternalworkloads/finalizers - ciliumexternalworkloads/status + - ciliumcidrgroups + - ciliumcidrgroups/finalizers + - ciliumcidrgroups/status verbs: - get - list @@ -155,6 +158,9 @@ rules: - ciliumnetworkpolicies - ciliumnodes - ciliumnodeconfigs + - ciliumcidrgroups + - ciliuml2announcementpolicies + - ciliumpodippools verbs: - list - watch @@ -194,6 +200,7 @@ rules: - ciliumclusterwidenetworkpolicies/status - ciliumendpoints/status - ciliumendpoints + - ciliuml2announcementpolicies/status verbs: - patch --- @@ -351,6 +358,9 @@ rules: - ciliumnetworkpolicies.cilium.io - ciliumnodes.cilium.io - ciliumnodeconfigs.cilium.io + - ciliumcidrgroups.cilium.io + - ciliuml2announcementpolicies.cilium.io + - ciliumpodippools.cilium.io resources: - customresourcedefinitions verbs: @@ -359,10 +369,17 @@ rules: - cilium.io resources: - ciliumloadbalancerippools + - ciliumpodippools verbs: - get - list - watch +- apiGroups: + - cilium.io + resources: + - ciliumpodippools + verbs: + - create - apiGroups: - cilium.io resources: @@ -405,7 +422,6 @@ rules: - "" resourceNames: - cilium-ca - - hubble-ca-secret resources: - secrets verbs: @@ -538,15 +554,15 @@ data: cluster-pool-ipv4-cidr: 10.0.0.0/8 cluster-pool-ipv4-mask-size: "24" cni-chaining-mode: generic-veth - cni-uninstall: "true" + cnp-node-status-gc-interval: 0s custom-cni-conf: "true" debug: "false" debug-verbose: "" devices: eth+ eno1+ eno2+ direct-routing-device: e+ disable-cnp-status-updates: "true" - disable-endpoint-crd: "false" - dnsproxy-enable-transparent-mode: "true" + dnsproxy-socket-linger-timeout: "10" + egress-gateway-reconciliation-trigger-interval: 1s enable-auto-protect-node-port-range: "true" enable-bgp-control-plane: "false" enable-bpf-clock-probe: "false" @@ -559,10 +575,12 @@ data: enable-hubble: "true" enable-identity-mark: "false" enable-ipv4: "true" + enable-ipv4-big-tcp: "false" enable-ipv4-masquerade: "false" enable-ipv6: "false" enable-ipv6-big-tcp: "false" enable-ipv6-masquerade: "true" + enable-k8s-networkpolicy: "true" enable-k8s-terminating-endpoint: "true" enable-l2-neigh-discovery: "true" enable-l7-proxy: "true" @@ -578,6 +596,7 @@ data: enable-vtep: "false" enable-well-known-identities: "false" enable-xt-socket-fallback: "true" + external-envoy-proxy: "false" hubble-disable-tls: "false" hubble-listen-address: :4244 hubble-socket-path: 
/var/run/cilium/hubble.sock @@ -589,11 +608,18 @@ data: identity-heartbeat-timeout: 30m0s install-no-conntrack-iptables-rules: "false" ipam: cluster-pool + ipam-cilium-node-update-rate: 15s + k8s-client-burst: "10" + k8s-client-qps: "5" kube-proxy-replacement: partial kube-proxy-replacement-healthz-bind-address: "" labels: ' k8s:app k8s:io\.cilium\.k8s\.namespace\.labels\.team k8s:io\.kubernetes\.pod\.namespace k8s:k8s-app io\.cilium\.k8s\.policy cybozu\.io/family app\.cybozu\.io neco\.cybozu\.io\/registry identity\.neco\.cybozu\.io ' + mesh-auth-enabled: "true" + mesh-auth-gc-interval: 5m0s + mesh-auth-queue-size: "1024" + mesh-auth-rotated-identities-queue-size: "1024" metrics: +cilium_bpf_map_pressure monitor-aggregation: medium monitor-aggregation-flags: all @@ -609,9 +635,17 @@ data: preallocate-bpf-maps: "false" procfs: /host/proc prometheus-serve-addr: :9962 + proxy-connect-timeout: "2" + proxy-idle-timeout-seconds: "60" + proxy-max-connection-duration-seconds: "0" + proxy-max-requests-per-connection: "0" proxy-prometheus-port: "9964" + proxy-xff-num-trusted-hops-egress: "0" + proxy-xff-num-trusted-hops-ingress: "0" remove-cilium-node-taints: "true" + routing-mode: native set-cilium-is-up-condition: "true" + set-cilium-node-taints: "true" sidecar-istio-proxy-image: cilium/istio_proxy skip-cnp-status-startup-clean: "false" synchronize-k8s-nodes: "true" @@ -620,9 +654,7 @@ data: tofqdns-endpoint-max-ip-per-hostname: "50" tofqdns-idle-connection-grace-period: 0s tofqdns-max-deferred-connection-deletes: "10000" - tofqdns-min-ttl: "3600" tofqdns-proxy-response-max-delay: 100ms - tunnel: disabled unmanaged-pod-watcher-interval: "15" vtep-cidr: "" vtep-endpoint: "" @@ -636,10 +668,10 @@ metadata: apiVersion: v1 data: config.yaml: "cluster-name: default\npeer-service: \"hubble-peer.kube-system.svc.cluster.local:443\"\nlisten-address: - :4245\ndial-timeout: \nretry-timeout: \nsort-buffer-len-max: \nsort-buffer-drain-timeout: - \ntls-client-cert-file: /var/lib/hubble-relay/tls/client.crt\ntls-client-key-file: - /var/lib/hubble-relay/tls/client.key\ntls-hubble-server-ca-files: /var/lib/hubble-relay/tls/hubble-server-ca.crt\ntls-server-cert-file: - /var/lib/hubble-relay/tls/server.crt\ntls-server-key-file: /var/lib/hubble-relay/tls/server.key\n" + :4245\ngops: true\ngops-port: \"9893\"\ndial-timeout: \nretry-timeout: \nsort-buffer-len-max: + \nsort-buffer-drain-timeout: \ntls-hubble-client-cert-file: /var/lib/hubble-relay/tls/client.crt\ntls-hubble-client-key-file: + /var/lib/hubble-relay/tls/client.key\ntls-hubble-server-ca-files: /var/lib/hubble-relay/tls/hubble-server-ca.crt\ntls-relay-server-cert-file: + /var/lib/hubble-relay/tls/server.crt\ntls-relay-server-key-file: /var/lib/hubble-relay/tls/server.key\n" kind: ConfigMap metadata: name: hubble-relay-config @@ -725,13 +757,13 @@ spec: name: cilium-operator strategy: rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 + maxSurge: 25% + maxUnavailable: 50% type: RollingUpdate template: metadata: annotations: - cilium.io/cilium-configmap-checksum: 89029740a4242a661efaec2ce058760e9d1e323c603534509c54902ef6891b1e + cilium.io/cilium-configmap-checksum: d4bf08bf4c6ee946280b8e7bcb2586f8833a3a3e46137f6979d77b3020e1f546 prometheus.io/port: "9963" prometheus.io/scrape: "true" labels: @@ -775,7 +807,7 @@ spec: value: 127.0.0.1 - name: KUBERNETES_SERVICE_PORT value: "16443" - image: ghcr.io/cybozu/cilium-operator-generic:1.13.16.1 + image: ghcr.io/cybozu/cilium-operator-generic:1.14.14.1 imagePullPolicy: IfNotPresent livenessProbe: httpGet: @@ -792,6 
+824,16 @@ spec: hostPort: 9963 name: prometheus protocol: TCP + readinessProbe: + failureThreshold: 5 + httpGet: + host: 127.0.0.1 + path: /healthz + port: 9234 + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 5 + timeoutSeconds: 3 resources: limits: cpu: 250m @@ -845,7 +887,7 @@ spec: template: metadata: annotations: - cilium.io/hubble-relay-configmap-checksum: 121d3ca340f3623a68297728e72f60908cf197df412eb4bb266f449c1794a5a7 + cilium.io/hubble-relay-configmap-checksum: 021b54fa697399fbce31d464cf934ae4b921370cdcdcf3f98ca0a3d8a3201b76 labels: app.kubernetes.io/name: hubble-relay app.kubernetes.io/part-of: cilium @@ -864,7 +906,7 @@ spec: - serve command: - hubble-relay - image: ghcr.io/cybozu/hubble-relay:1.13.16.1 + image: ghcr.io/cybozu/hubble-relay:1.14.14.1 imagePullPolicy: IfNotPresent livenessProbe: tcpSocket: @@ -883,6 +925,13 @@ spec: requests: cpu: 210m memory: 120Mi + securityContext: + capabilities: + drop: + - ALL + runAsGroup: 10000 + runAsNonRoot: true + runAsUser: 10000 terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /etc/hubble-relay @@ -895,6 +944,8 @@ spec: kubernetes.io/os: linux priorityClassName: null restartPolicy: Always + securityContext: + fsGroup: 10000 serviceAccount: hubble-relay serviceAccountName: hubble-relay terminationGracePeriodSeconds: 1 @@ -911,12 +962,12 @@ spec: sources: - secret: items: - - key: ca.crt - path: hubble-server-ca.crt - key: tls.crt path: client.crt - key: tls.key path: client.key + - key: ca.crt + path: hubble-server-ca.crt name: hubble-relay-client-certs - secret: items: @@ -959,7 +1010,7 @@ spec: - --hubble-relay-server-cert-validity-duration=94608000s command: - /usr/bin/cilium-certgen - image: ghcr.io/cybozu/cilium-certgen:0.1.11.1 + image: ghcr.io/cybozu/cilium-certgen:0.1.14.1 imagePullPolicy: IfNotPresent name: certgen hostNetwork: true @@ -1002,7 +1053,7 @@ spec: template: metadata: annotations: - cilium.io/cilium-configmap-checksum: 89029740a4242a661efaec2ce058760e9d1e323c603534509c54902ef6891b1e + cilium.io/cilium-configmap-checksum: d4bf08bf4c6ee946280b8e7bcb2586f8833a3a3e46137f6979d77b3020e1f546 container.apparmor.security.beta.kubernetes.io/cilium-agent: unconfined container.apparmor.security.beta.kubernetes.io/clean-cilium-state: unconfined prometheus.io/port: "9962" @@ -1038,23 +1089,11 @@ spec: fieldPath: metadata.namespace - name: CILIUM_CLUSTERMESH_CONFIG value: /var/lib/cilium/clustermesh/ - - name: CILIUM_CNI_CHAINING_MODE - valueFrom: - configMapKeyRef: - key: cni-chaining-mode - name: cilium-config - optional: true - - name: CILIUM_CUSTOM_CNI_CONF - valueFrom: - configMapKeyRef: - key: custom-cni-conf - name: cilium-config - optional: true - name: KUBERNETES_SERVICE_HOST value: 127.0.0.1 - name: KUBERNETES_SERVICE_PORT value: "16443" - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent lifecycle: postStart: @@ -1063,7 +1102,25 @@ spec: - bash - -c - | - /cni-install.sh --enable-debug=false --cni-exclusive=true --log-file=/var/run/cilium/cilium-cni.log + set -o errexit + set -o pipefail + set -o nounset + + # When running in AWS ENI mode, it's likely that 'aws-node' has + # had a chance to install SNAT iptables rules. These can result + # in dropped traffic, so we should attempt to remove them. + # We do it using a 'postStart' hook since this may need to run + # for nodes which might have already been init'ed but may still + # have dangling rules. 
This is safe because there are no + # dependencies on anything that is part of the startup script + # itself, and can be safely run multiple times per node (e.g. in + # case of a restart). + if [[ "$(iptables-save | grep -E -c 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN')" != "0" ]]; + then + echo 'Deleting iptables rules created by the AWS CNI VPC plugin' + iptables-save | grep -E -v 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN' | iptables-restore + fi + echo 'Done!' preStop: exec: command: @@ -1195,20 +1252,50 @@ spec: value: 127.0.0.1 - name: KUBERNETES_SERVICE_PORT value: "16443" - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent name: config terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tmp + - command: + - sh + - -ec + - | + cp /usr/bin/cilium-sysctlfix /hostbin/cilium-sysctlfix; + nsenter --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-sysctlfix"; + rm /hostbin/cilium-sysctlfix + env: + - name: BIN_PATH + value: /opt/cni/bin + image: ghcr.io/cybozu/cilium:1.14.14.1 + imagePullPolicy: IfNotPresent + name: apply-sysctl-overwrites + securityContext: + capabilities: + add: + - SYS_ADMIN + - SYS_CHROOT + - SYS_PTRACE + drop: + - ALL + seLinuxOptions: + level: s0 + type: spc_t + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /hostproc + name: hostproc + - mountPath: /hostbin + name: cni-path - args: - mount | grep "/sys/fs/bpf type bpf" || mount -t bpf bpf /sys/fs/bpf command: - /bin/bash - -c - -- - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent name: mount-bpf-fs securityContext: @@ -1237,7 +1324,7 @@ spec: value: 127.0.0.1 - name: KUBERNETES_SERVICE_PORT value: "16443" - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent name: clean-cilium-state securityContext: @@ -1263,7 +1350,7 @@ spec: name: cilium-run - command: - /install-plugin.sh - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent name: install-cni-binaries resources: @@ -1301,6 +1388,10 @@ spec: path: /sys/fs/bpf type: DirectoryOrCreate name: bpf-maps + - hostPath: + path: /proc + type: Directory + name: hostproc - hostPath: path: /sys/fs/cgroup type: DirectoryOrCreate @@ -1321,10 +1412,22 @@ spec: type: FileOrCreate name: xtables-lock - name: clustermesh-secrets - secret: + projected: defaultMode: 256 - optional: true - secretName: cilium-clustermesh + sources: + - secret: + name: cilium-clustermesh + optional: true + - secret: + items: + - key: tls.key + path: common-etcd-client.key + - key: tls.crt + path: common-etcd-client.crt + - key: ca.crt + path: common-etcd-client-ca.crt + name: clustermesh-apiserver-remote-cert + optional: true - configMap: name: bgp-config name: bgp-config-path @@ -1342,12 +1445,12 @@ spec: sources: - secret: items: - - key: ca.crt - path: client-ca.crt - key: tls.crt path: server.crt - key: tls.key path: server.key + - key: ca.crt + path: client-ca.crt name: hubble-server-certs optional: true updateStrategy: @@ -1356,11 +1459,13 @@ spec: apiVersion: batch/v1 kind: Job metadata: + annotations: + helm.sh/hook: post-install,post-upgrade labels: app.kubernetes.io/name: hubble-generate-certs app.kubernetes.io/part-of: cilium k8s-app: hubble-generate-certs - name: hubble-generate-certs-4b23ed05ea + name: hubble-generate-certs namespace: kube-system spec: template: @@ -1383,7 +1488,7 @@ spec: - 
--hubble-relay-server-cert-validity-duration=94608000s command: - /usr/bin/cilium-certgen - image: ghcr.io/cybozu/cilium-certgen:0.1.11.1 + image: ghcr.io/cybozu/cilium-certgen:0.1.14.1 imagePullPolicy: IfNotPresent name: certgen hostNetwork: true diff --git a/etc/cilium.yaml b/etc/cilium.yaml index 21e735e39..05f6b4ca4 100644 --- a/etc/cilium.yaml +++ b/etc/cilium.yaml @@ -91,6 +91,9 @@ rules: - ciliumexternalworkloads - ciliumexternalworkloads/finalizers - ciliumexternalworkloads/status + - ciliumcidrgroups + - ciliumcidrgroups/finalizers + - ciliumcidrgroups/status verbs: - get - list @@ -155,6 +158,9 @@ rules: - ciliumnetworkpolicies - ciliumnodes - ciliumnodeconfigs + - ciliumcidrgroups + - ciliuml2announcementpolicies + - ciliumpodippools verbs: - list - watch @@ -194,6 +200,7 @@ rules: - ciliumclusterwidenetworkpolicies/status - ciliumendpoints/status - ciliumendpoints + - ciliuml2announcementpolicies/status verbs: - patch --- @@ -351,6 +358,9 @@ rules: - ciliumnetworkpolicies.cilium.io - ciliumnodes.cilium.io - ciliumnodeconfigs.cilium.io + - ciliumcidrgroups.cilium.io + - ciliuml2announcementpolicies.cilium.io + - ciliumpodippools.cilium.io resources: - customresourcedefinitions verbs: @@ -359,10 +369,17 @@ rules: - cilium.io resources: - ciliumloadbalancerippools + - ciliumpodippools verbs: - get - list - watch +- apiGroups: + - cilium.io + resources: + - ciliumpodippools + verbs: + - create - apiGroups: - cilium.io resources: @@ -405,7 +422,6 @@ rules: - "" resourceNames: - cilium-ca - - hubble-ca-secret resources: - secrets verbs: @@ -538,15 +554,15 @@ data: cluster-pool-ipv4-cidr: 10.0.0.0/8 cluster-pool-ipv4-mask-size: "24" cni-chaining-mode: generic-veth - cni-uninstall: "true" + cnp-node-status-gc-interval: 0s custom-cni-conf: "true" debug: "false" debug-verbose: "" devices: eth+ eno1+ eno2+ direct-routing-device: e+ disable-cnp-status-updates: "true" - disable-endpoint-crd: "false" - dnsproxy-enable-transparent-mode: "true" + dnsproxy-socket-linger-timeout: "10" + egress-gateway-reconciliation-trigger-interval: 1s enable-auto-protect-node-port-range: "true" enable-bgp-control-plane: "false" enable-bpf-clock-probe: "false" @@ -559,10 +575,12 @@ data: enable-hubble: "true" enable-identity-mark: "false" enable-ipv4: "true" + enable-ipv4-big-tcp: "false" enable-ipv4-masquerade: "false" enable-ipv6: "false" enable-ipv6-big-tcp: "false" enable-ipv6-masquerade: "true" + enable-k8s-networkpolicy: "true" enable-k8s-terminating-endpoint: "true" enable-l2-neigh-discovery: "true" enable-l7-proxy: "true" @@ -578,6 +596,7 @@ data: enable-vtep: "false" enable-well-known-identities: "false" enable-xt-socket-fallback: "true" + external-envoy-proxy: "false" hubble-disable-tls: "false" hubble-listen-address: :4244 hubble-socket-path: /var/run/cilium/hubble.sock @@ -589,11 +608,18 @@ data: identity-heartbeat-timeout: 30m0s install-no-conntrack-iptables-rules: "false" ipam: cluster-pool + ipam-cilium-node-update-rate: 15s + k8s-client-burst: "10" + k8s-client-qps: "5" kube-proxy-replacement: partial kube-proxy-replacement-healthz-bind-address: "" labels: ' k8s:app k8s:io\.cilium\.k8s\.namespace\.labels\.team k8s:io\.kubernetes\.pod\.namespace k8s:k8s-app io\.cilium\.k8s\.policy cybozu\.io/family app\.cybozu\.io neco\.cybozu\.io\/registry identity\.neco\.cybozu\.io ' + mesh-auth-enabled: "true" + mesh-auth-gc-interval: 5m0s + mesh-auth-queue-size: "1024" + mesh-auth-rotated-identities-queue-size: "1024" metrics: +cilium_bpf_map_pressure monitor-aggregation: medium monitor-aggregation-flags: all 
@@ -606,9 +632,17 @@ data: preallocate-bpf-maps: "false" procfs: /host/proc prometheus-serve-addr: :9962 + proxy-connect-timeout: "2" + proxy-idle-timeout-seconds: "60" + proxy-max-connection-duration-seconds: "0" + proxy-max-requests-per-connection: "0" proxy-prometheus-port: "9964" + proxy-xff-num-trusted-hops-egress: "0" + proxy-xff-num-trusted-hops-ingress: "0" remove-cilium-node-taints: "true" + routing-mode: native set-cilium-is-up-condition: "true" + set-cilium-node-taints: "true" sidecar-istio-proxy-image: cilium/istio_proxy skip-cnp-status-startup-clean: "false" synchronize-k8s-nodes: "true" @@ -617,9 +651,7 @@ data: tofqdns-endpoint-max-ip-per-hostname: "50" tofqdns-idle-connection-grace-period: 0s tofqdns-max-deferred-connection-deletes: "10000" - tofqdns-min-ttl: "3600" tofqdns-proxy-response-max-delay: 100ms - tunnel: disabled unmanaged-pod-watcher-interval: "15" vtep-cidr: "" vtep-endpoint: "" @@ -633,10 +665,10 @@ metadata: apiVersion: v1 data: config.yaml: "cluster-name: default\npeer-service: \"hubble-peer.kube-system.svc.cluster.local:443\"\nlisten-address: - :4245\ndial-timeout: \nretry-timeout: \nsort-buffer-len-max: \nsort-buffer-drain-timeout: - \ntls-client-cert-file: /var/lib/hubble-relay/tls/client.crt\ntls-client-key-file: - /var/lib/hubble-relay/tls/client.key\ntls-hubble-server-ca-files: /var/lib/hubble-relay/tls/hubble-server-ca.crt\ntls-server-cert-file: - /var/lib/hubble-relay/tls/server.crt\ntls-server-key-file: /var/lib/hubble-relay/tls/server.key\n" + :4245\ngops: true\ngops-port: \"9893\"\ndial-timeout: \nretry-timeout: \nsort-buffer-len-max: + \nsort-buffer-drain-timeout: \ntls-hubble-client-cert-file: /var/lib/hubble-relay/tls/client.crt\ntls-hubble-client-key-file: + /var/lib/hubble-relay/tls/client.key\ntls-hubble-server-ca-files: /var/lib/hubble-relay/tls/hubble-server-ca.crt\ntls-relay-server-cert-file: + /var/lib/hubble-relay/tls/server.crt\ntls-relay-server-key-file: /var/lib/hubble-relay/tls/server.key\n" kind: ConfigMap metadata: name: hubble-relay-config @@ -722,13 +754,13 @@ spec: name: cilium-operator strategy: rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 + maxSurge: 25% + maxUnavailable: 50% type: RollingUpdate template: metadata: annotations: - cilium.io/cilium-configmap-checksum: 6ce5254ae5e45c178f019621aa0bca076d336d1231fd90ddb8df2f77e2ebc667 + cilium.io/cilium-configmap-checksum: d5a6358f3358cdc61bf73eddd0be4f8a5b8909d0f95d0236cd095e308678a1a0 prometheus.io/port: "9963" prometheus.io/scrape: "true" labels: @@ -772,7 +804,7 @@ spec: value: 127.0.0.1 - name: KUBERNETES_SERVICE_PORT value: "16443" - image: ghcr.io/cybozu/cilium-operator-generic:1.13.16.1 + image: ghcr.io/cybozu/cilium-operator-generic:1.14.14.1 imagePullPolicy: IfNotPresent livenessProbe: httpGet: @@ -789,6 +821,16 @@ spec: hostPort: 9963 name: prometheus protocol: TCP + readinessProbe: + failureThreshold: 5 + httpGet: + host: 127.0.0.1 + path: /healthz + port: 9234 + scheme: HTTP + initialDelaySeconds: 0 + periodSeconds: 5 + timeoutSeconds: 3 resources: requests: cpu: 100m @@ -839,7 +881,7 @@ spec: template: metadata: annotations: - cilium.io/hubble-relay-configmap-checksum: 121d3ca340f3623a68297728e72f60908cf197df412eb4bb266f449c1794a5a7 + cilium.io/hubble-relay-configmap-checksum: 021b54fa697399fbce31d464cf934ae4b921370cdcdcf3f98ca0a3d8a3201b76 labels: app.kubernetes.io/name: hubble-relay app.kubernetes.io/part-of: cilium @@ -858,7 +900,7 @@ spec: - serve command: - hubble-relay - image: ghcr.io/cybozu/hubble-relay:1.13.16.1 + image: 
ghcr.io/cybozu/hubble-relay:1.14.14.1 imagePullPolicy: IfNotPresent livenessProbe: tcpSocket: @@ -874,6 +916,13 @@ spec: requests: cpu: 100m memory: 200Mi + securityContext: + capabilities: + drop: + - ALL + runAsGroup: 10000 + runAsNonRoot: true + runAsUser: 10000 terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /etc/hubble-relay @@ -886,6 +935,8 @@ spec: kubernetes.io/os: linux priorityClassName: null restartPolicy: Always + securityContext: + fsGroup: 10000 serviceAccount: hubble-relay serviceAccountName: hubble-relay terminationGracePeriodSeconds: 1 @@ -902,12 +953,12 @@ spec: sources: - secret: items: - - key: ca.crt - path: hubble-server-ca.crt - key: tls.crt path: client.crt - key: tls.key path: client.key + - key: ca.crt + path: hubble-server-ca.crt name: hubble-relay-client-certs - secret: items: @@ -950,7 +1001,7 @@ spec: - --hubble-relay-server-cert-validity-duration=94608000s command: - /usr/bin/cilium-certgen - image: ghcr.io/cybozu/cilium-certgen:0.1.11.1 + image: ghcr.io/cybozu/cilium-certgen:0.1.14.1 imagePullPolicy: IfNotPresent name: certgen hostNetwork: true @@ -993,7 +1044,7 @@ spec: template: metadata: annotations: - cilium.io/cilium-configmap-checksum: 6ce5254ae5e45c178f019621aa0bca076d336d1231fd90ddb8df2f77e2ebc667 + cilium.io/cilium-configmap-checksum: d5a6358f3358cdc61bf73eddd0be4f8a5b8909d0f95d0236cd095e308678a1a0 container.apparmor.security.beta.kubernetes.io/cilium-agent: unconfined container.apparmor.security.beta.kubernetes.io/clean-cilium-state: unconfined prometheus.io/port: "9962" @@ -1029,23 +1080,11 @@ spec: fieldPath: metadata.namespace - name: CILIUM_CLUSTERMESH_CONFIG value: /var/lib/cilium/clustermesh/ - - name: CILIUM_CNI_CHAINING_MODE - valueFrom: - configMapKeyRef: - key: cni-chaining-mode - name: cilium-config - optional: true - - name: CILIUM_CUSTOM_CNI_CONF - valueFrom: - configMapKeyRef: - key: custom-cni-conf - name: cilium-config - optional: true - name: KUBERNETES_SERVICE_HOST value: 127.0.0.1 - name: KUBERNETES_SERVICE_PORT value: "16443" - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent lifecycle: postStart: @@ -1054,7 +1093,25 @@ spec: - bash - -c - | - /cni-install.sh --enable-debug=false --cni-exclusive=true --log-file=/var/run/cilium/cilium-cni.log + set -o errexit + set -o pipefail + set -o nounset + + # When running in AWS ENI mode, it's likely that 'aws-node' has + # had a chance to install SNAT iptables rules. These can result + # in dropped traffic, so we should attempt to remove them. + # We do it using a 'postStart' hook since this may need to run + # for nodes which might have already been init'ed but may still + # have dangling rules. This is safe because there are no + # dependencies on anything that is part of the startup script + # itself, and can be safely run multiple times per node (e.g. in + # case of a restart). + if [[ "$(iptables-save | grep -E -c 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN')" != "0" ]]; + then + echo 'Deleting iptables rules created by the AWS CNI VPC plugin' + iptables-save | grep -E -v 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN' | iptables-restore + fi + echo 'Done!' 
preStop: exec: command: @@ -1186,20 +1243,50 @@ spec: value: 127.0.0.1 - name: KUBERNETES_SERVICE_PORT value: "16443" - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent name: config terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /tmp name: tmp + - command: + - sh + - -ec + - | + cp /usr/bin/cilium-sysctlfix /hostbin/cilium-sysctlfix; + nsenter --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-sysctlfix"; + rm /hostbin/cilium-sysctlfix + env: + - name: BIN_PATH + value: /opt/cni/bin + image: ghcr.io/cybozu/cilium:1.14.14.1 + imagePullPolicy: IfNotPresent + name: apply-sysctl-overwrites + securityContext: + capabilities: + add: + - SYS_ADMIN + - SYS_CHROOT + - SYS_PTRACE + drop: + - ALL + seLinuxOptions: + level: s0 + type: spc_t + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /hostproc + name: hostproc + - mountPath: /hostbin + name: cni-path - args: - mount | grep "/sys/fs/bpf type bpf" || mount -t bpf bpf /sys/fs/bpf command: - /bin/bash - -c - -- - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent name: mount-bpf-fs securityContext: @@ -1228,7 +1315,7 @@ spec: value: 127.0.0.1 - name: KUBERNETES_SERVICE_PORT value: "16443" - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent name: clean-cilium-state securityContext: @@ -1254,7 +1341,7 @@ spec: name: cilium-run - command: - /install-plugin.sh - image: ghcr.io/cybozu/cilium:1.13.16.4 + image: ghcr.io/cybozu/cilium:1.14.14.1 imagePullPolicy: IfNotPresent name: install-cni-binaries resources: @@ -1292,6 +1379,10 @@ spec: path: /sys/fs/bpf type: DirectoryOrCreate name: bpf-maps + - hostPath: + path: /proc + type: Directory + name: hostproc - hostPath: path: /sys/fs/cgroup type: DirectoryOrCreate @@ -1312,10 +1403,22 @@ spec: type: FileOrCreate name: xtables-lock - name: clustermesh-secrets - secret: + projected: defaultMode: 256 - optional: true - secretName: cilium-clustermesh + sources: + - secret: + name: cilium-clustermesh + optional: true + - secret: + items: + - key: tls.key + path: common-etcd-client.key + - key: tls.crt + path: common-etcd-client.crt + - key: ca.crt + path: common-etcd-client-ca.crt + name: clustermesh-apiserver-remote-cert + optional: true - configMap: name: bgp-config name: bgp-config-path @@ -1333,12 +1436,12 @@ spec: sources: - secret: items: - - key: ca.crt - path: client-ca.crt - key: tls.crt path: server.crt - key: tls.key path: server.key + - key: ca.crt + path: client-ca.crt name: hubble-server-certs optional: true updateStrategy: @@ -1347,11 +1450,13 @@ spec: apiVersion: batch/v1 kind: Job metadata: + annotations: + helm.sh/hook: post-install,post-upgrade labels: app.kubernetes.io/name: hubble-generate-certs app.kubernetes.io/part-of: cilium k8s-app: hubble-generate-certs - name: hubble-generate-certs-4b23ed05ea + name: hubble-generate-certs namespace: kube-system spec: template: @@ -1374,7 +1479,7 @@ spec: - --hubble-relay-server-cert-validity-duration=94608000s command: - /usr/bin/cilium-certgen - image: ghcr.io/cybozu/cilium-certgen:0.1.11.1 + image: ghcr.io/cybozu/cilium-certgen:0.1.14.1 imagePullPolicy: IfNotPresent name: certgen hostNetwork: true From 4333b7f295c6ea26ed9b93f8d56827121671ea2e Mon Sep 17 00:00:00 2001 From: naoki-take Date: Mon, 19 Aug 2024 08:46:36 +0000 Subject: [PATCH 2/4] Delete only 1 cilium pod Signed-off-by: naoki-take --- dctest/before_test.go | 
2 ++
 dctest/l4lb_test.go    | 40 +++++++++++++++++++++++++++++++---------
 dctest/upgrade_test.go | 41 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/dctest/before_test.go b/dctest/before_test.go
index 7c121770d..266f37e13 100644
--- a/dctest/before_test.go
+++ b/dctest/before_test.go
@@ -22,6 +22,8 @@ func runBeforeSuite() {
 	SetDefaultEventuallyPollingInterval(time.Second)
 	SetDefaultEventuallyTimeout(10 * time.Minute)
+	SetDefaultConsistentlyDuration(time.Second)
+	SetDefaultConsistentlyPollingInterval(100 * time.Millisecond)

 	data, err := os.ReadFile(machinesFile)
 	Expect(err).NotTo(HaveOccurred())
diff --git a/dctest/l4lb_test.go b/dctest/l4lb_test.go
index e14d18d8b..4c644d9f5 100644
--- a/dctest/l4lb_test.go
+++ b/dctest/l4lb_test.go
@@ -78,6 +78,14 @@ func testL4LB() {
 		return exec.Command("ip", "netns", "exec", "external", "curl", targetIPForLocal, "-m", "5").Run()
 	}).Should(Succeed())
+	Consistently(func() error {
+		err := exec.Command("ip", "netns", "exec", "external", "curl", targetIP, "-m", "5").Run()
+		if err != nil {
+			return err
+		}
+
+		return exec.Command("ip", "netns", "exec", "external", "curl", targetIPForLocal, "-m", "5").Run()
+	}).Should(Succeed())

 	By("access service from external(Inbound packets have the tos)")
 	Expect(exec.Command("ip", "netns", "exec", "external",
@@ -90,6 +98,14 @@ func testL4LB() {
 		return exec.Command("ip", "netns", "exec", "external", "curl", targetIPForLocal, "-m", "5").Run()
 	}).Should(Succeed())
+	Consistently(func() error {
+		err := exec.Command("ip", "netns", "exec", "external", "curl", targetIP, "-m", "5").Run()
+		if err != nil {
+			return err
+		}
+
+		return exec.Command("ip", "netns", "exec", "external", "curl", targetIPForLocal, "-m", "5").Run()
+	}).Should(Succeed())

 	Expect(exec.Command("ip", "netns", "exec", "external",
 		"iptables", "-t", "mangle", "-D", "OUTPUT", "-p", "TCP", "--dport", "80", "-j", "TOS", "--set-tos", "0x20").Run()).ShouldNot(HaveOccurred())
@@ -131,20 +147,26 @@ func testL4LB() {
 	Expect(err).NotTo(HaveOccurred(), "stderr: %s", stderr)

 	By("access service from a Pod")
+	stdout, stderr, err = execAt(bootServers[0], "kubectl", "-n", ns, "get", "pods", "-l", "app.kubernetes.io/name=ubuntu-l4lb-client", "-o", "json")
+	Expect(err).ShouldNot(HaveOccurred(), "stdout=%s, stderr=%s", stdout, stderr)
+	podList = &corev1.PodList{}
+	err = json.Unmarshal(stdout, podList)
+	Expect(err).ShouldNot(HaveOccurred())
+	Expect(len(podList.Items)).To(Equal(1))
+	podName := podList.Items[0].Name
+
 	Eventually(func() error {
-		stdout, stderr, err := execAt(bootServers[0], "kubectl", "-n", ns, "get", "pods", "-l", "app.kubernetes.io/name=ubuntu-l4lb-client", "-o", "json")
+		stdout, stderr, err = execAt(bootServers[0], "kubectl", "exec", "-n", ns, podName, "--", "curl", targetIP, "-m", "5")
 		if err != nil {
 			return fmt.Errorf("stdout: %s, stderr: %s, err: %v", stdout, stderr, err)
 		}
-		podList := &corev1.PodList{}
-		if err := json.Unmarshal(stdout, podList); err != nil {
-			return err
-		}
-		if len(podList.Items) != 1 {
-			return fmt.Errorf("podList length is not 1: %d", len(podList.Items))
+		stdout, stderr, err = execAt(bootServers[0], "kubectl", "exec", "-n", ns, podName, "--", "curl", targetIPForLocal, "-m", "5")
+		if err != nil {
+			return fmt.Errorf("stdout: %s, stderr: %s, err: %v", stdout, stderr, err)
 		}
-		podName := podList.Items[0].Name
-
+		return nil
+	}).Should(Succeed())
+	Consistently(func() error {
 		stdout, stderr, err = execAt(bootServers[0], "kubectl", "exec", "-n", ns, podName, "--",
 			"curl", targetIP, "-m", "5")
 		if err != nil {
 			return fmt.Errorf("stdout: %s, stderr: %s, err: %v", stdout, stderr, err)
diff --git a/dctest/upgrade_test.go b/dctest/upgrade_test.go
index 53f0c4681..2d0188ad7 100644
--- a/dctest/upgrade_test.go
+++ b/dctest/upgrade_test.go
@@ -173,7 +173,14 @@ func testUpgrade() {
 	})

 	It("should update cilium-agent", func() {
-		stdout, stderr, err := execAt(bootServers[0], "kubectl", "delete", "pod", "-n=kube-system", "-l=app.kubernetes.io/name=cilium-agent")
+		stdout, stderr, err := execAt(bootServers[0], "kubectl", "-n=kube-system", "get", "pods", "-l=app.kubernetes.io/name=cilium-agent", "-o=json")
+		Expect(err).NotTo(HaveOccurred(), "stdout=%s, stderr=%s", stdout, stderr)
+		podList := new(corev1.PodList)
+		err = json.Unmarshal(stdout, podList)
+		Expect(err).NotTo(HaveOccurred(), "data=%s", stdout)
+		Expect(len(podList.Items)).To(BeNumerically(">", 0))
+		podName := podList.Items[0].Name
+		stdout, stderr, err = execAt(bootServers[0], "kubectl", "delete", "pod", "-n=kube-system", podName)
 		Expect(err).ShouldNot(HaveOccurred(), "stdout=%s, stderr=%s", stdout, stderr)
 	})

@@ -273,7 +280,7 @@ func testUpgrade() {
 		case "squid-exporter":
 			return checkVersionInDeployment("internet-egress", "squid", newImage)
 		case "cilium":
-			return checkVersionInDaemonSet("kube-system", "cilium", newImage)
+			return checkVersionInDaemonSetPartial("kube-system", "cilium", newImage, 1)
 		case "cilium-operator-generic":
 			return checkVersionInDeployment("kube-system", "cilium-operator", newImage)
 		case "hubble-relay":
@@ -443,6 +450,36 @@ func checkVersionInDaemonSet(namespace, dsName, image string) error {
 	return nil
 }

+func checkVersionInDaemonSetPartial(namespace, dsName, image string, desiredNumber int32) error {
+	stdout, _, err := execAt(bootServers[0], "kubectl", "get", "ds", "-n", namespace, dsName, "-o", "json")
+	if err != nil {
+		return err
+	}
+	ds := new(appsv1.DaemonSet)
+	err = json.Unmarshal(stdout, ds)
+	if err != nil {
+		return err
+	}
+	found := false
+	for _, c := range ds.Spec.Template.Spec.Containers {
+		if c.Image == image {
+			found = true
+		}
+	}
+	if !found {
+		return fmt.Errorf("%s not found in %s", image, dsName)
+	}
+	if ds.Status.DesiredNumberScheduled != ds.Status.NumberAvailable {
+		return fmt.Errorf("%s %s is not updated completely. desired number scheduled is %d, but actual available is %d",
+			dsName, image, ds.Status.DesiredNumberScheduled, ds.Status.NumberAvailable)
+	}
+	if desiredNumber != ds.Status.UpdatedNumberScheduled {
+		return fmt.Errorf("%s %s is not updated completely. desired number scheduled is %d, but actual updated is %d",
+			dsName, image, desiredNumber, ds.Status.UpdatedNumberScheduled)
+	}
+	return nil
+}
+
 func checkVersionInDeployment(namespace, deploymentName, image string) error {
 	stdout, _, err := execAt(bootServers[0], "kubectl", "get", "deployment", "-n", namespace, deploymentName, "-o", "json")
 	if err != nil {

From d9361ca3aa3219c8525a31b38e06b70d9c124a70 Mon Sep 17 00:00:00 2001
From: naoki-take
Date: Fri, 27 Sep 2024 07:05:51 +0000
Subject: [PATCH 3/4] Kill half number of cilium-agent

Signed-off-by: naoki-take
---
 dctest/upgrade_test.go | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/dctest/upgrade_test.go b/dctest/upgrade_test.go
index 2d0188ad7..5b0f4f7dd 100644
--- a/dctest/upgrade_test.go
+++ b/dctest/upgrade_test.go
@@ -179,9 +179,12 @@ func testUpgrade() {
 		err = json.Unmarshal(stdout, podList)
 		Expect(err).NotTo(HaveOccurred(), "data=%s", stdout)
 		Expect(len(podList.Items)).To(BeNumerically(">", 0))
-		podName := podList.Items[0].Name
-		stdout, stderr, err = execAt(bootServers[0], "kubectl", "delete", "pod", "-n=kube-system", podName)
-		Expect(err).ShouldNot(HaveOccurred(), "stdout=%s, stderr=%s", stdout, stderr)
+		podNumToKill := len(podList.Items) / 2
+		for i := 0; i < podNumToKill; i++ {
+			podName := podList.Items[i].Name
+			stdout, stderr, err = execAt(bootServers[0], "kubectl", "delete", "pod", "-n=kube-system", podName)
+			Expect(err).ShouldNot(HaveOccurred(), "stdout=%s, stderr=%s", stdout, stderr)
+		}
 	})

 	It("should running newer cke desired image version", func() {

From bac0161e02babdaa3c9993d73d9b8fb02d722afc Mon Sep 17 00:00:00 2001
From: naoki-take
Date: Fri, 27 Sep 2024 01:30:06 +0000
Subject: [PATCH 4/4] Increase worker node to 6

Signed-off-by: naoki-take
---
 dctest/cke_test.go     | 6 +++---
 dctest/upgrade_test.go | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/dctest/cke_test.go b/dctest/cke_test.go
index 6de770f12..acf891cc4 100644
--- a/dctest/cke_test.go
+++ b/dctest/cke_test.go
@@ -37,7 +37,7 @@ func testCKESetup() {
 	It("should generates cluster.yml automatically", func() {
 		By("setting configurations")
 		execSafeAt(bootServers[0], "ckecli", "constraints", "set", "control-plane-count", "3")
-		execSafeAt(bootServers[0], "ckecli", "constraints", "set", "minimum-workers", "2")
+		execSafeAt(bootServers[0], "ckecli", "constraints", "set", "minimum-workers", "6")
 		execSafeAt(bootServers[0], "ckecli", "sabakan", "set-url", "http://localhost:10080")

 		By("waiting for cluster.yml generation")
@@ -128,9 +128,9 @@ func testCKE() {
 				return err
 			}

-			// control-plane-count + minimum-workers = 5
+			// control-plane-count + minimum-workers = 9
 			// https://github.com/cybozu-go/cke/blob/main/docs/sabakan-integration.md#initialization
-			if len(nl.Items) != 5 {
+			if len(nl.Items) != 9 {
 				return fmt.Errorf("too few nodes: %d", len(nl.Items))
 			}
 			return nil
diff --git a/dctest/upgrade_test.go b/dctest/upgrade_test.go
index 5b0f4f7dd..8f72585b1 100644
--- a/dctest/upgrade_test.go
+++ b/dctest/upgrade_test.go
@@ -283,7 +283,7 @@ func testUpgrade() {
 		case "squid-exporter":
 			return checkVersionInDeployment("internet-egress", "squid", newImage)
 		case "cilium":
-			return checkVersionInDaemonSetPartial("kube-system", "cilium", newImage, 1)
+			return checkVersionInDaemonSetPartial("kube-system", "cilium", newImage, 4)
 		case "cilium-operator-generic":
 			return checkVersionInDeployment("kube-system", "cilium-operator", newImage)
 		case "hubble-relay":
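
Note on the literal 4 above (illustrative, not part of the patch series): PATCH 4/4 raises the cluster to 3 control planes plus 6 workers, so cilium-agent, which runs on every node, has 9 pods; PATCH 3/4 deletes half of them, and 9 / 2 = 4 is therefore the number of pods expected to come back with the new image. A minimal sketch of that relationship, assuming the appsv1 types already imported by dctest; expectedUpdatedPods is a hypothetical helper, not something the patches add:

package dctest

import appsv1 "k8s.io/api/apps/v1"

// expectedUpdatedPods returns how many DaemonSet pods should be running the
// new image after half of them are deleted, assuming an OnDelete-style
// rollout where only recreated pods pick up the new template.
// With DesiredNumberScheduled == 9 (3 control planes + 6 workers) this is 4,
// matching the literal passed to checkVersionInDaemonSetPartial above.
func expectedUpdatedPods(ds *appsv1.DaemonSet) int32 {
	return ds.Status.DesiredNumberScheduled / 2
}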