From ca662060962c70ba370409ce8e984cdf09ed15a4 Mon Sep 17 00:00:00 2001 From: Sherif Akoush Date: Fri, 29 Jul 2022 09:50:37 +0100 Subject: [PATCH] Enable kafka kraft in k8s (#370) * upgrade to kafka 3.2 * upgrade kafka in compose to 3.2 * changes to Makefile * changes to cluster.yaml for raft * add to ansible * increase tmpDirSizeLimit * reinstate the readiness probes * parametrise kafka img for compose --- ansible/roles/ecosystem/defaults/main.yaml | 1 + ansible/roles/ecosystem/tasks/kafka.yaml | 5 +++++ kafka/strimzi/cluster.yaml | 21 ++++++++++----------- scheduler/Makefile | 2 +- scheduler/all-base.yaml | 2 +- scheduler/env.all | 2 +- 6 files changed, 19 insertions(+), 14 deletions(-) diff --git a/ansible/roles/ecosystem/defaults/main.yaml b/ansible/roles/ecosystem/defaults/main.yaml index cdec2735d3..9c3a26cf5c 100644 --- a/ansible/roles/ecosystem/defaults/main.yaml +++ b/ansible/roles/ecosystem/defaults/main.yaml @@ -12,6 +12,7 @@ install_opentelemetry: true # Configuration on/off flags ecosystem_configure_prometheus: "{{ install_prometheus }}" +ecosystem_enable_kraft: "{{ install_kafka }}" ecosystem_configure_kafka: "{{ install_kafka }}" ecosystem_configure_jaeger: "{{ install_jaeger }}" ecosystem_configure_opentelemetry: "{{ install_opentelemetry }}" diff --git a/ansible/roles/ecosystem/tasks/kafka.yaml b/ansible/roles/ecosystem/tasks/kafka.yaml index dfdb457142..0c686a6a60 100644 --- a/ansible/roles/ecosystem/tasks/kafka.yaml +++ b/ansible/roles/ecosystem/tasks/kafka.yaml @@ -3,6 +3,11 @@ name: seldonio.k8s.strimzi_kafka when: install_kafka | bool +- name: Enable KRaft + shell: + " kubectl set env deployment/strimzi-cluster-operator STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft -n {{ kafka_namespace }} " + tags: kraft + when: ecosystem_enable_kraft | bool - name: Create Kafka Cluster kubernetes.core.k8s: diff --git a/kafka/strimzi/cluster.yaml b/kafka/strimzi/cluster.yaml index b80fb7c58c..32ca2cf2f3 100644 --- a/kafka/strimzi/cluster.yaml +++ 
b/kafka/strimzi/cluster.yaml @@ -4,7 +4,7 @@ metadata: name: seldon spec: kafka: - version: 3.1.0 + version: 3.2.0 replicas: 3 listeners: - name: plain @@ -28,6 +28,10 @@ spec: transaction.state.log.min.isr: 1 default.replication.factor: 1 min.insync.replicas: 1 + inter.broker.protocol.version: "3.2" + template: + pod: + tmpDirSizeLimit: 100Mi storage: type: jbod volumes: @@ -41,8 +45,12 @@ spec: configMapKeyRef: name: kafka-metrics key: kafka-metrics-config.yml + # zookeeper settings should not be used in case of STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft (raft) + # to enable raft run: + # `kubectl set env deployment/strimzi-cluster-operator STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft -n kafka` + # which is the default with ansible install zookeeper: - replicas: 3 + replicas: 1 readinessProbe: initialDelaySeconds: 15 timeoutSeconds: 5 @@ -53,15 +61,6 @@ spec: type: persistent-claim size: 100Gi deleteClaim: false - metricsConfig: - type: jmxPrometheusExporter - valueFrom: - configMapKeyRef: - name: kafka-metrics - key: zookeeper-metrics-config.yml - entityOperator: - topicOperator: {} - userOperator: {} kafkaExporter: topicRegex: ".*" groupRegex: ".*" diff --git a/scheduler/Makefile b/scheduler/Makefile index 3c986e89d1..c4204328f7 100644 --- a/scheduler/Makefile +++ b/scheduler/Makefile @@ -7,7 +7,7 @@ ENVOY_IMG ?= ${DOCKERHUB_USERNAME}/seldon-envoy:${CUSTOM_IMAGE_TAG} # Grafana image only used for Docker compose not k8s GRAFANA_IMG ?= ${DOCKERHUB_USERNAME}/seldon-grafana:${CUSTOM_IMAGE_TAG} HODOMETER_IMG ?= ${DOCKERHUB_USERNAME}/seldon-hodometer:${CUSTOM_IMAGE_TAG} -KAFKA_IMG ?= quay.io/strimzi/kafka:0.28.0-kafka-3.1.0 +KAFKA_IMG ?= docker.io/bitnami/kafka:3.1 MLSERVER_IMG ?= cliveseldon/mlserver:1.2.0.dev1 MODELGATEWAY_IMG ?= ${DOCKERHUB_USERNAME}/seldon-modelgateway:${CUSTOM_IMAGE_TAG} OTELCOL_IMG ?= otel/opentelemetry-collector-contrib-dev:latest diff --git a/scheduler/all-base.yaml b/scheduler/all-base.yaml index e237982cc9..11d2ef960a 100644 
--- a/scheduler/all-base.yaml +++ b/scheduler/all-base.yaml @@ -100,7 +100,7 @@ services: - "14250" kafka: - image: docker.io/bitnami/kafka:3.1 + image: "${KAFKA_IMAGE_AND_TAG}" volumes: - "kafka_data:/bitnami" depends_on: diff --git a/scheduler/env.all b/scheduler/env.all index 8cd4dff22e..0963c31e4d 100644 --- a/scheduler/env.all +++ b/scheduler/env.all @@ -40,7 +40,7 @@ PIPELINEGATEWAY_IMAGE_AND_TAG=seldonio/seldon-pipelinegateway:latest SERVER_MLSERVER_IMAGE_AND_TAG=cliveseldon/mlserver:1.2.0.dev1 SERVER_TRITON_IMAGE_AND_TAG=nvcr.io/nvidia/tritonserver:22.05-py3 SCHEDULER_IMAGE_AND_TAG=seldonio/seldon-scheduler:latest -KAFKA_IMAGE_AND_TAG=quay.io/strimzi/kafka:0.28.0-kafka-3.1.0 +KAFKA_IMAGE_AND_TAG=docker.io/bitnami/kafka:3.1 GRAFANA_IMAGE_AND_TAG=seldonio/seldon-grafana:latest AGENT_OVERCOMMIT_PERCENTAGE=20