From 64da9cef54c1efcc41d167c2569710661215ca4d Mon Sep 17 00:00:00 2001 From: Or Mergi Date: Tue, 17 Nov 2020 10:09:27 +0200 Subject: [PATCH] kind infra, run etcd in memory Currently we encounter bad etcd performance on the sriov provider cluster in the DinD setup. We get 'etcdserver: timeout errors' that often cause jobs to fail. In cases where etcd performs badly and the data shouldn't be persistent (e.g. on CI and dev environments) it is recommended [1] to use in-memory etcd. To do that, this commit: - Adds a kubeadm ClusterConfiguration to the kind config, setting the etcd data directory to '/tmp/...' inside kind cluster nodes. The '/tmp/' directory is already mounted in RAM as tmpfs. - 'KUBEVIRT_WITH_KIND_ETCD_IN_MEMORY', expected values: "true", "false"; controls running etcd in memory on kind providers. - 'ETCD_DATA_DIR', expects a directory path that is mounted in RAM; controls the path of the etcd data directory inside kind cluster nodes. Running etcd in memory should improve performance and stabilize the sriov provider and lanes. 
[1] https://github.com/kubernetes-sigs/kind/issues/1922 Signed-off-by: Or Mergi --- .../cluster/kind-k8s-sriov-1.17.0/provider.sh | 1 + cluster-up/cluster/kind/common.sh | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/cluster-up/cluster/kind-k8s-sriov-1.17.0/provider.sh b/cluster-up/cluster/kind-k8s-sriov-1.17.0/provider.sh index 22d49f0b35..ee1afa93db 100755 --- a/cluster-up/cluster/kind-k8s-sriov-1.17.0/provider.sh +++ b/cluster-up/cluster/kind-k8s-sriov-1.17.0/provider.sh @@ -16,6 +16,7 @@ function up() { # print hardware info for easier debugging based on logs echo 'Available NICs' docker run --rm --cap-add=SYS_RAWIO quay.io/phoracek/lspci@sha256:0f3cacf7098202ef284308c64e3fc0ba441871a846022bb87d65ff130c79adb1 sh -c "lspci | egrep -i 'network|ethernet'" + echo "" cp $KIND_MANIFESTS_DIR/kind.yaml ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/kind.yaml diff --git a/cluster-up/cluster/kind/common.sh b/cluster-up/cluster/kind/common.sh index df967e2c04..d5f9932b11 100755 --- a/cluster-up/cluster/kind/common.sh +++ b/cluster-up/cluster/kind/common.sh @@ -15,6 +15,9 @@ REGISTRY_NAME=${CLUSTER_NAME}-registry MASTER_NODES_PATTERN="control-plane" WORKER_NODES_PATTERN="worker" +KUBEVIRT_WITH_KIND_ETCD_IN_MEMORY=${KUBEVIRT_WITH_KIND_ETCD_IN_MEMORY:-"true"} +ETCD_DATA_DIR="/tmp/kind-cluster-etcd" + function _wait_kind_up { echo "Waiting for kind to be ready ..." 
while [ -z "$(docker exec --privileged ${CLUSTER_NAME}-control-plane kubectl --kubeconfig=/etc/kubernetes/admin.conf get nodes --selector=node-role.kubernetes.io/master -o=jsonpath='{.items..status.conditions[-1:].status}' | grep True)" ]; do @@ -154,6 +157,13 @@ function setup_kind() { docker cp ${CLUSTER_NAME}-control-plane:/kind/bin/kubectl ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/.kubectl chmod u+x ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/.kubectl + if [ "$KUBEVIRT_WITH_KIND_ETCD_IN_MEMORY" == "true" ]; then + echo "Checking KIND cluster etcd data is mounted to RAM: $ETCD_DATA_DIR" # df reports the filesystem of the parent dir, du fails if the data dir is missing; either failure aborts setup + docker exec "$CLUSTER_NAME-control-plane" df -h "$(dirname "$ETCD_DATA_DIR")" && + docker exec "$CLUSTER_NAME-control-plane" du -h "$ETCD_DATA_DIR" || + { echo "failed to check etcd data directory"; return 1; } + fi + for node in $(_get_nodes | awk '{print $1}'); do docker exec $node /bin/sh -c "curl -L https://github.com/containernetworking/plugins/releases/download/v0.8.5/cni-plugins-linux-amd64-v0.8.5.tgz | tar xz -C /opt/cni/bin" done @@ -230,8 +240,25 @@ EOF done } +function _add_kubeadm_config_patches() { # When KUBEVIRT_WITH_KIND_ETCD_IN_MEMORY is "true", appends a kubeadm ClusterConfiguration patch to kind.yaml that sets etcd's dataDir to $ETCD_DATA_DIR + if [ "$KUBEVIRT_WITH_KIND_ETCD_IN_MEMORY" == "true" ]; then + cat <<EOF >> ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/kind.yaml +kubeadmConfigPatches: +- | + kind: ClusterConfiguration + metadata: + name: config + etcd: + local: + dataDir: $ETCD_DATA_DIR +EOF + echo "KIND cluster etcd data will be mounted to RAM on kind nodes: $ETCD_DATA_DIR" + fi +} + function _prepare_kind_config() { _add_workers + _add_kubeadm_config_patches echo "Final KIND config:" cat ${KUBEVIRTCI_CONFIG_PATH}/$KUBEVIRT_PROVIDER/kind.yaml