Skip to content

Commit

Permalink
feat: add support for scheduling pods based on node labels, taints, a…
Browse files Browse the repository at this point in the history
…nd affinity (#352)

Signed-off-by: Lenin Mehedy <[email protected]>
  • Loading branch information
leninmehedy authored Oct 4, 2023
1 parent e91f465 commit 5dd625a
Show file tree
Hide file tree
Showing 17 changed files with 206 additions and 40 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/zxc-compile-code.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
uses: helm/kind-action@dda0770415bac9fc20092cacbc54aa298604d140 # v1.8.0
if: ${{ inputs.enable-unit-tests && !cancelled() }}
with:
cluster_name: fst
config: dev/dev-cluster.yaml
version: v0.19.0
verbosity: 3
wait: 120s
Expand All @@ -125,6 +125,7 @@ jobs:
run: |
kubectl config get-contexts
kubectl get crd
kubectl get node --show-labels
# This step is currently required because the Hedera Services artifacts are not publicly accessible.
# May be removed once the artifacts are publicly accessible.
Expand Down
4 changes: 2 additions & 2 deletions charts/hedera-network/Chart.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ dependencies:
- name: tenant
repository: https://operator.min.io/
version: 5.0.7
digest: sha256:cf355b295abceb5814ef57d3e146ec9d4e8db7365a700079d683bd5f766ad374
generated: "2023-09-20T13:51:41.203996+10:00"
digest: sha256:5dbc1a4af8f2b057dbd7730b6308e1a2954f3f95f86e8484bb232e64ed12e923
generated: "2023-10-04T15:47:44.747012+11:00"
8 changes: 5 additions & 3 deletions charts/hedera-network/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,20 @@ version: 0.8.0
appVersion: "0.8.0"

# This is the range of Kubernetes server versions supported by this chart.
kubeVersion: ">=1.25.0"
# Note we need to use -0 suffix to support GKE version
# Reference: https://github.com/helm/helm/issues/3810#issuecomment-379877753
kubeVersion: ">=1.25.0-0"

dependencies:
- name: hedera-explorer
version: 0.2.0
condition: cloud.minio.enable
condition: hedera-explorer.enable

- name: hedera-mirror
alias: hedera-mirror-node
version: 0.86.0
repository: https://hashgraph.github.io/hedera-mirror-node/charts
condition: cloud.minio.enable
condition: hedera-mirror-node.enable

- name: tenant
alias: minio-server
Expand Down
22 changes: 22 additions & 0 deletions charts/hedera-network/templates/network-node-statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ metadata:
namespace: {{ default $.Release.Namespace $.Values.global.namespaceOverride }}
labels:
app: network-{{ $node.name }}
{{- if $.Values.deployment.podLabels }}
{{- $.Values.deployment.podLabels | toYaml | nindent 4 }}
{{- end }}
{{- if $.Values.deployment.podAnnotations }}
annotations:
{{- $.Values.deployment.podAnnotations | toYaml | nindent 4 }}
{{- end }}
spec:
replicas: 1
serviceName: "network-{{ $node.name }}"
Expand All @@ -30,6 +37,21 @@ spec:
fullstack.hedera.com/type: network-node
fullstack.hedera.com/node-name: {{ $node.name }}
spec:
{{- if $.Values.deployment.nodeSelectors }}
nodeSelector:
{{- $.Values.deployment.nodeSelectors | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.deployment.tolerations }}
tolerations:
{{- $.Values.deployment.tolerations | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.deployment.affinity }}
affinity:
{{- $.Values.deployment.affinity | toYaml | nindent 8 }}
{{- end }}
{{- if $.Values.deployment.priorityClassName }}
priorityClassName: {{ $.Values.deployment.priorityClassName }}
{{- end }}
terminationGracePeriodSeconds: {{ $.Values.terminationGracePeriodSeconds }}
volumes:
- name: hgcapp-storage # change me
Expand Down
24 changes: 24 additions & 0 deletions charts/hedera-network/templates/pdb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{{- /*
  Renders one PodDisruptionBudget per entry in .Values.hedera.nodes, but only
  when .Values.deployment.podDisruptionBudget.create is truthy.

  Each budget selects pods by the labels
  fullstack.hedera.com/type=network-node and
  fullstack.hedera.com/node-name=<node name>.

  Kubernetes rejects a PodDisruptionBudget that sets both minAvailable and
  maxUnavailable, so exactly one of them is emitted: minAvailable wins when it
  is set, otherwise maxUnavailable is used when it is set.
*/ -}}
{{- if $.Values.deployment.podDisruptionBudget.create }}
{{ range $index, $node := $.Values.hedera.nodes }}
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: network-node-pdb-{{ $node.name }}
  namespace: {{ default $.Release.Namespace $.Values.global.namespaceOverride }}
  labels:
    fullstack.hedera.com/type: pod-disruption-budget
    fullstack.hedera.com/node-name: {{ $node.name }}
spec:
  selector:
    matchLabels:
      fullstack.hedera.com/type: network-node
      fullstack.hedera.com/node-name: {{ $node.name }}
  {{- if $.Values.deployment.podDisruptionBudget.minAvailable }}
  minAvailable: {{ $.Values.deployment.podDisruptionBudget.minAvailable }}
  {{- else if $.Values.deployment.podDisruptionBudget.maxUnavailable }}
  maxUnavailable: {{ $.Values.deployment.podDisruptionBudget.maxUnavailable }}
  {{- end }}
{{- end }}
{{- end }}
2 changes: 2 additions & 0 deletions charts/hedera-network/templates/rbac/pod-monitor.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- if $.Values.tester.deployPodMonitor | eq "true" }}
apiVersion: v1
kind: ServiceAccount
metadata:
Expand All @@ -17,3 +18,4 @@ roleRef:
kind: ClusterRole
name: {{ $.Values.tester.clusterRoleName }}
apiGroup: rbac.authorization.k8s.io
{{- end }}
15 changes: 12 additions & 3 deletions charts/hedera-network/tests/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
# Every script must load (source) this in the beginning
# Warning: avoid making these variables readonly since it can be sourced multiple times

CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

# load .env file if it exists in order to load variables with custom values
ENV_FILE="$(dirname "${BASH_SOURCE[0]}")/.env"
ENV_FILE="${CUR_DIR}/.env"
if [[ -f "${ENV_FILE}" ]]; then
set -a
# shellcheck source=./../temp/.env
Expand All @@ -13,8 +15,15 @@ if [[ -f "${ENV_FILE}" ]]; then
fi

# set global env variables if not set
BATS_HOME="${BATS_HOME:-../../../dev/bats}"
TESTS_DIR="${TESTS_DIR:-.}"
BATS_HOME="${BATS_HOME:-${CUR_DIR}/../../../dev/bats}"
TESTS_DIR="${TESTS_DIR:-${CUR_DIR}}"

# Defaults for the helm test run. Every value can be pre-set through the
# environment (or the optional .env file loaded earlier in this script);
# the fallback after ':-' is used only when the variable is unset or empty.
TOTAL_NODES="${TOTAL_NODES:-3}"
USER="${USER:-changeme}"
NAMESPACE="${NAMESPACE:-fst-${USER}}"
LOG_DIR="${LOG_DIR:-${CUR_DIR}/logs}"
LOG_FILE="${LOG_FILE:-helm-test.log}"
OUTPUT_LOG="${OUTPUT_LOG:-false}"
# -p creates missing parent directories (LOG_DIR may be a nested custom path)
# and succeeds when the directory already exists, so re-sourcing this file is
# harmless and does not leave a non-zero status behind.
mkdir -p "${LOG_DIR}"

echo "--------------------------Env Setup: fullstack-testing Helm Test------------------------------------------------"
Expand Down
3 changes: 3 additions & 0 deletions charts/hedera-network/tests/env.template
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
USER="${USER:-changeme}"
NAMESPACE="${NAMESPACE:-fst-${USER}}"

TOTAL_NODES=3

LOG_DIR="${LOG_DIR:-/tmp/fullstack-testing-logs}"
Expand Down
2 changes: 1 addition & 1 deletion charts/hedera-network/tests/run.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
CUR_DIR=$(dirname "${BASH_SOURCE[0]}")
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
source "${CUR_DIR}/env.sh"
source "${CUR_DIR}/logging.sh"

Expand Down
43 changes: 40 additions & 3 deletions charts/hedera-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ global:

# cloud configuration
cloud:
minio:
enable: true
buckets:
streamBucket: "fst-streams"
backupBucket: "fst-backups"
s3:
enable: "true"
gcs:
enable: "true"
minio:
enable: true

# telemetry configurations
telemetry:
Expand All @@ -26,6 +26,7 @@ terminationGracePeriodSeconds: 10

# helm test container
tester:
deployPodMonitor: "true"
clusterRoleName: "pod-monitor-role" # this is a shared cluster role for all namespaces
image:
registry: "ghcr.io"
Expand Down Expand Up @@ -63,7 +64,6 @@ gatewayApi:
route:
hostname: "{{ .node.name }}.fst.local"


# default settings for a single node
# These default configurations can be overridden for each node in the hedera.nodes section.
defaults:
Expand Down Expand Up @@ -222,7 +222,9 @@ minio-server:
certificate:
requestAutoCert: false

# hedera mirror node configuration
hedera-mirror-node:
enable: true
global:
namespaceOverride: "{{ tpl (.Values.global.namespaceOverride | toString) }}"
# importer is a component of the hedera mirror node
Expand Down Expand Up @@ -250,7 +252,9 @@ hedera-mirror-node:
bucketName: "fst-streams"
# for s3 configuration of mirror node look at uploader-mirror-secrets.yaml

# hedera explorer configuration
hedera-explorer:
enable: true
global:
namespaceOverride: "{{ tpl (.Values.global.namespaceOverride | toString) }}"
# The hedera explorer UI /api url will proxy all request to mirror node
Expand All @@ -272,6 +276,39 @@ hedera-explorer:
}
]
# common deployment configuration
# These values are consumed by the network node StatefulSet template and the
# PodDisruptionBudget template.
deployment:
  # Extra annotations / labels added to the StatefulSet metadata when non-empty.
  podAnnotations: {}
  podLabels: {}
  # Rendered as the pod's `nodeSelector`. The default keys match the node labels
  # declared in dev/dev-cluster.yaml.
  nodeSelectors:
    fullstack-scheduling.io/os: linux
    fullstack-scheduling.io/role: network
  # Rendered as the pod's `tolerations`.
  tolerations:
    - key: "fullstack-scheduling.io/os"
      operator: "Equal"
      value: "linux"
      effect: "NoSchedule"
    - key: "fullstack-scheduling.io/role"
      operator: "Equal"
      value: "network"
      effect: "NoSchedule"
  # Specify pod affinity
  # Use complete affinity spec starting with key "nodeAffinity:"
  # Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#node-affinity
  affinity: {}
  # Name of the PriorityClass to assign to the pods. This is a string; leave it
  # empty to omit `priorityClassName` from the pod spec.
  priorityClassName: ""
  ## PodDisruptionBudget for fullstack testing pods
  ## Default backend Pod Disruption Budget configuration
  ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
  ## Set only one of minAvailable / maxUnavailable: Kubernetes rejects a
  ## PodDisruptionBudget that specifies both.
  ## @param deployment.podDisruptionBudget.create Enable Pod Disruption Budget configuration
  ## @param deployment.podDisruptionBudget.minAvailable Minimum number/percentage of pods that should remain scheduled
  ## @param deployment.podDisruptionBudget.maxUnavailable Maximum number/percentage of pods that may be unavailable
  ##
  podDisruptionBudget:
    create: true
    minAvailable: 1
    maxUnavailable: ""

# hedera node configuration
# Only the name of the node is required. The rest of the configuration will be inherited from `defaults` section
hedera:
Expand Down
28 changes: 9 additions & 19 deletions dev/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,10 @@ run-func:
source "${SCRIPTS_DIR}/${SCRIPT_NAME}" && ${FUNC}

.PHONY: start
start: deploy-minio-operator-if-required update-helm-dependencies deploy-network setup-nodes start-nodes
start: ci-deploy-network setup-nodes start-nodes

.PHONY: stop
stop: stop-nodes destroy-network

.PHONY: restart
restart: stop-nodes start-nodes
Expand Down Expand Up @@ -233,30 +236,17 @@ destroy-test-container:
local-kubectl-bats:
source "${SCRIPTS_DIR}/${DOCKER_SCRIPT}" && build_kubectl_bats "${CLUSTER_NAME}"

# Here we run all steps in sequence, if any step fails, deploy-all trap the EXIT and run cleanup
.PHONY: run-deploy-seq
run-deploy-seq: setup deploy-network helm-test setup-nodes start-nodes

.PHONY: deploy-all
deploy-all:
.PHONY: ci-test
ci-test:
# Enable cleanup_test function so that even if test fails, we cleanup the cluster.
# We are only enabling this in this make target, however if necessary, similar pattern can be used in other targets.
# Ref: https://stackoverflow.com/questions/28597794/how-can-i-clean-up-after-an-error-in-a-makefile
function cleanup_test {
# NOTE: It needs latest make (version ~=4.3)
function cleanup_test () {
$(MAKE) destroy-network
}
trap cleanup_test EXIT # always destroy-network on exit
$(MAKE) run-deploy-seq

.PHONY: destroy-all
destroy-all:
-$(MAKE) destroy-network
-$(MAKE) undeploy-minio-operator
-$(MAKE) destroy-prometheus-operator

.PHONY: ci-test
ci-test: setup-cluster local-kubectl-bats
$(MAKE) deploy-all CHART_VALUES_FILES="$(PWD)/ci/ci-values.yaml"
$(MAKE) ci-deploy-network setup-nodes start-nodes

.PHONY: ci-deploy-network
ci-deploy-network: setup-cluster local-kubectl-bats
Expand Down
8 changes: 8 additions & 0 deletions dev/dev-cluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# kind cluster definition used for local development and CI.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
name: fst # this is overridden if CLUSTER_NAME env var is set. Check .env file
nodes:
  - role: control-plane
    # Node labels must be nested under the node entry. They match the chart's
    # default deployment.nodeSelectors so network node pods can be scheduled
    # on this single-node cluster.
    labels:
      fullstack-scheduling.io/os: linux
      fullstack-scheduling.io/role: network
6 changes: 4 additions & 2 deletions dev/scripts/docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@ function build_kubectl_bats() {
[[ -z "${CLUSTER_NAME}" ]] && echo "ERROR: [build_kubectl_bats] Cluster name is required" && return 1

echo ""
echo "Building kubectl-bats image"
echo "Building kubectl-bats image"
echo "-----------------------------------------------------------------------------------------------------"
cd "${DOCKERFILE_DIR}/kubectl-bats" && docker build -t "${KUBECTL_BATS_IMAGE}" .
cd "${DOCKERFILE_DIR}/kubectl-bats" && docker build -t "${KUBECTL_BATS_IMAGE}" .
kind load docker-image "${KUBECTL_BATS_IMAGE}" -n "${CLUSTER_NAME}"

log_time "build_kubectl_bats"
}
21 changes: 20 additions & 1 deletion dev/scripts/env.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/env bash

start_time=$(date +%s)

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

readonly SCRIPT_DIR
Expand Down Expand Up @@ -58,7 +60,11 @@ function setup_kubectl_context() {
kubectl get ns

echo "Setting kubectl context..."
kubectl config use-context "kind-${CLUSTER_NAME}"
local count
count=$(kubectl config get-contexts --no-headers | grep -c "kind-${CLUSTER_NAME}")
if [[ $count -ne 0 ]]; then
kubectl config use-context "kind-${CLUSTER_NAME}"
fi
kubectl config set-context --current --namespace="${NAMESPACE}"
kubectl config get-contexts
}
Expand All @@ -68,6 +74,19 @@ function setup() {
load_env_file
}

#######################################
# Print a three-line banner reporting the seconds elapsed since start_time.
# Note: the measurement starts at start_time (assigned once at the top of this
# script), not at the moment the named function began.
# Globals:   start_time (read) - epoch seconds
# Arguments: $1 - label shown in the banner (typically the caller's name)
# Outputs:   separator line, timing line, separator line on stdout
#######################################
function log_time() {
  local label=$1
  local elapsed formatted

  # Whole-second resolution; the value is still printed with two decimals.
  elapsed=$(($(date +%s) - start_time))
  printf -v formatted "%.2f seconds" "${elapsed}"

  echo "-----------------------------------------------------------------------------------------------------"
  echo "<<< ${label} execution took: ${formatted} >>>"
  echo "-----------------------------------------------------------------------------------------------------"
}

setup

echo "--------------------------Env Setup: fullstack-testing ------------------------------------------------"
Expand Down
Loading

0 comments on commit 5dd625a

Please sign in to comment.