From a90bc784ef6ad97cbbfdca1510fad467ea88837e Mon Sep 17 00:00:00 2001 From: Maksim Paskal Date: Thu, 8 Feb 2024 17:43:38 +0000 Subject: [PATCH] Windows 2019 support Signed-off-by: Maksim Paskal --- .github/workflows/release.yaml | 14 ++- Dockerfile.windows | 4 +- README.md | 91 +++++++++++++++++-- .../aks-node-termination-handler/Chart.yaml | 2 +- .../templates/configmap.yaml | 2 +- .../templates/daemonset.yaml | 16 ++-- .../templates/rbac.yaml | 10 +- .../aks-node-termination-handler/values.yaml | 4 +- 8 files changed, 118 insertions(+), 25 deletions(-) diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index e8759e9..1204106 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -31,6 +31,7 @@ jobs: with: distribution: goreleaser version: latest + # args: build --clean --skip=validate --snapshot args: release --clean env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -74,6 +75,9 @@ jobs: publish-windows-amd64: runs-on: windows-latest + strategy: + matrix: + windows-version: [ 'ltsc2019', 'ltsc2022' ] needs: build steps: - uses: docker/login-action@v3 @@ -82,9 +86,9 @@ jobs: password: ${{ secrets.DOCKER_PASSWORD }} - uses: actions/download-artifact@v4 - run: tar xvf ./release/release.tar - - run: "docker build --pull --platform windows/amd64 -t ${{ env.IMAGE }}-windows-amd64 ." + - run: "docker build --build-arg WINDOWS_VERSION=${{ matrix.windows-version }} --pull --platform windows/amd64 -t ${{ env.IMAGE }}-windows-${{ matrix.windows-version }}-amd64 ." 
working-directory: ./dist/aks-node-termination-handler_windows_amd64_v1 - - run: docker push ${{ env.IMAGE }}-windows-amd64 + - run: docker push ${{ env.IMAGE }}-windows-${{ matrix.windows-version }}-amd64 publish-manifest: runs-on: ubuntu-latest @@ -94,7 +98,9 @@ jobs: with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - - run: docker manifest create ${{ env.IMAGE }} ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-amd64 + - run: docker manifest create ${{ env.IMAGE }} ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2022-amd64 - run: docker manifest push ${{ env.IMAGE }} - - run: docker manifest create ${{ env.IMAGE_LATEST }} ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-amd64 + - run: docker manifest create ${{ env.IMAGE_LATEST }} ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2022-amd64 - run: docker manifest push ${{ env.IMAGE_LATEST }} + - run: docker manifest create ${{ env.IMAGE_LATEST }}-ltsc2019 ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2019-amd64 + - run: docker manifest push ${{ env.IMAGE_LATEST }}-ltsc2019 \ No newline at end of file diff --git a/Dockerfile.windows b/Dockerfile.windows index c6d901b..4cbdc53 100644 --- a/Dockerfile.windows +++ b/Dockerfile.windows @@ -1,4 +1,6 @@ -FROM mcr.microsoft.com/windows/nanoserver:ltsc2022 +ARG WINDOWS_VERSION=ltsc2022 + +FROM mcr.microsoft.com/windows/nanoserver:$WINDOWS_VERSION WORKDIR /app/ diff --git a/README.md b/README.md index 9ad8eb0..60ab9e3 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,11 @@ Gracefully handle Azure Virtual Machines shutdown within Kubernetes ## Motivation -This tool ensures that kubernetes cluster responds appropriately to events that can cause your Azure Virtual Machines to become unavailable, like evictions Azure Spot Virtual Machines or Reboot. 
If not handled, your application code may not stop gracefully, take longer to recover full availability, or accidentally schedule work to nodes that are going down. It also can send Telegram or Slack message before Azure Virtual Machines evictions. +This tool ensures that the Kubernetes cluster responds appropriately to events that can cause your Azure Virtual Machines to become unavailable, such as evictions of Azure Spot Virtual Machines or reboots. If not handled, your application code may not stop gracefully, recovery to full availability may take longer, or work might accidentally be scheduled to nodes that are shutting down. This tool can also send Telegram, Slack or Webhook messages before Azure Virtual Machines evictions occur. Based on [Azure Scheduled Events](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events) and [Safely Drain a Node](https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/) -Support Linux (amd64, arm64) and Windows (amd64) nodes. +Supports Linux (amd64, arm64) and Windows 2022, 2019\* (amd64) nodes. \* Windows 2019 nodes require a separate image, see [Windows 2019 support](#windows-2019-support).
## Create Azure Kubernetes Cluster @@ -53,11 +53,12 @@ az aks nodepool add \ --min-count 0 \ --max-count 10 -# Create Windows nodepool with Spot Virtual Machines and autoscaling +# Create Windows (Windows Server 2022) nodepool with Spot Virtual Machines and autoscaling az aks nodepool add \ --resource-group test-aks-group-eastus \ --cluster-name MyManagedCluster \ --os-type Windows \ +--os-sku Windows2022 \ --priority Spot \ --eviction-policy Delete \ --spot-max-price -1 \ @@ -66,6 +67,20 @@ az aks nodepool add \ --min-count 1 \ --max-count 3 +# Create Windows (Windows Server 2019) nodepool with Spot Virtual Machines and autoscaling +az aks nodepool add \ +--resource-group test-aks-group-eastus \ +--cluster-name MyManagedCluster \ +--os-type Windows \ +--os-sku Windows2019 \ +--priority Spot \ +--eviction-policy Delete \ +--spot-max-price -1 \ +--enable-cluster-autoscaler \ +--name spot2 \ +--min-count 1 \ +--max-count 3 + # Get config to connect to cluster az aks get-credentials \ --resource-group test-aks-group-eastus \ @@ -89,7 +104,7 @@ aks-node-termination-handler/aks-node-termination-handler \ ## Send notification events -You can compose your payload with markers that described [here](pkg/template/README.md) +You can compose your payload with markers that are described [here](pkg/template/README.md)
Send Telegram notification @@ -171,7 +186,7 @@ aks-node-termination-handler/aks-node-termination-handler \ ## Simulate eviction -You can test with [Simulate Eviction API](https://docs.microsoft.com/en-us/rest/api/compute/virtual-machines/simulate-eviction) and change API endpoint to correspond `virtualMachineScaleSets` that used in AKS +You can test with [Simulate Eviction API](https://docs.microsoft.com/en-us/rest/api/compute/virtual-machines/simulate-eviction) and change the API endpoint to correspond to the `virtualMachineScaleSets` that are used in AKS. ```bash POST https://management.azure.com/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/virtualMachineScaleSets/{vmScaleSetName}/virtualMachines/{instanceId}/simulateEviction?api-version=2021-11-01 @@ -179,10 +194,74 @@ POST https://management.azure.com/subscriptions/{subscriptionId}/resourceGroups/ ## Metrics -Application expose Prometheus metrics in `/metrics` endpoint. Installing latest chart will add annotations to pods: +The application exposes Prometheus metrics at the `/metrics` endpoint. Installing the latest chart will add annotations to the pods: ```yaml annotations: prometheus.io/port: "17923" prometheus.io/scrape: "true" ``` + +## Windows 2019 support + +If your cluster has only Linux and Windows 2019 nodes (no Windows 2022 nodes), you need to use another image: + +```bash +helm upgrade aks-node-termination-handler \ +--install \ +--namespace kube-system \ +aks-node-termination-handler/aks-node-termination-handler \ +--set priorityClassName=system-node-critical \ +--set image=paskalmaksim/aks-node-termination-handler:latest-ltsc2019 +``` + +If your cluster includes Linux, Windows 2022, and Windows 2019 nodes, you will need two separate helm installations of `aks-node-termination-handler`, each with different values. + +
+ linux-windows2022.values.yaml + +```yaml +priorityClassName: system-node-critical + +image: paskalmaksim/aks-node-termination-handler:latest + +affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.azure.com/os-sku + operator: NotIn + values: + - Windows2019 +```
+ +
+ linux-windows2019.values.yaml + +```yaml +priorityClassName: system-node-critical + +image: paskalmaksim/aks-node-termination-handler:latest-ltsc2019 + +nodeSelector: + kubernetes.azure.com/os-sku: Windows2019 +```
+ +```bash +# install aks-node-termination-handler for Linux and Windows 2022 nodes +helm upgrade aks-node-termination-handler \ +--install \ +--namespace kube-system \ +aks-node-termination-handler/aks-node-termination-handler \ +--values=linux-windows2022.values.yaml + +# install aks-node-termination-handler for Windows 2019 nodes +helm upgrade aks-node-termination-handler-windows-2019 \ +--install \ +--namespace kube-system \ +aks-node-termination-handler/aks-node-termination-handler \ +--values=linux-windows2019.values.yaml +``` \ No newline at end of file diff --git a/charts/aks-node-termination-handler/Chart.yaml b/charts/aks-node-termination-handler/Chart.yaml index 4205396..f3017cc 100644 --- a/charts/aks-node-termination-handler/Chart.yaml +++ b/charts/aks-node-termination-handler/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 icon: https://helm.sh/img/helm.svg name: aks-node-termination-handler -version: 1.1.3 +version: 1.1.4 description: Gracefully handle Azure Virtual Machines shutdown within Kubernetes maintainers: - name: maksim-paskal # Maksim Paskal diff --git a/charts/aks-node-termination-handler/templates/configmap.yaml b/charts/aks-node-termination-handler/templates/configmap.yaml index 3810e17..7dd3024 100644 --- a/charts/aks-node-termination-handler/templates/configmap.yaml +++ b/charts/aks-node-termination-handler/templates/configmap.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: ConfigMap metadata: - name: {{ .Values.configMap.name }} + name: {{ tpl .Values.configMap.name . 
}} data: {{ toYaml .Values.configMap.data | indent 2 }} {{ end }} \ No newline at end of file diff --git a/charts/aks-node-termination-handler/templates/daemonset.yaml b/charts/aks-node-termination-handler/templates/daemonset.yaml index 3c43b4a..58cedca 100644 --- a/charts/aks-node-termination-handler/templates/daemonset.yaml +++ b/charts/aks-node-termination-handler/templates/daemonset.yaml @@ -1,13 +1,13 @@ apiVersion: apps/v1 kind: DaemonSet metadata: - name: aks-node-termination-handler + name: {{ .Release.Name }} labels: - app: aks-node-termination-handler + app: {{ .Release.Name }} spec: selector: matchLabels: - app: aks-node-termination-handler + app: {{ .Release.Name }} template: metadata: annotations: @@ -19,12 +19,12 @@ spec: {{ toYaml .Values.annotations | indent 8 }} {{ end }} labels: - app: aks-node-termination-handler + app: {{ .Release.Name }} {{ if .Values.labels }} {{ toYaml .Values.labels | indent 8 }} {{ end }} spec: - serviceAccount: aks-node-termination-handler + serviceAccount: {{ .Release.Name }} {{ if .Values.priorityClassName }} priorityClassName: {{ .Values.priorityClassName | quote }} {{ end }} @@ -35,11 +35,15 @@ spec: {{- if .Values.nodeSelector}} nodeSelector: {{- toYaml .Values.nodeSelector | nindent 8 }} +{{- end }} +{{- if .Values.affinity }} + affinity: +{{- toYaml .Values.affinity | nindent 8 }} {{- end }} volumes: - name: files configMap: - name: {{ .Values.configMap.name }} + name: {{ tpl .Values.configMap.name . 
}} {{ if .Values.extraVolumes }} {{ toYaml .Values.extraVolumes | indent 6 }} {{ end }} diff --git a/charts/aks-node-termination-handler/templates/rbac.yaml b/charts/aks-node-termination-handler/templates/rbac.yaml index 29602ae..074e68d 100644 --- a/charts/aks-node-termination-handler/templates/rbac.yaml +++ b/charts/aks-node-termination-handler/templates/rbac.yaml @@ -1,13 +1,13 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: aks-node-termination-handler + name: {{ .Release.Name }} namespace: {{ .Release.Namespace }} --- kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: aks-node-termination-handler + name: {{ .Release.Name }} rules: - apiGroups: - "" @@ -53,12 +53,12 @@ rules: kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: aks-node-termination-handler + name: {{ .Release.Name }} subjects: - kind: ServiceAccount - name: aks-node-termination-handler + name: {{ .Release.Name }} namespace: {{ .Release.Namespace }} roleRef: kind: ClusterRole - name: aks-node-termination-handler + name: {{ .Release.Name }} apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/charts/aks-node-termination-handler/values.yaml b/charts/aks-node-termination-handler/values.yaml index b213d05..b76d231 100644 --- a/charts/aks-node-termination-handler/values.yaml +++ b/charts/aks-node-termination-handler/values.yaml @@ -10,7 +10,7 @@ labels: {} configMap: create: true - name: aks-node-termination-handler-files + name: "{{ .Release.Name }}-files" mountPath: /files data: {} # slack-payload.json: | @@ -40,6 +40,8 @@ securityContext: windowsOptions: runAsUserName: "ContainerUser" +affinity: {} + tolerations: - key: "kubernetes.azure.com/scalesetpriority" operator: "Equal"