diff --git a/.github/workflows/core.yaml b/.github/workflows/core.yaml new file mode 100644 index 0000000..e269643 --- /dev/null +++ b/.github/workflows/core.yaml @@ -0,0 +1,65 @@ +name: Core CI/CD workflow + +on: + push: + # For GitFlow the following patterns are suggested: + # - master + # - develop + # - feature/* + # - release/* + # - hotfix/* + # + # For an action with build_push_image=false the following patterns are suggested: + # - master + branches: + - master + +env: + # AWS region of the AWS account storing images in ECR. + CORE_AWS_REGION: eu-north-1 + # the latest action version + ECR_BUILD_PUSH_ACTION_VERSION: v2 + +jobs: + ecr-build-push: + name: Checkout main and ecr-build-push action repositories, run ecr-build-push action + if: ${{ !contains(github.event.head_commit.message, '[ci skip]') }} + runs-on: ubuntu-20.04 + steps: + - name: Checkout main repository + uses: actions/checkout@v2 + with: + ref: ${{ github.ref }} + fetch-depth: 0 + + - name: Checkout ecr-build-push action repository + uses: actions/checkout@v2 + with: + repository: edenlabllc/fhir.ecr_build_push.action + ref: ${{ env.ECR_BUILD_PUSH_ACTION_VERSION }} + token: ${{ secrets.GH_TOKEN_REPO_FULL_ACCESS }} + path: .github/actions/ecr-build-push + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-region: ${{ env.CORE_AWS_REGION }} + aws-access-key-id: ${{ secrets.CORE_AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.CORE_AWS_SECRET_ACCESS_KEY }} + + - name: Login to AWS ECR + uses: aws-actions/amazon-ecr-login@v1 + + - name: Pull ecr-build-push action image from AWS ECR + run: | + ECR_REGISTRY_URL="${{ secrets.CORE_AWS_ACCOUNT_ID }}.dkr.ecr.${{ env.CORE_AWS_REGION }}.amazonaws.com" + IMAGE_NAME="core.fhir.ecr_build_push.action" + IMAGE_FULL_NAME="${ECR_REGISTRY_URL}/${IMAGE_NAME}:${{ env.ECR_BUILD_PUSH_ACTION_VERSION }}" + docker pull "${IMAGE_FULL_NAME}" + docker tag "${IMAGE_FULL_NAME}" "${IMAGE_NAME}" + + - name: Run ecr-build-push action + id: ecr-build-push + uses: ./.github/actions/ecr-build-push + with: + build_push_image: false diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..600ba75 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.DS_Store +.idea/ +.env +!**.gitkeep +bin/restore-volume-snapshot/pvc-* diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3230053
--- /dev/null
+++ b/README.md
@@ -0,0 +1,178 @@
+# helmfile.hooks.infra
+
+[![Release](https://img.shields.io/github/v/release/edenlabllc/helmfile.hooks.infra.svg?style=for-the-badge)](https://github.com/edenlabllc/helmfile.hooks.infra/releases/latest)
+[![Software License](https://img.shields.io/github/license/edenlabllc/helmfile.hooks.infra.svg?style=for-the-badge)](LICENSE)
+[![Powered By: Edenlab](https://img.shields.io/badge/powered%20by-edenlab-8A2BE2.svg?style=for-the-badge)](https://edenlab.io)
+
+This repository provides shell scripts for [Helmfile hooks](https://helmfile.readthedocs.io/en/latest/#hooks).
+It is mainly designed to be managed by administrators, DevOps engineers and SREs.
+
+## Contents
+
+* [Requirements](#requirements)
+* [Git workflow](#git-workflow)
+* [Additional information](#additional-information)
+* [Development](#development)
+* [Upgrading EKS cluster](#upgrading-eks-cluster)
+  * [General EKS upgrade instructions](#general-eks-upgrade-instructions)
+  * [Overview of EKS upgrade scripts](#overview-of-eks-upgrade-scripts)
+  * [Upgrading to EKS 1.27](#upgrading-to-eks-127)
+
+## Requirements
+
+`helm`, `kubectl`, `jq`, `yq`: the required versions are specified in the [project.yaml](https://github.com/edenlabllc/rmk/blob/develop/docs/configuration/project-management/preparation-of-project-repository.md#projectyaml) file
+of each project repository in the `tools` section.
+
+## Git workflow
+
+This repository uses the classic [GitFlow](https://www.atlassian.com/git/tutorials/comparing-workflows/gitflow-workflow) workflow,
+embracing all its advantages and disadvantages.
+
+**Stable branches:** develop, master
+
+Each merge into the master branch adds a new [SemVer2](https://semver.org/) tag and creates a GitHub release.
+
+## Additional information
+
+* This set of hook scripts can only be launched from the project repository via [RMK](https://github.com/edenlabllc/rmk),
+  because the entire set of input variables is formed by [RMK](https://github.com/edenlabllc/rmk) at the moment the release commands are launched, e.g.:
+
+  ```shell
+  rmk release sync
+  ```
+
+  [RMK](https://github.com/edenlabllc/rmk) also keeps track of which version of the release hook scripts the project repository will use.
+  The version of the hook scripts artifact is described in the [project.yaml](https://github.com/edenlabllc/rmk/blob/develop/docs/configuration/project-management/preparation-of-project-repository.md#projectyaml) file
+  of each project repository in the `inventory.hooks` section, e.g.:
+
+  ```yaml
+  inventory:
+    # ...
+    hooks:
+      helmfile.hooks.infra:
+        version:
+        url: git::https://github.com/edenlabllc/{{.Name}}.git?ref={{.Version}}
+        # ...
+  ```
+* The hook scripts are designed to ensure consistent deployment of the Helm releases described in a Helmfile.
+  These scripts should be designed with declarative deployment in mind.
+  They will only execute when there are differences in the state.
+
+## Development
+
+For development, navigate to the local `.PROJECT/inventory/hooks/helmfile.hooks.infra-/bin` directory of a project repository,
+then perform the changes directly in the files and test them. Finally, copy the changed files to a new feature branch
+of this repository and create a pull request (PR).
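+
+To illustrate how these hook scripts are typically wired in, below is a minimal sketch of a Helmfile release
+that calls one of them as a lifecycle hook. It is only an illustration: the chart location and the hook path are
+assumptions, and in real project repositories the wiring is rendered by [RMK](https://github.com/edenlabllc/rmk).
+
+```yaml
+releases:
+  # hypothetical release definition, not taken from a real project repository
+  - name: elastic
+    namespace: elastic
+    chart: core-charts/elastic # assumed chart location
+    hooks:
+      - events: ["postsync"]
+        showlogs: true
+        # wait until the Elasticsearch cluster reports the "Ready" phase (see bin/elastic-postsync-hook.sh)
+        command: ".PROJECT/inventory/hooks/helmfile.hooks.infra-/bin/elastic-postsync-hook.sh"
+        args: ["elastic", "elastic"]
+```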
+
+## Upgrading EKS cluster
+
+### General EKS upgrade instructions
+
+The official [EKS](https://aws.amazon.com/eks/) upgrade instructions are available at
+https://docs.aws.amazon.com/eks/latest/userguide/update-cluster.html
+
+> Only self-managed [EKS addons](https://docs.aws.amazon.com/eks/latest/userguide/eks-add-ons.html) are used. This means that we install all the AWS-related releases via `Helmfile` like any other release.
+
+In general, the steps are the following (they should be executed in the specified order):
+
+1. Make the needed changes to the project repository:
+   - Upgrade components in [project.yaml](https://github.com/edenlabllc/rmk/blob/develop/docs/configuration/project-management/preparation-of-project-repository.md#projectyaml).
+   - If a chart was upgraded, investigate its recent changes and adjust the release values so the new chart is synced correctly.
+     > This might be required in case of any incompatibilities between the release and K8S versions.
+   - If required, enable/disable releases in `etc///releases.yaml`.
+   - Run `rmk secret manager generate` and `rmk secret manager encode` to generate new secrets from a template.
+     > Environment variables might be required by the `generate` command.
+   - Resolve the recommended [AMI](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) ID and set it in `etc/clusters/aws//values/worker-groups.auto.tfvars` (see the example AWS CLI commands below).
+     > Each K8S version has its own recommended AMI image, see the instructions: https://docs.aws.amazon.com/eks/latest/userguide/retrieve-ami-id.html
+   - Set the desired K8S version in `k8s_cluster_version` in `etc/clusters/aws//values/variables.auto.tfvars`.
+2. Resolve the recommended `kube-proxy` and `coredns` versions and set them in `upgrade-cluster.sh`.
+   > See the following instructions: \
+   > https://docs.aws.amazon.com/eks/latest/userguide/managing-kube-proxy.html \
+   > https://docs.aws.amazon.com/eks/latest/userguide/managing-coredns.html#coredns-add-on-self-managed-update
+3. Sync Helm releases for all scopes: `upgrade-releases.sh`
+   > In general, the script will only contain `rmk release sync`. However, a more complex set of steps might be included.
+4. Upgrade the K8S control plane and the system components (1 K8S version will be upgraded per iteration): `upgrade-cluster.sh`
+5. Rolling-update nodes, fix [AZs](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html) for [PVs](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) for each [ASG](https://docs.aws.amazon.com/autoscaling/ec2/userguide/auto-scaling-groups.html): `upgrade-nodes.sh`
+6. Validate cluster health and perform some end-to-end tests.
+
+### Overview of EKS upgrade scripts
+
+There is a dedicated directory for all the K8S upgrade scripts: `bin/k8s-upgrade/`
+
+The scripts are grouped by a _target_ K8S version, e.g.: `bin/k8s-upgrade/1.27/`
+
+The main script is `upgrade-all.sh`. It is a wrapper around the subscripts; the execution order is strict.
+
+The subscripts are `upgrade-releases.sh`, `upgrade-cluster.sh`, `upgrade-nodes.sh`.
+
+> Other scripts might be implemented and added to `upgrade-all.sh` to handle any non-trivial upgrade steps.
+
+The logic in the scripts is pretty straightforward. Most of the instructions are executed linearly one by one
+and can be considered one-time "migrations".
+
+> It is recommended to investigate the scripts' logic before applying them to a K8S cluster.
+
+#### Upgrading to EKS 1.27
+
+The scripts support upgrading K8S from a minimal version of `1.23` to `1.27`.
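+
+As a reference for resolving the recommended AMI ID and the `kube-proxy`/`coredns` versions mentioned in the
+general instructions above, the following AWS CLI commands can be used. This is a sketch only: the region and
+the target K8S version are assumptions (the defaults used elsewhere in this repository), adjust them to the actual cluster.
+
+```shell
+# recommended EKS-optimized AMI ID for a given K8S version and region
+aws ssm get-parameter --region eu-north-1 \
+  --name /aws/service/eks/optimized-ami/1.27/amazon-linux-2/recommended/image_id \
+  --query 'Parameter.Value' --output text
+
+# add-on versions of kube-proxy compatible with the target K8S version
+aws eks describe-addon-versions --region eu-north-1 --addon-name kube-proxy --kubernetes-version 1.27 \
+  --query 'addons[0].addonVersions[].addonVersion' --output text
+```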
+
+**Requirements:**
+
+* [RMK](https://github.com/edenlabllc/rmk) >= v0.41.0
+* [AWS CLI](https://aws.amazon.com/cli/) >= 2.9
+* [eksctl](https://eksctl.io/) >= v0.160.0
+* [yq](https://mikefarah.gitbook.io/yq) >= v4.35.2
+
+> The current upgrade covers 4 minor versions, therefore the logic is complex. For the next versions,
+> it can be simplified greatly by upgrading to the closest version only, e.g. from `1.27` to `1.28`.
+
+> The scripts should be used as a reference point when implementing other upgrade scripts for future versions.
+> They should be idempotent and can be re-executed in case of unexpected errors, e.g. a connection timeout/error.
+> In case of small and reduced clusters, the scripts should check whether a corresponding release exists before applying the changes.
+
+The list of scripts:
+- [upgrade-all.sh](bin/k8s-upgrade/1.27/upgrade-all.sh) - Initialize the [RMK](https://github.com/edenlabllc/rmk) configuration and call the rest of the scripts one by one (the main upgrade script).
+- [upgrade-releases.sh](bin/k8s-upgrade/1.27/upgrade-releases.sh) - Upgrade all releases. The following subscripts are executed:
+  - [upgrade-kafka-operator.sh](bin/k8s-upgrade/1.27/upgrade-kafka-operator.sh) - Upgrade the [kafka](https://kafka.apache.org/) [operator](https://strimzi.io/).
+  - [upgrade-postgres-operator.sh](bin/k8s-upgrade/1.27/upgrade-postgres-operator.sh) - Upgrade the [postgres](https://www.postgresql.org/) [operator](https://postgres-operator.readthedocs.io/en/latest/).
+  - [upgrade-loki-stack.sh](bin/k8s-upgrade/1.27/upgrade-loki-stack.sh) - Upgrade the [loki stack](https://github.com/grafana/helm-charts/tree/main/charts/loki-stack).
+  - [upgrade-linkerd-planes.sh](bin/k8s-upgrade/1.27/upgrade-linkerd-planes.sh) - Upgrade [Linkerd](https://linkerd.io/) to the latest version (executes the full `release sync` command multiple times).
+    > This is the most complex script, because the Linkerd charts have been reorganized recently and split into multiple ones.
+    > Therefore, the script contains some tricky parts, e.g. forcing pod restarts manually. In general, this is needed for some of the releases which freeze during the upgrade at some point.
+- [upgrade-cluster.sh](bin/k8s-upgrade/1.27/upgrade-cluster.sh) - Upgrade the K8S control plane and system worker node components (1 K8S version per iteration).
+- [upgrade-nodes.sh](bin/k8s-upgrade/1.27/upgrade-nodes.sh) - Rolling-update all the K8S worker nodes.
+
+Before running the scripts, you should disable Linkerd in the globals **without committing** the changes.
+These changes will be reverted automatically in the middle of the execution of `upgrade-releases.sh`.
+
+To list all the globals files that should be changed before the execution:
+
+```shell
+ls -alh etc/*//globals.yaml.gotmpl
+```
+
+Current file content:
+
+```yaml
+configs:
+  # ...
+  linkerd:
+    # enable/disable linkerd-await at runtime: true|false
+    await: true
+    # enable/disable linkerd sidecar injection: enabled|disabled
+    inject: enabled
+  # ...
+```
+
+Expected file content before `upgrade-all.sh` is executed:
+
+```yaml
+configs:
+  # ...
+  linkerd:
+    # enable/disable linkerd-await at runtime: true|false
+    await: false
+    # enable/disable linkerd sidecar injection: enabled|disabled
+    inject: disabled
+  # ...
+``` diff --git a/bin/actions-runner-postsync-hook.sh b/bin/actions-runner-postsync-hook.sh new file mode 100755 index 0000000..d6c1837 --- /dev/null +++ b/bin/actions-runner-postsync-hook.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -e + +RELEASE_NAME="${1:-actions-runner}" +NAMESPACE="${2:-actions-runner}" + +LIMIT=180 + +# Note: The hook is only valid for static self-hosted runners (not created by HorizontalRunnerAutoscaler from 0) +GO_TEMPLATE=' + {{- range .items -}} + {{- if not .status.updatedReplicas -}}0{{- else if gt .status.replicas .status.updatedReplicas -}}0{{- end -}} + {{- if not .status.readyReplicas -}}0{{- else if ne .status.replicas .status.readyReplicas -}}0{{- end -}} + {{- end -}} +' + +COUNT=1 +while true; do + STATUS="$(kubectl -n "${NAMESPACE}" get runnerdeployment,runnerset -l "app.kubernetes.io/instance=${RELEASE_NAME}" -o "go-template=${GO_TEMPLATE}")" + if [[ "${STATUS}" != "" && "${COUNT}" -le "${LIMIT}" ]]; then + sleep 1 + ((++COUNT)) + elif [[ "${COUNT}" -gt "${LIMIT}" ]]; then + >2& echo "Limit exceeded." + exit 1 + else + kubectl -n "${NAMESPACE}" get runnerdeployment,runnerset -l "app.kubernetes.io/instance=${RELEASE_NAME}" + break + fi +done diff --git a/bin/add-image-pull-secrets-to-service-account.sh b/bin/add-image-pull-secrets-to-service-account.sh new file mode 100755 index 0000000..104fc33 --- /dev/null +++ b/bin/add-image-pull-secrets-to-service-account.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -e + +ENABLE_HOOK="${1}" +ENSURE_SA_CREATED="${2}" +NAMESPACE="${3}" +SA_NAME="${4}" +IPS_NAME="${5}" + +if [[ "${ENABLE_HOOK}" != "true" ]]; then + echo "Skipped." + exit +fi + +if [[ "${ENSURE_SA_CREATED}" == "true" ]]; then + echo "Ensuring service account \"${SA_NAME}\" created..." + if ! (kubectl -n "${NAMESPACE}" get serviceaccount "${SA_NAME}" &> /dev/null); then + # the same service account might have just been created by the hook of another PG (a race condition) + # in this case, suppress the error using "true" + kubectl -n "${NAMESPACE}" create serviceaccount "${SA_NAME}" || true + fi +fi + +kubectl -n "${NAMESPACE}" patch serviceaccount "${SA_NAME}" --type='merge' \ + -p '{"imagePullSecrets": [{"name": "'"${IPS_NAME}"'"}]}' \ + || exit 1 diff --git a/bin/annotate-k8s-resource.sh b/bin/annotate-k8s-resource.sh new file mode 100755 index 0000000..2f66565 --- /dev/null +++ b/bin/annotate-k8s-resource.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -e + +CURRENT_ENVIRONMENT="${1}" +EXPECTED_ENVIRONMENT="${2}" +K8S_NAMESPACE="${3}" +K8S_RESOURCE_TYPE="${4}" +K8S_RESOURCE_NAME="${5}" +# get rest of arguments +K8S_ANNOTATIONS=("${@}") +K8S_ANNOTATIONS="${K8S_ANNOTATIONS[@]:5}" + +if [[ "${CURRENT_ENVIRONMENT}" != "${EXPECTED_ENVIRONMENT}" ]]; then + echo "Environment ${CURRENT_ENVIRONMENT} skipped when annotating, expected: ${EXPECTED_ENVIRONMENT}" + exit +fi + +kubectl -n "${K8S_NAMESPACE}" annotate --overwrite "${K8S_RESOURCE_TYPE}" "${K8S_RESOURCE_NAME}" ${K8S_ANNOTATIONS} diff --git a/bin/aws-vpc-cni-presync-hook.sh b/bin/aws-vpc-cni-presync-hook.sh new file mode 100755 index 0000000..8d0a116 --- /dev/null +++ b/bin/aws-vpc-cni-presync-hook.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +set -e + +CNI_RELEASE_NAME="${1:-aws-vpc-cni}" +CNI_RELEASE_ENABLED="${2:-false}" +CNI_RESOURCE_NAME="${3:-aws-node}" +CNI_CONFIGMAP_NAME="${4:-amazon-vpc-cni}" + +function set_annotations() { + echo "Setting annotations and labels on ${1}/${2}..." 
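+  # Helm refuses to manage resources it did not create; the release-name/namespace annotations and
+  # the managed-by=Helm label below let the aws-vpc-cni chart adopt the pre-existing aws-node
+  # resources that EKS installs by default.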
+ kubectl -n kube-system annotate --overwrite "${1}" "${2}" meta.helm.sh/release-name="${CNI_RELEASE_NAME}" + kubectl -n kube-system annotate --overwrite "${1}" "${2}" meta.helm.sh/release-namespace=kube-system + kubectl -n kube-system label --overwrite "${1}" "${2}" app.kubernetes.io/managed-by=Helm +} + +if [[ "${CNI_RELEASE_ENABLED}" == "true" ]]; then + for KIND in daemonSet clusterRole clusterRoleBinding serviceAccount configMap; do + if (kubectl get "${KIND}" "${CNI_RESOURCE_NAME}" -n kube-system &> /dev/null); then + set_annotations "${KIND}" "${CNI_RESOURCE_NAME}" + elif (kubectl get "${KIND}" "${CNI_CONFIGMAP_NAME}" -n kube-system &> /dev/null); then + set_annotations "${KIND}" "${CNI_CONFIGMAP_NAME}" + fi + done +fi diff --git a/bin/check-pvcs-deleted.sh b/bin/check-pvcs-deleted.sh new file mode 100755 index 0000000..7e3200e --- /dev/null +++ b/bin/check-pvcs-deleted.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -e + +NAMESPACE="${1}" +K8S_LABELS=("${@}") +K8S_LABELS="${K8S_LABELS[@]:1}" + +LIMIT=120 +PVC_IDS=( "$(kubectl -n "${NAMESPACE}" get pvc -l "${K8S_LABELS/ /,}" -o yaml | yq '.items[].spec.volumeName')" ) + +for PVC_ID in ${PVC_IDS[*]}; do + COUNT=1 + while (kubectl get pv "${PVC_ID}" &> /dev/null); do + if (( COUNT > LIMIT )); then + >2& echo "Limit exceeded." + exit 1 + fi + + echo "PV name: ${PVC_ID} in the process of being removed." + sleep 1 + ((++COUNT)) + done +done diff --git a/bin/clickhouse-hook.sh b/bin/clickhouse-hook.sh new file mode 100755 index 0000000..ec8fe8b --- /dev/null +++ b/bin/clickhouse-hook.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +set -e + +RELEASE_NAME="${1:-clickhouse}" +NAMESPACE="${2:-clickhouse}" +ACTION="${3}" + +LIMIT=180 +COUNT=1 + +function watcher() { + if [[ "${STATUS}" != "${PHRASE}" && "${COUNT}" -le "${LIMIT}" ]]; then + sleep 1 + ((++COUNT)) + elif [[ "${COUNT}" -gt "${LIMIT}" ]]; then + >2& echo "Limit exceeded." + exit 1 + else + kubectl -n "${NAMESPACE}" get clickHouseinstallation 2>&1 + exit 0 + fi +} + +if [[ "${ACTION}" == "delete" ]]; then + PHRASE="No resources found in ${NAMESPACE} namespace." 
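+  # on delete, poll until "kubectl get clickHouseinstallation" reports that no resources are left in the namespace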
+ while true; do + STATUS=$(kubectl -n "${NAMESPACE}" get clickHouseinstallation 2>&1) + watcher + done +else + PHRASE="Completed" + while true; do + STATUS=$(kubectl -n "${NAMESPACE}" get clickHouseinstallation | head -2 | grep "${RELEASE_NAME}" | awk '{print $4}') + watcher + done +fi diff --git a/bin/common-postuninstall-hook.sh b/bin/common-postuninstall-hook.sh new file mode 100755 index 0000000..e318af5 --- /dev/null +++ b/bin/common-postuninstall-hook.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +set -e + +NAMESPACE="${1}" + +kubectl delete namespaces "${NAMESPACE}" diff --git a/bin/create-mongodb-user.sh b/bin/create-mongodb-user.sh new file mode 100755 index 0000000..0712526 --- /dev/null +++ b/bin/create-mongodb-user.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +set -e + +MONGODB_NAMESPACE=${1} +MONGODB_RELEASE_NAME=${2} +MONGODB_CONTAINER_NAME=mongodb + +MONGODB_USERNAME=${3} +MONGODB_PASSWORD=${4} +MONGODB_DATABASE=${5} + +MONGODB_USER_PERMISSIONS=readWrite + +function mongodb_exec() { + kubectl -n "${MONGODB_NAMESPACE}" exec --stdin "${MONGODB_RELEASE_NAME}-0" --container "${MONGODB_CONTAINER_NAME}" -- bash -c "${1}" +} + +function check_user() { + mongodb_exec 'mongo -u root -p ${MONGODB_ROOT_PASSWORD} --quiet --eval \ + "result=db.getSiblingDB(\"'"${MONGODB_DATABASE}"'\").getUser(\"'"${MONGODB_USERNAME}"'\"); \ + result.userId=result.userId.toString(); \ + print(JSON.stringify(result))"' +} + +function create_user() { + mongodb_exec 'mongo -u root -p ${MONGODB_ROOT_PASSWORD} --quiet --eval \ + "db.getSiblingDB(\"'"${MONGODB_DATABASE}"'\").createUser({ user: \"'"${MONGODB_USERNAME}"'\", \ + pwd: \"'"${MONGODB_PASSWORD}"'\", roles: [{role: \"'"${MONGODB_USER_PERMISSIONS}"'\", db: \"'"${MONGODB_DATABASE}"'\"}] })"' +} + +set +e +CHECK_USER=$(check_user 2> /dev/null) +RESULT=$(echo "${CHECK_USER}" | jq '.roles[].role' -r 2> /dev/null) +set -e + +MESSAGE="MongoDB user \"${MONGODB_USERNAME}\" with permissions \"${MONGODB_USER_PERMISSIONS}\" to database \"${MONGODB_DATABASE}\"" +if [[ "${RESULT}" != "${MONGODB_USER_PERMISSIONS}" ]]; then + echo "Creating ${MESSAGE}..." + create_user + echo "Done." +else + echo "${MESSAGE} already exists." +fi diff --git a/bin/create-postgres-user.sh b/bin/create-postgres-user.sh new file mode 100755 index 0000000..4a3f36e --- /dev/null +++ b/bin/create-postgres-user.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +set -e + +NAMESPACE=${1} +PG_DB_NAMES=("${2}") +PG_DB_USERNAME="${3}" +PG_CLUSTER_NAME="${4:-postgres-cluster}" +PG_NAMESPACE="${5:-postgres}" +PG_CRD_NAME="${6:-postgresql}" +PG_ENABLE_DEFAULT_USERS="${7:-false}" + +function create_default_user() { + for DB in ${PG_DB_NAMES[*]}; do + local DEFAULT_OWNER_USER="${DB}_owner_user" + if ! (kubectl get secrets -n "${NAMESPACE}" | grep "^${DEFAULT_OWNER_USER//_/-}\.${PG_CLUSTER_NAME}" &> /dev/null); then + kubectl patch "${PG_CRD_NAME}" "${PG_CLUSTER_NAME}" -n "${PG_NAMESPACE}" --type='merge' \ + -p '{"spec":{"databases":{"'"${DB}"'":"'"${DEFAULT_OWNER_USER}"'"}}}' + kubectl patch "${PG_CRD_NAME}" "${PG_CLUSTER_NAME}" -n "${PG_NAMESPACE}" --type='merge' \ + -p '{"spec":{"preparedDatabases":{"'"${DB}"'":{"defaultUsers":true,"schemas":{"public":{"defaultRoles":false}},"secretNamespace":"'"${NAMESPACE}"'"}}}}' + fi + done + + sleep 5 +} + +function create_custom_user() { + if ! 
(kubectl get secrets -n "${NAMESPACE}" | grep "^${NAMESPACE}\.${PG_DB_USERNAME//_/-}\.${PG_CLUSTER_NAME}" &> /dev/null); then + kubectl patch "${PG_CRD_NAME}" "${PG_CLUSTER_NAME}" -n "${PG_NAMESPACE}" --type='merge' \ + -p '{"spec":{"users":{"'"${NAMESPACE}"'.'"${PG_DB_USERNAME}"'":["createdb"]}}}' + for DB in ${PG_DB_NAMES[*]}; do + kubectl patch "${PG_CRD_NAME}" "${PG_CLUSTER_NAME}" -n "${PG_NAMESPACE}" --type='merge' \ + -p '{"spec":{"databases":{"'"${DB}"'":"'"${NAMESPACE}"'.'"${PG_DB_USERNAME}"'"}}}' + done + + sleep 5 + fi +} + +function create_user_postgresql() { + if [[ "${PG_ENABLE_DEFAULT_USERS}" == "true" ]]; then + create_default_user + return 0 + fi + + create_custom_user +} + +if ! (kubectl get namespace "${NAMESPACE}" &> /dev/null); then + kubectl create namespace "${NAMESPACE}" + create_user_postgresql +else + create_user_postgresql +fi diff --git a/bin/dagster-presync-hook.sh b/bin/dagster-presync-hook.sh new file mode 100755 index 0000000..fdc5132 --- /dev/null +++ b/bin/dagster-presync-hook.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +set -e + +NAMESPACE=${1:-dagster} +SECRET_NAME_INPUT=${2:-dagster.user.postgres-cluster.credentials.postgresql.acid.zalan.do} +SECRET_NAME_OUTPUT=${3:-dagster-postgresql-secret} +MASKS=(${4}) +LIMIT=180 + +function check_input_secret_exist() { + COUNT=0 + while true; do + if (kubectl -n "${NAMESPACE}" get secrets "${SECRET_NAME_INPUT}" --ignore-not-found | grep "${SECRET_NAME_INPUT}"); then + break + fi + + if [[ "${COUNT}" -le "${LIMIT}" ]]; then + sleep 1 + ((++COUNT)) + else + >2& echo "Limit exceeded." + exit 1 + fi + done +} + +function _delete_secret() { + if (kubectl -n "${NAMESPACE}" get secrets "${SECRET_NAME_OUTPUT}" --ignore-not-found | grep "${NAMESPACE}"); then + kubectl -n "${NAMESPACE}" delete secrets "${SECRET_NAME_OUTPUT}" --ignore-not-found + fi +} + +function get_secret_keys() { + check_input_secret_exist + KUBECTL_FLAGS="" + + for KEY_VAL in "${MASKS[@]}"; do + OUTPUT=$(kubectl -n "${NAMESPACE}" get secrets "${SECRET_NAME_INPUT}" -o yaml | yq "${KEY_VAL/*=/}" | base64 -D) + KUBECTL_FLAGS="${KUBECTL_FLAGS} --from-literal=${KEY_VAL/=*/}=${OUTPUT}" + done +} + +function create_secrets() { + _delete_secret + get_secret_keys + kubectl -n "${NAMESPACE}" create secret generic "${SECRET_NAME_OUTPUT}" ${KUBECTL_FLAGS} +} + +create_secrets diff --git a/bin/delete-failed-job.sh b/bin/delete-failed-job.sh new file mode 100755 index 0000000..5f56f39 --- /dev/null +++ b/bin/delete-failed-job.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -e + +JOB_NAME="${1}" +NAMESPACE="${2}" + +if (kubectl get jobs -n "${NAMESPACE}" | grep "${JOB_NAME}"); then + JOB_ID=$(kubectl get jobs -n "${NAMESPACE}" | grep "${JOB_NAME}" | awk '{print $1}') + FAILED_COUNT=$(kubectl get job "${JOB_ID}" -n "${NAMESPACE}" -o json | jq '.status.failed' -r) + if (( "${FAILED_COUNT}" > 0 )); then + kubectl delete job "${JOB_ID}" -n "${NAMESPACE}" + fi +fi diff --git a/bin/elastic-postsync-hook.sh b/bin/elastic-postsync-hook.sh new file mode 100755 index 0000000..bae9460 --- /dev/null +++ b/bin/elastic-postsync-hook.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -e + +if [[ "${SKIP_ELASTIC_POSTSYNC_HOOK}" == "true" ]]; then + echo "Skipped." 
+ exit 0 +fi + +RELEASE_NAME="${1:-elastic}" +NAMESPACE="${2:-elastic}" + +LIMIT=120 +COUNT=1 + +while true; do + STATUS=$(kubectl -n "${NAMESPACE}" get elasticsearch "${RELEASE_NAME}" -o yaml | yq '.status.phase') + if [[ "${STATUS}" != "Ready" && "${COUNT}" -le "${LIMIT}" ]]; then + sleep 1 + ((++COUNT)) + elif [[ "${COUNT}" -gt "${LIMIT}" ]]; then + >2& echo "Limit exceeded." + exit 1 + else + kubectl -n "${NAMESPACE}" get elastic | grep "${RELEASE_NAME}" + break + fi +done diff --git a/bin/fix-k8s-api-mappings.sh b/bin/fix-k8s-api-mappings.sh new file mode 100755 index 0000000..27c7d0a --- /dev/null +++ b/bin/fix-k8s-api-mappings.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -e + +NAMESPACE="${1}" +RELEASE_NAME="${2}" + +PLUGIN_NAME="mapkubeapis" + +if ! helm plugin list | grep -q "${PLUGIN_NAME}"; then + echo "Helm plugin ${PLUGIN_NAME} not installed." + echo "Skipped." +elif [[ -z "$(helm -n "${NAMESPACE}" list --deployed --short --filter '^'${RELEASE_NAME}'$')" ]]; then + echo "Release not deployed. No need to check the API mappings." + echo "Skipped." +else + helm "${PLUGIN_NAME}" --namespace "${NAMESPACE}" "${RELEASE_NAME}" +fi diff --git a/bin/k8s-upgrade/1.27/rotate-linkerd-certs.sh b/bin/k8s-upgrade/1.27/rotate-linkerd-certs.sh new file mode 100755 index 0000000..25286d5 --- /dev/null +++ b/bin/k8s-upgrade/1.27/rotate-linkerd-certs.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +set -e + +function detect_linkerd() { + # shellcheck disable=SC2016 + KODJIN_LINKERD_STATUS="$(kubectl get deployment --namespace=fhir-server --output=yaml | \ + yq '.items[] | select(.spec.template.metadata.annotations."linkerd.io/inject" == "'"${1}"'") | .metadata | .name as $n | .namespace += "="+$n | .namespace')" +} + +function patch() { + for MAP in ${1}; do + MAP="${MAP/=/ }" + MAP=(${MAP}) + + NAMESPACE="${MAP[0]}" + NAME="${MAP[1]}" + + kubectl patch deployment "${NAME}" --patch='{"spec":{"template":{"metadata":{"annotations":{"linkerd.io/inject": "'"${2}"'"}}}}}' --namespace="${NAMESPACE}" + done +} + +detect_linkerd enabled + +patch "${KODJIN_LINKERD_STATUS}" disabled + +detect_linkerd disabled + +patch "${KODJIN_LINKERD_STATUS}" enabled diff --git a/bin/k8s-upgrade/1.27/run-tests.sh b/bin/k8s-upgrade/1.27/run-tests.sh new file mode 100755 index 0000000..89bc9fd --- /dev/null +++ b/bin/k8s-upgrade/1.27/run-tests.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +set -e + +export PATH="${HOME}/.local/bin:${PATH}" + +# Note: In future, fhir-postgres, elt-postgres might be added. + +readonly POSTGRES_NAMESPACE="postgres" +readonly POSTGRES_RELEASE_NAME="postgres" + +# Example output: +#- Cluster: postgres-cluster +# Host: 10.1.2.38 +# Member: postgres-cluster-0 +# Role: Leader +# State: running +# TL: 7 +#- Cluster: postgres-cluster +# Host: 10.1.6.248 +# Lag in MB: 0 +# Member: postgres-cluster-1 +# Role: Sync Standby +# State: running +# TL: 7 +echo "Showing information about Patroni cluster and its members of ${POSTGRES_RELEASE_NAME}..." +readonly POSTGRES_CLUSTER_LIST="$(kubectl -n "${POSTGRES_NAMESPACE}" exec -it -c postgres "${POSTGRES_RELEASE_NAME}-cluster-0" -- patronictl list -f yaml)" +echo "${POSTGRES_CLUSTER_LIST}" + +echo "Checking all the members are running..." +if [[ "$(echo "${POSTGRES_CLUSTER_LIST}" | yq '([.[] | select(.State == "running")] | length) == (. | length)')" == "true" ]]; then + echo "OK." +else + >&2 echo "ERROR: Not all the members are running." + exit 1 +fi + +echo "Checking all the members have correct roles..." 
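+# a healthy Patroni pair is expected to contain exactly one "Leader" and one "Sync Standby" member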
+if [[ "$(echo "${POSTGRES_CLUSTER_LIST}" | yq '([.[] | select(.Role == "Leader")] | length) == 1')" == "true" ]] \ + && [[ "$(echo "${POSTGRES_CLUSTER_LIST}" | yq '([.[] | select(.Role == "Sync Standby")] | length) == 1')" == "true" ]]; then + echo "OK." +else + >&2 echo "ERROR: The roles are not \"Leader\" and \"Sync Standby\"." + exit 1 +fi diff --git a/bin/k8s-upgrade/1.27/upgrade-all.sh b/bin/k8s-upgrade/1.27/upgrade-all.sh new file mode 100755 index 0000000..707ec22 --- /dev/null +++ b/bin/k8s-upgrade/1.27/upgrade-all.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -e + +export PATH="${HOME}/.local/bin:${PATH}" + +echo "Initializing cluster configuration..." +rmk update +rmk config init +rmk cluster switch -f + +echo +"$(dirname "${BASH_SOURCE}")/upgrade-releases.sh" + +echo +"$(dirname "${BASH_SOURCE}")/upgrade-cluster.sh" + +echo +"$(dirname "${BASH_SOURCE}")/upgrade-nodes.sh" + +echo +"$(dirname "${BASH_SOURCE}")/run-tests.sh" diff --git a/bin/k8s-upgrade/1.27/upgrade-cluster.sh b/bin/k8s-upgrade/1.27/upgrade-cluster.sh new file mode 100755 index 0000000..e265a12 --- /dev/null +++ b/bin/k8s-upgrade/1.27/upgrade-cluster.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash + +set -e + +export PATH="${HOME}/.local/bin:${PATH}" + +readonly NAME="$(rmk -ll error config view | yq '.name')" +CLUSTER_NAME="$(rmk -ll error config view | yq '.exported-vars.env.CLUSTER_NAME')" +if [[ "${CLUSTER_NAME}" == "null" ]]; then + CLUSTER_NAME="${NAME}-eks" +fi +CURRENT_CLUSTER_VERSION="$(eksctl get cluster --name "${CLUSTER_NAME}" -o yaml | yq '.[0].Version')" + +export AWS_PROFILE="$(rmk -ll error config view | yq '.aws.profile')" +export AWS_CONFIG_FILE="${HOME}/.aws/config_${AWS_PROFILE}" +export AWS_SHARED_CREDENTIALS_FILE="${HOME}/.aws/credentials_${AWS_PROFILE}" + +readonly NAMESPACE="kube-system" +readonly KUBE_PROXY_RELEASE_NAME="kube-proxy" +readonly COREDNS_RELEASE_NAME="coredns" + +# https://docs.aws.amazon.com/eks/latest/userguide/managing-kube-proxy.html +KUBE_PROXY_IMAGE_PREFIX="$(kubectl -n "${NAMESPACE}" get daemonset "${KUBE_PROXY_RELEASE_NAME}" -o yaml | yq '.spec.template.spec.containers[0].image')" +KUBE_PROXY_IMAGE_PREFIX="${KUBE_PROXY_IMAGE_PREFIX%:*}" +# https://docs.aws.amazon.com/eks/latest/userguide/managing-coredns.html +COREDNS_IMAGE_PREFIX="$(kubectl -n "${NAMESPACE}" get deployment "${COREDNS_RELEASE_NAME}" -o yaml | yq '.spec.template.spec.containers[0].image')" +COREDNS_IMAGE_PREFIX="${COREDNS_IMAGE_PREFIX%:*}" + +# https://docs.aws.amazon.com/eks/latest/userguide/update-cluster.html +# https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html +function upgrade_cluster() { + local DESIRED_CLUSTER_VERSION="${1}" + local KUBE_PROXY_VERSION="${2}" + local COREDNS_VERSION="${3}" + + echo + echo "Current cluster version: ${CURRENT_CLUSTER_VERSION}" + echo "Desired cluster version: ${DESIRED_CLUSTER_VERSION}" + if [[ "${CURRENT_CLUSTER_VERSION//./,}" -ge "${DESIRED_CLUSTER_VERSION//./,}" ]]; then + echo "No control plane upgrade needed." 
+ else + eksctl upgrade cluster --name "${CLUSTER_NAME}" --version "${DESIRED_CLUSTER_VERSION}" --approve + CURRENT_CLUSTER_VERSION="${DESIRED_CLUSTER_VERSION}" + fi + + if [[ "${CURRENT_CLUSTER_VERSION//./,}" -eq "${DESIRED_CLUSTER_VERSION//./,}" ]]; then + kubectl -n "${NAMESPACE}" set image daemonset "${KUBE_PROXY_RELEASE_NAME}" kube-proxy="${KUBE_PROXY_IMAGE_PREFIX}:${KUBE_PROXY_VERSION}" + kubectl -n "${NAMESPACE}" rollout status daemonset "${KUBE_PROXY_RELEASE_NAME}" + kubectl -n "${NAMESPACE}" set image deployment "${COREDNS_RELEASE_NAME}" coredns="${COREDNS_IMAGE_PREFIX}:${COREDNS_VERSION}" + kubectl -n "${NAMESPACE}" rollout status deployment "${COREDNS_RELEASE_NAME}" + fi +} + +echo "Upgrading K8S cluster iteratively..." +upgrade_cluster "1.24" "v1.24.17-minimal-eksbuild.2" "v1.9.3-eksbuild.7" +upgrade_cluster "1.25" "v1.25.14-minimal-eksbuild.2" "v1.9.3-eksbuild.7" +upgrade_cluster "1.26" "v1.26.9-minimal-eksbuild.2" "v1.9.3-eksbuild.7" +upgrade_cluster "1.27" "v1.27.6-minimal-eksbuild.2" "v1.10.1-eksbuild.4" + +echo +echo "Provisioning latest AMI IDs and K8S version..." +rmk cluster provision diff --git a/bin/k8s-upgrade/1.27/upgrade-kafka-operator.sh b/bin/k8s-upgrade/1.27/upgrade-kafka-operator.sh new file mode 100755 index 0000000..e674f85 --- /dev/null +++ b/bin/k8s-upgrade/1.27/upgrade-kafka-operator.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +set -e + +export PATH="${HOME}/.local/bin:${PATH}" + +readonly RELEASE_NAME="kafka-operator" + +readonly CHART_REPO="core-charts" +readonly CHART_NAME="strimzi-kafka-operator" +readonly CHART_VERSION="0.37.0" # kodjin v3.8.3 / deps v2.7.4 +#readonly CHART_VERSION="0.39.0" # kodjin v4.1.0+ / deps v2.10.0+ + +echo "Checking whether ${RELEASE_NAME} release installed..." +if [[ "$(rmk --log-level error release -- -l "app=${RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" != "true" ]]; then + echo "Skipped." + exit +fi + +echo "Upgrading CRDs for chart ${CHART_NAME} to version ${CHART_VERSION}..." +"$(dirname "${BASH_SOURCE}")/../../upgrade-crds.sh" "${CHART_REPO}/${CHART_NAME}" "${CHART_VERSION}" diff --git a/bin/k8s-upgrade/1.27/upgrade-linkerd-planes.sh b/bin/k8s-upgrade/1.27/upgrade-linkerd-planes.sh new file mode 100755 index 0000000..21430f9 --- /dev/null +++ b/bin/k8s-upgrade/1.27/upgrade-linkerd-planes.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash + +set -e + +export PATH="${HOME}/.local/bin:${PATH}" + +readonly ELASTIC_NAMESPACE="elastic" +readonly ELASTIC_RELEASE_NAME="elastic" + +readonly LINKERD_RELEASE_NAME="linkerd" +readonly LINKERD_STACK_NAME="linkerd" + +readonly KAFKA_NAMESPACE="kafka" +readonly KAFKA_RELEASE_NAME="kafka" + +readonly POSTGRES_NAMESPACE="postgres" +readonly POSTGRES_RELEASE_NAME="postgres" + +readonly REDIS_NAMESPACE="redis" +readonly REDIS_RELEASE_NAME="redis" + +readonly ENVIRONMENT="$(rmk -ll error config view | yq '.environment')" + +echo "Checking whether old or new ${LINKERD_RELEASE_NAME} releases installed..." +if [[ "$(rmk --log-level error release -- -l "app=${LINKERD_RELEASE_NAME}" -l "app=${LINKERD_RELEASE_NAME}-control-plane" --log-level error list --output json | yq '[.[] | select(.installed == true)] | length > 0')" != "true" ]]; then + echo "Skipped." + echo + echo "Synchronizing all releases..." 
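+  # neither the old nor the new linkerd releases are installed, so no migration is needed and a plain full sync is enough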
+ rmk release sync + exit +fi + +ALL_SCOPES_COUNT="$(find etc -depth 1 | wc -l)" +((ALL_SCOPES_COUNT--)) # do not count "cluster" directory +readonly DISABLED_SCOPES_COUNT="$(rmk --log-level error release -- -l app=linkerd --log-level error build | yq '.renderedvalues.configs.linkerd | select(.await == false and .inject == "disabled")' | grep await | wc -l)" + +echo +echo "Validating changes to configs.linkerd in the global files..." +if [[ "${ALL_SCOPES_COUNT}" -gt "${DISABLED_SCOPES_COUNT}" ]]; then + >&2 echo "ERROR: All scopes must have the etc//${ENVIRONMENT}/globals.yaml.gotmpl file with linkerd temporary disabled:" + >&2 echo "configs:" + >&2 echo " linkerd:" + >&2 echo " await: false" + >&2 echo " inject: disabled" + >&2 echo "Do not commit the changes, they will be auto-reverted later. Update the global files and retry." + exit 1 +fi +echo "OK." + +echo +echo "Destroying old ${LINKERD_RELEASE_NAME} releases before disabling await/injection in other services..." +rmk release -- -l "app=${LINKERD_RELEASE_NAME}" -l "app=${LINKERD_RELEASE_NAME}-multicluster" -l "app=service-mirror-watcher" destroy + +if [[ "$(rmk --log-level error release -- -l "app=${ELASTIC_RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" == "true" ]]; then + echo + echo "Forcing rolling update of ${ELASTIC_RELEASE_NAME}..." + SKIP_ELASTIC_POSTSYNC_HOOK=true rmk release -- -l "app=${ELASTIC_RELEASE_NAME}-operator" -l "app=${ELASTIC_RELEASE_NAME}" sync + kubectl -n "${ELASTIC_NAMESPACE}" delete pod -l "elasticsearch.k8s.elastic.co/cluster-name=${ELASTIC_RELEASE_NAME}" + "$(dirname "${BASH_SOURCE}")/../../elastic-postsync-hook.sh" "${ELASTIC_RELEASE_NAME}" "${ELASTIC_NAMESPACE}" +fi + +if [[ "$(rmk --log-level error release -- -l "app=${KAFKA_RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" == "true" ]]; then + echo + echo "Forcing rolling update of ${KAFKA_RELEASE_NAME} (needed because of an unsupported update from 2.8.X to 3.5.X)..." + SKIP_KAFKA_POSTSYNC_HOOK=true rmk release -- -l "app=${KAFKA_RELEASE_NAME}-operator" -l "app=${KAFKA_RELEASE_NAME}" sync + kubectl -n "${KAFKA_NAMESPACE}" delete pod -l "app.kubernetes.io/instance=${KAFKA_RELEASE_NAME},strimzi.io/name=${KAFKA_RELEASE_NAME}-kafka" + "$(dirname "${BASH_SOURCE}")/../../kafka-postsync-hook.sh" "${KAFKA_RELEASE_NAME}" "${KAFKA_NAMESPACE}" +fi + +if [[ "$(rmk --log-level error release -- -l "app=${POSTGRES_RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" == "true" ]]; then + echo + echo "Forcing rolling update of ${POSTGRES_RELEASE_NAME}..." 
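+  # re-sync postgres with linkerd disabled, then delete only the spilo pods that still carry the linkerd
+  # sidecar and bounce the connection pooler so that it is recreated without the sidecar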
+ SKIP_POSTGRES_POSTSYNC_HOOK=true rmk release -- -l "app=${POSTGRES_RELEASE_NAME}-operator" -l "app=${POSTGRES_RELEASE_NAME}" sync + kubectl -n "${POSTGRES_NAMESPACE}" delete pod -l "application=spilo,cluster-name=${POSTGRES_RELEASE_NAME}-cluster,linkerd.io/control-plane-ns" + # store current number of replicas of the connection pooler to rollback to the value after all upgrades" + POSTGRES_CLUSTER_POOLER_REPLICAS="$(kubectl -n "${POSTGRES_NAMESPACE}" get postgresql "${POSTGRES_RELEASE_NAME}-cluster" -o yaml | yq '.spec.connectionPooler.numberOfInstances')" + kubectl -n "${POSTGRES_NAMESPACE}" scale deployment "${POSTGRES_RELEASE_NAME}-cluster-pooler" --replicas=0 + kubectl -n "${POSTGRES_NAMESPACE}" scale deployment "${POSTGRES_RELEASE_NAME}-cluster-pooler" --replicas="${POSTGRES_CLUSTER_POOLER_REPLICAS}" + kubectl -n "${POSTGRES_NAMESPACE}" rollout status deployment "${POSTGRES_RELEASE_NAME}-cluster-pooler" + "$(dirname "${BASH_SOURCE}")/../../postgres-postsync-hook.sh" +fi + +if [[ "$(rmk --log-level error release -- -l "app=${REDIS_RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" == "true" ]]; then + echo + echo "Forcing rolling update of ${REDIS_RELEASE_NAME}..." + kubectl -n "${REDIS_NAMESPACE}" delete pod -l "app.kubernetes.io/instance=${REDIS_RELEASE_NAME},app.kubernetes.io/component=replica,linkerd.io/control-plane-ns" + kubectl -n "${REDIS_NAMESPACE}" rollout status statefulset "${REDIS_RELEASE_NAME}-replicas" +fi + +echo +echo "Synchronizing all releases except new ${LINKERD_STACK_NAME} stack..." +rmk release -- -l "stack!=${LINKERD_STACK_NAME}" sync + +echo +echo "Resetting all changes to the global files..." +git restore etc/*/*/globals.yaml.gotmpl +git status + +echo +echo "Synchronizing new ${LINKERD_STACK_NAME} stack..." +rmk release -- -l "stack=${LINKERD_STACK_NAME}" sync + +if [[ "$(rmk --log-level error release -- -l "app=${ELASTIC_RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" == "true" ]]; then + echo + echo "Synchronizing ${ELASTIC_RELEASE_NAME}..." + SKIP_ELASTIC_POSTSYNC_HOOK=true rmk release -- -l "app=${ELASTIC_RELEASE_NAME}-operator" -l "app=${ELASTIC_RELEASE_NAME}" sync + kubectl -n "${ELASTIC_NAMESPACE}" delete pod -l "elasticsearch.k8s.elastic.co/cluster-name=${ELASTIC_RELEASE_NAME}" + "$(dirname "${BASH_SOURCE}")/../../elastic-postsync-hook.sh" "${ELASTIC_RELEASE_NAME}" "${ELASTIC_NAMESPACE}" +fi + +if [[ "$(rmk --log-level error release -- -l "app=${POSTGRES_RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" == "true" ]]; then + echo + echo "Synchronizing ${POSTGRES_RELEASE_NAME}..." 
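+  # with the new linkerd stack in place, restart the cluster statefulset and recreate the pooler
+  # so that all postgres pods come back with the linkerd sidecar injected again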
+ SKIP_POSTGRES_POSTSYNC_HOOK=true rmk release -- -l "app=${POSTGRES_RELEASE_NAME}-operator" -l "app=${POSTGRES_RELEASE_NAME}" sync + kubectl -n "${POSTGRES_NAMESPACE}" rollout restart statefulset "${POSTGRES_RELEASE_NAME}-cluster" + # store current number of replicas of the connection pooler to rollback to the value after all upgrades" + POSTGRES_CLUSTER_POOLER_REPLICAS="$(kubectl -n "${POSTGRES_NAMESPACE}" get postgresql "${POSTGRES_RELEASE_NAME}-cluster" -o yaml | yq '.spec.connectionPooler.numberOfInstances')" + kubectl -n "${POSTGRES_NAMESPACE}" scale deployment "${POSTGRES_RELEASE_NAME}-cluster-pooler" --replicas=0 + kubectl -n "${POSTGRES_NAMESPACE}" scale deployment "${POSTGRES_RELEASE_NAME}-cluster-pooler" --replicas="${POSTGRES_CLUSTER_POOLER_REPLICAS}" + kubectl -n "${POSTGRES_NAMESPACE}" rollout status deployment "${POSTGRES_RELEASE_NAME}-cluster-pooler" + "$(dirname "${BASH_SOURCE}")/../../postgres-postsync-hook.sh" +fi + +echo "Checking all the kafka connectors' tasks are running..." +for KAFKA_CONNECTOR in $(kubectl -n "${KAFKA_NAMESPACE}" get kafkaconnector -o yaml | yq '.items[].metadata.name'); do + echo "${KAFKA_CONNECTOR}:" + if [[ "$(kubectl -n "${KAFKA_NAMESPACE}" get kafkaconnector "${KAFKA_CONNECTOR}" -o yaml | yq '.status.connectorStatus.tasks | ([.[] | select(.state == "RUNNING")] | length) == (. | length)')" == "true" ]]; then + echo "OK." + else + echo "Not all the kafka connector's tasks are running. Deleting its pods to force a restart..." + kubectl -n "${KAFKA_NAMESPACE}" delete pod -l "app.kubernetes.io/instance=${KAFKA_CONNECTOR}" + fi +done + +echo +echo "Synchronizing all releases..." +rmk release sync diff --git a/bin/k8s-upgrade/1.27/upgrade-loki-stack.sh b/bin/k8s-upgrade/1.27/upgrade-loki-stack.sh new file mode 100755 index 0000000..3b89f2a --- /dev/null +++ b/bin/k8s-upgrade/1.27/upgrade-loki-stack.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -e + +export PATH="${HOME}/.local/bin:${PATH}" + +readonly NAMESPACE="loki" +readonly RELEASE_NAME="loki-stack" +readonly PROMTAIL_NAME="promtail" + +echo "Checking whether ${RELEASE_NAME} release installed..." +if [[ "$(rmk --log-level error release -- -l "app=${RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" != "true" ]]; then + echo "Skipped." + exit +fi + +# Error log: +# Error: UPGRADE FAILED: cannot patch "loki-stack-promtail" with kind DaemonSet: DaemonSet.apps "loki-stack-promtail" is invalid: spec.selector: Invalid value: v1.LabelSelector{MatchLabels:map[string]string{"app.kubernetes.io/instance":"loki-stack", "app.kubernetes.io/name":"promtail"}, MatchExpressions:[]v1.LabelSelectorRequirement(nil)}: field is immutable + +# Old selectors: +# selector: +# matchLabels: +# app: promtail +# release: loki-stack + +# New selectors: +# selector: +# matchLabels: +# app.kubernetes.io/instance: loki-stack +# app.kubernetes.io/name: promtail + +readonly PROMTAIL_DS_NAME="$(kubectl -n "${NAMESPACE}" get daemonset -l "app.kubernetes.io/instance=${RELEASE_NAME},app.kubernetes.io/name=${PROMTAIL_NAME}" -o name)" + +if [[ "${PROMTAIL_DS_NAME}" != "" ]]; then + echo "New daemonset ${RELEASE_NAME}-${PROMTAIL_NAME} already exists." + echo "Skipped." + exit +fi + +echo "Deleting old daemonset ${RELEASE_NAME}-${PROMTAIL_NAME} without cascade because of changed immutable selector.matchLabels..." 
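+# the daemonset is recreated with the new selectors by the subsequent "rmk release sync"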
+kubectl -n "${NAMESPACE}" delete daemonset --ignore-not-found=true --wait=true "${RELEASE_NAME}-${PROMTAIL_NAME}" diff --git a/bin/k8s-upgrade/1.27/upgrade-nodes.sh b/bin/k8s-upgrade/1.27/upgrade-nodes.sh new file mode 100755 index 0000000..0876cc4 --- /dev/null +++ b/bin/k8s-upgrade/1.27/upgrade-nodes.sh @@ -0,0 +1,147 @@ +#!/usr/bin/env bash + +set -e + +# optional argument +# e.g. postgres|minio +# find all possible node group names in etc/**/worker-groups.auto.tfvars of a tenant repository +NODE_GROUP_NAME="${1}" + +export PATH="${HOME}/.local/bin:${PATH}" + +# disable client-side pager +export AWS_PAGER= +export AWS_PROFILE="$(rmk --log-level error config view | yq '.aws.profile')" +export AWS_CONFIG_FILE="${HOME}/.aws/config_${AWS_PROFILE}" +export AWS_SHARED_CREDENTIALS_FILE="${HOME}/.aws/credentials_${AWS_PROFILE}" + +readonly NAME="$(rmk --log-level error config view | yq '.name')" +CLUSTER_NAME="$(rmk --log-level error config view | yq '.exported-vars.env.CLUSTER_NAME')" +if [[ "${CLUSTER_NAME}" == "null" ]]; then + CLUSTER_NAME="${NAME}-eks" +fi + +NODE_GROUP_FILTER="" +if [[ -n "${NODE_GROUP_NAME}" ]]; then + NODE_GROUP_FILTER="Name=tag-value,Values=${CLUSTER_NAME}-${NODE_GROUP_NAME}-eks_asg" +fi + +ASG_TAGS=($(aws autoscaling describe-auto-scaling-groups \ + --filters "Name=tag-key,Values=kubernetes.io/cluster/${CLUSTER_NAME}" ${NODE_GROUP_FILTER} \ + --output yaml | yq '.AutoScalingGroups[].Tags[] | select(.Key == "Name") | .Value')) +ASG_NAMES=() + +if [[ ${#ASG_TAGS[@]} -eq 0 ]]; then + >&2 echo "ERROR: No autoscaling group found." + exit 1 +fi + +echo "Rolling-updating nodes..." + +for ASG_TAG in ${ASG_TAGS[@]}; do + ASG_NAME="$(aws autoscaling describe-auto-scaling-groups \ + --filters "Name=tag-value,Values=${ASG_TAG}" \ + --query 'AutoScalingGroups[0].AutoScalingGroupName' \ + --output text + )" + ASG_NAMES+=("${ASG_NAME}") + # nodes with STS/PVC/PV need up to 10 minutes or more to warm up/check health and mount devices + ASG_UPDATE_TIMEOUT_SECONDS=600 + + # remove prefix and suffix from ASG tag to get node group name + NODE_GROUP_NAME="${ASG_TAG#${CLUSTER_NAME}-}" + NODE_GROUP_NAME="${NODE_GROUP_NAME%-eks_asg}" + IS_NODE_GROUP_STATEFUL="true" + PVC_LABELS=""; + case "${NODE_GROUP_NAME}" in + "clickhouse") PVC_LABELS="clickhouse.altinity.com/chi=clickhouse" ;; + "elt-postgres") PVC_LABELS="cluster-name=elt-postgres-cluster" ;; + "es") PVC_LABELS="elasticsearch.k8s.elastic.co/cluster-name=elastic" ;; + "es-jaeger") PVC_LABELS="elasticsearch.k8s.elastic.co/cluster-name=elastic-jaeger" ;; + "fhir-postgres") PVC_LABELS="cluster-name=fhir-postgres-cluster" ;; + "kafka") PVC_LABELS="app.kubernetes.io/instance=kafka" ;; + "loki-stack") PVC_LABELS="release=loki-stack" ;; + "minio") PVC_LABELS="release=minio" ;; + "mongodb") PVC_LABELS="app.kubernetes.io/instance=mongodb" ;; + "postgres") PVC_LABELS="cluster-name=postgres-cluster" ;; + "redis") PVC_LABELS="app.kubernetes.io/instance=redis" ;; + *) IS_NODE_GROUP_STATEFUL="false"; ASG_UPDATE_TIMEOUT_SECONDS=60 ;; + esac + + echo + echo "Node group name: ${NODE_GROUP_NAME}" + echo "Stateful: ${IS_NODE_GROUP_STATEFUL}" + echo "ASG tag: ${ASG_TAG}" + echo "ASG name: ${ASG_NAME}" + echo "ASG update timeout: ${ASG_UPDATE_TIMEOUT_SECONDS}s" + + if [[ "${IS_NODE_GROUP_STATEFUL}" == "true" && "${PVC_LABELS}" != "" ]]; then + echo "PVC labels: ${PVC_LABELS}" + + PV_NAMES="$(kubectl get pvc --all-namespaces -l "${PVC_LABELS}" -o yaml | yq '.items[].spec.volumeName')" + echo "PV names: ${PV_NAMES}" + + # adding pv-dummy to return list of 
items even for cases when we have only 1 PV found + ASG_AZS="$(kubectl get pv pv-dummy ${PV_NAMES} --ignore-not-found -o yaml | yq '.items[].spec.nodeAffinity.required.nodeSelectorTerms[0].matchExpressions[0].values[0]' | sort | uniq)" + echo "ASG availability zones: ${ASG_AZS}" + + ASG_SUBNETS="" + for ASG_AZ in ${ASG_AZS}; do + echo "Getting private subnet for ${ASG_AZ}..." + ASG_SUBNET="$(aws ec2 describe-subnets --filters "Name=tag-value,Values=${NAME}-vpc-private-${ASG_AZ}" --output yaml | yq '.Subnets[0].SubnetId')" + echo "Subnet ID: ${ASG_SUBNET}" + ASG_SUBNETS="${ASG_SUBNETS} ${ASG_SUBNET}" + done + echo "ASG subnets: ${ASG_SUBNETS}" + + aws autoscaling update-auto-scaling-group --auto-scaling-group-name "${ASG_NAME}" \ + --availability-zones ${ASG_AZS} \ + --vpc-zone-identifier "${ASG_SUBNETS// /,}" \ + --default-cooldown ${ASG_UPDATE_TIMEOUT_SECONDS} \ + --default-instance-warmup ${ASG_UPDATE_TIMEOUT_SECONDS} \ + --health-check-grace-period ${ASG_UPDATE_TIMEOUT_SECONDS} || true + else + echo "No ASG AZ update needed for stateless node group." + fi + + # rolling-update node group OR skip in case it is being updated already + echo "Starting instance refresh..." + aws autoscaling start-instance-refresh --auto-scaling-group-name "${ASG_NAME}" || true +done + +echo +echo "Checking instance refresh status.." +while true; do + IN_PROGRESS_ASG_COUNT="${#ASG_NAMES[@]}" + for ASG_NAME in ${ASG_NAMES[@]}; do + ASG_INSTANCE_REFRESH="$(aws autoscaling describe-instance-refreshes \ + --auto-scaling-group-name "${ASG_NAME}" \ + --max-records 1 \ + --output yaml | yq '.InstanceRefreshes[0] | select(.Status != "Successful") | .AutoScalingGroupName')" + if [[ -n "${ASG_INSTANCE_REFRESH}" && "${ASG_INSTANCE_REFRESH}" != "null" ]]; then + echo "ASG ${ASG_NAME} in progress..." + else + ((IN_PROGRESS_ASG_COUNT--)) + fi + done + + if [[ "${IN_PROGRESS_ASG_COUNT}" -gt 0 ]]; then + sleep 10 + else + break + fi +done +echo "Done." + +echo +echo "Fixing pods with a missing linkerd sidecar after the instance refresh..." +PODS_WITH_MISSING_LINKERD_SIDECAR="$(kubectl get pods --all-namespaces -l "!linkerd.io/control-plane-ns" -o yaml | yq '.items[].metadata | select(.annotations["linkerd.io/inject"] == "enabled") | (.namespace + " " + .name)')" +# iterate over lines ignoring spaces +while IFS= read -r NAMESPACE_WITH_POD; do + if [[ -z "${NAMESPACE_WITH_POD}" ]]; then + # no pods found + break + fi + kubectl delete pod --wait=true -n ${NAMESPACE_WITH_POD} +done <<< "${PODS_WITH_MISSING_LINKERD_SIDECAR}" +echo "Done." diff --git a/bin/k8s-upgrade/1.27/upgrade-postgres-operator.sh b/bin/k8s-upgrade/1.27/upgrade-postgres-operator.sh new file mode 100755 index 0000000..6ef63b6 --- /dev/null +++ b/bin/k8s-upgrade/1.27/upgrade-postgres-operator.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -e + +export PATH="${HOME}/.local/bin:${PATH}" + +readonly RELEASE_NAME="postgres-operator" + +readonly CHART_NAME="postgres-operator" +readonly CHART_VERSION="v1.10.1" + +readonly CRDS=("operatorconfigurations" "postgresqls" "postgresteams") + +echo "Checking whether ${RELEASE_NAME} release installed..." +if [[ "$(rmk --log-level error release -- -l "app=${RELEASE_NAME}" --log-level error list --output json | yq '.[0].installed')" != "true" ]]; then + echo "Skipped." + exit +fi + +echo "Upgrading CRDs for chart ${CHART_NAME} to version ${CHART_VERSION}..." 
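+# Note for readers: Helm 3 only installs CRDs from a chart's crds/ directory and does not upgrade
+# them afterwards, which is why the loop below applies them straight from the chart sources.
+# Illustrative expansion of one iteration with the values defined above (postgresqls as an example):
+#   kubectl apply --wait=true -f \
+#     "https://raw.githubusercontent.com/zalando/postgres-operator/v1.10.1/charts/postgres-operator/crds/postgresqls.yaml"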
+ +for CRD in "${CRDS[@]}"; do + kubectl apply --wait=true -f "https://raw.githubusercontent.com/zalando/${CHART_NAME}/${CHART_VERSION}/charts/${CHART_NAME}/crds/${CRD}.yaml" +done diff --git a/bin/k8s-upgrade/1.27/upgrade-releases.sh b/bin/k8s-upgrade/1.27/upgrade-releases.sh new file mode 100755 index 0000000..f05ad0e --- /dev/null +++ b/bin/k8s-upgrade/1.27/upgrade-releases.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +set -e + +# Upgrading kafka operator explicitly for old versions of deps/hooks. +# New versions of deps/hooks should use the "upgrade-crds.sh" hook for upgrading chart CRDs automatically, +# e.g. for operators like kafka-operator +echo +"$(dirname "${BASH_SOURCE}")/upgrade-kafka-operator.sh" + +echo +"$(dirname "${BASH_SOURCE}")/upgrade-postgres-operator.sh" + +echo +"$(dirname "${BASH_SOURCE}")/upgrade-loki-stack.sh" + +echo +"$(dirname "${BASH_SOURCE}")/upgrade-linkerd-planes.sh" diff --git a/bin/kafka-config-postsync-hook.sh b/bin/kafka-config-postsync-hook.sh new file mode 100755 index 0000000..94489bb --- /dev/null +++ b/bin/kafka-config-postsync-hook.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +set -e + +RELEASE_NAME="${1}" +NAMESPACE="${2:-kafka}" + +LIMIT=180 + +# for kafkaconnector also check connector and tasks' states in .status.connectorStatus +# for kafkamirrormaker2 also check connectors' and tasks' states in .status.connectors +GO_TEMPLATE=' + {{- range .items }} + {{- if not .status }}0{{- end }} + {{- range .status.conditions }} + {{- if ne .type "Ready" }}0{{- end }} + {{- if ne .status "True" }}0{{- end }} + {{- end }} + {{- with .status.connectorStatus }} + {{- if ne .connector.state "RUNNING" }}0{{- end }} + {{- range .tasks }} + {{- if ne .state "RUNNING" }}0{{- end }} + {{- end }} + {{- end }} + {{- with .status.connectors }} + {{- range . }} + {{- if ne .connector.state "RUNNING" }}0{{- end }} + {{- range .tasks }} + {{- if ne .state "RUNNING" }}0{{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- end -}} +' + +COUNT=1 +K8S_RESOURCES="kafkaconnect,kafkaconnector,kafkamirrormaker2,kafkatopic" +while true; do + STATUS="$(kubectl -n "${NAMESPACE}" get "${K8S_RESOURCES}" -l "app.kubernetes.io/instance=${RELEASE_NAME}" -o "go-template=${GO_TEMPLATE}")" + if [[ "${STATUS}" != "" && "${COUNT}" -le "${LIMIT}" ]]; then + sleep 1 + ((++COUNT)) + elif [[ "${COUNT}" -gt "${LIMIT}" ]]; then + >2& echo "Limit exceeded." + exit 1 + else + echo + kubectl -n "${NAMESPACE}" get "${K8S_RESOURCES}" -l "app.kubernetes.io/instance=${RELEASE_NAME}" + break + fi +done diff --git a/bin/kafka-postsync-hook.sh b/bin/kafka-postsync-hook.sh new file mode 100755 index 0000000..04f081f --- /dev/null +++ b/bin/kafka-postsync-hook.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash + +set -e + +if [[ "${SKIP_KAFKA_POSTSYNC_HOOK}" == "true" ]]; then + echo "Skipped." + exit 0 +fi + +RELEASE_NAME="${1:-kafka}" +NAMESPACE="${2:-kafka}" + +LIMIT=180 +# higher sleep is needed to wait till the operator starts updating the resources +SLEEP=5 + +# kafka status conditions might also be of type=Warning && status=True which is acceptable as well, e.g.: +# status: +# clusterId: MHiqPXwfSHGO1y3C11-lbw +# conditions: +# - lastTransitionTime: "2022-04-29T12:47:24.824258Z" +# message: The desired Kafka storage configuration contains changes which are not +# allowed. As a result, all storage changes will be ignored. Use DEBUG level logging +# for more information about the detected changes. 
+# reason: KafkaStorage +# status: "True" +# type: Warning +# - lastTransitionTime: "2022-04-29T12:47:29.478Z" +# status: "True" +# type: Ready +# +# another kafka status during upgrade to higher broker/operator versions: +# status: +# clusterId: pduk-sk9SziVmwABVWpnRQ +# conditions: +# - lastTransitionTime: "2023-09-29T10:14:22.526074050Z" +# message: An error while trying to determine the possibility of updating Kafka +# pods +# reason: ForceableProblem +# status: "True" +# type: NotReady +# observedGeneration: 2 +# +# strimzipodset status example: +# status: +# currentPods: 3 +# observedGeneration: 1 +# pods: 3 +# readyPods: 3 +# +# kafkanodepool is not checked for "replicas" +GO_TEMPLATE=' + {{- range .items }} + {{- if not .status }}0{{- end }} + {{- with .status.conditions }} + {{- range . }} + {{- if eq .type "NotReady" }}0{{- end }} + {{- if ne .status "True" }}0{{- end }} + {{- end }} + {{- end }} + {{- if ne .kind "KafkaNodePool" }} + {{- if .status.replicas }} + {{- if not .status.updatedReplicas }}0{{ else if gt .status.replicas .status.updatedReplicas}}0{{- end }} + {{- if not .status.readyReplicas }}0{{ else if ne .status.replicas .status.readyReplicas }}0{{- end }} + {{- end }} + {{- end }} + {{- if .status.pods }} + {{- if not .status.currentPods }}0{{ else if gt .status.pods .status.currentPods}}0{{- end }} + {{- if not .status.readyPods }}0{{ else if ne .status.pods .status.readyPods }}0{{- end }} + {{- end }} + {{- end -}} +' +# initial sleep for the operator +sleep ${SLEEP} + +COUNT=1 +K8S_RESOURCES="deployment,kafka,kafkanodepool,statefulset,strimzipodset" +while true; do + STATUS=$(kubectl -n "${NAMESPACE}" get "${K8S_RESOURCES}" -l "app.kubernetes.io/instance=${RELEASE_NAME}" -o "go-template=${GO_TEMPLATE}") + if [[ "${STATUS}" != "" && "${COUNT}" -le "${LIMIT}" ]]; then + sleep ${SLEEP} + ((++COUNT)) + elif [[ "${COUNT}" -gt "${LIMIT}" ]]; then + >2& echo "Limit exceeded." + exit 1 + else + echo + kubectl -n "${NAMESPACE}" get "${K8S_RESOURCES}" -l "app.kubernetes.io/instance=${RELEASE_NAME}" + break + fi +done + +# Note: KRaft is alpha in Strimzi 0.39.0 and not recommended to run in production. +# Old ZooKeeper nodes should be removed manually after a successful migration or the hook should handle this automatically. diff --git a/bin/keycloak-postsync-hook.sh b/bin/keycloak-postsync-hook.sh new file mode 100755 index 0000000..4ebd1ef --- /dev/null +++ b/bin/keycloak-postsync-hook.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +set -e + +RELEASE_NAME="${1}" +NAMESPACE="${2:-keycloak}" +STATUS_TYPE="${3:-Ready}" +KEYCLOAK_RESOURCE="${4:-keycloak}" + +LIMIT=180 + +[[ "${STATUS_TYPE}" == "Done" ]] && KEYCLOAK_RESOURCE="keycloakrealmimport" + +# for keycloak cluster +GO_TEMPLATE=' + {{- range .items }} + {{- if not .status }}0{{- end }} + {{- if not .status.conditions}}0{{- end }} + {{- with .status.conditions }} + {{- if ne (index . 0).status "True" }}0{{- end }} + {{- if ne (index . 0).type "'"${STATUS_TYPE}"'" }}0{{- end }} + {{- end }} + {{- end -}} +' + +function cmd() { + kubectl -n "${NAMESPACE}" get "${KEYCLOAK_RESOURCE}" -l "app.kubernetes.io/instance=${RELEASE_NAME}" "${@}" +} + +COUNT=1 +while true; do + STATUS="$(cmd -o "go-template=${GO_TEMPLATE}")" + if [[ "${STATUS}" != "" && "${COUNT}" -le "${LIMIT}" ]]; then + sleep 1 + ((++COUNT)) + elif [[ "${COUNT}" -gt "${LIMIT}" ]]; then + >2& echo "Limit exceeded." 
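+    # Reaching this branch means the go-template above kept emitting "0" markers for LIMIT (180)
+    # polls at 1s intervals, i.e. at least one resource never reported a first condition of
+    # type "${STATUS_TYPE}" with status "True". An illustrative way to inspect what is stuck,
+    # using the cmd helper defined above:
+    #   cmd -o yaml | yq '.items[].status.conditions'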
+ exit 1 + else + echo + cmd + break + fi +done diff --git a/bin/label-k8s-resource.sh b/bin/label-k8s-resource.sh new file mode 100755 index 0000000..c2bc62b --- /dev/null +++ b/bin/label-k8s-resource.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -e + +CURRENT_ENVIRONMENT="${1}" +EXPECTED_ENVIRONMENT="${2}" +K8S_NAMESPACE="${3}" +K8S_RESOURCE_TYPE="${4}" +K8S_RESOURCE_NAME="${5}" +# get rest of arguments +K8S_LABELS=("${@}") +K8S_LABELS="${K8S_LABELS[@]:5}" + +if [[ "${CURRENT_ENVIRONMENT}" != "${EXPECTED_ENVIRONMENT}" ]]; then + echo "Environment ${CURRENT_ENVIRONMENT} skipped when labeling, expected: ${EXPECTED_ENVIRONMENT}" + exit +fi + +kubectl -n "${K8S_NAMESPACE}" label --overwrite "${K8S_RESOURCE_TYPE}" "${K8S_RESOURCE_NAME}" ${K8S_LABELS} diff --git a/bin/postgres-postsync-hook.sh b/bin/postgres-postsync-hook.sh new file mode 100755 index 0000000..cee7ebe --- /dev/null +++ b/bin/postgres-postsync-hook.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +set -e + +if [[ "${SKIP_POSTGRES_POSTSYNC_HOOK}" == "true" ]]; then + echo "Skipped." + exit 0 +fi + +CLUSTER_NAME="${1:-postgres-cluster}" +NAMESPACE="${2:-postgres}" +CRD_NAME="${3:-postgresql}" + +readonly LIMIT=600 +COUNT=1 + +function disable_pooler_metrics_scraping() { + local POOLER_NAME="${1}" + + echo + if ! (kubectl -n "${NAMESPACE}" get deployment "${POOLER_NAME}" &> /dev/null); then + echo "Pooler ${POOLER_NAME} not enabled." + echo "Skipped." + return + fi + + local POOLER_YAML="$(kubectl -n "${NAMESPACE}" get deployment "${POOLER_NAME}" -o yaml)" + local POOLER_MINIMAL_REPLICAS=1 + local POOLER_CURRENT_REPLICAS="$(echo "${POOLER_YAML}" | yq '.spec.replicas')" + + if [[ "$(echo "${POOLER_YAML}" | yq '.spec.template.metadata.annotations["prometheus.io/scrape"]')" == "true" ]]; then + echo "Scaling ${POOLER_NAME} replicas to ${POOLER_MINIMAL_REPLICAS} to avoid pending pods during rolling update..." + kubectl -n "${NAMESPACE}" scale deployment "${POOLER_NAME}" --replicas="${POOLER_MINIMAL_REPLICAS}" + kubectl -n "${NAMESPACE}" rollout status deployment "${POOLER_NAME}" + + echo "Disabling ${POOLER_NAME} metrics scraping..." + kubectl -n "${NAMESPACE}" patch deployment "${POOLER_NAME}" --type='merge' \ + -p '{"spec": {"template": {"metadata": {"annotations": {"prometheus.io/scrape": "false"}}}}}' + kubectl -n "${NAMESPACE}" rollout status deployment "${POOLER_NAME}" + + echo "Scaling ${POOLER_NAME} replicas back to ${POOLER_CURRENT_REPLICAS}..." + kubectl -n "${NAMESPACE}" scale deployment "${POOLER_NAME}" --replicas="${POOLER_CURRENT_REPLICAS}" + kubectl -n "${NAMESPACE}" rollout status deployment "${POOLER_NAME}" + else + echo "Metrics scraping for ${POOLER_NAME} not enabled or already disabled." + echo "Skipped." + fi +} + +while true; do + STATUS=$(kubectl -n "${NAMESPACE}" get "${CRD_NAME}" "${CLUSTER_NAME}" -o yaml | yq '.status.PostgresClusterStatus') + if [[ "${STATUS}" != "Running" && "${COUNT}" -le "${LIMIT}" ]]; then + sleep 1 + ((++COUNT)) + elif [[ "${COUNT}" -gt "${LIMIT}" ]]; then + >2& echo "Limit exceeded." 
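+    # Reaching this branch means .status.PostgresClusterStatus never became "Running" within
+    # LIMIT (600) polls at 1s intervals. An illustrative manual check with the default arguments
+    # (postgres-cluster / postgres / postgresql):
+    #   kubectl -n postgres get postgresql postgres-cluster -o yaml | yq '.status'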
+ exit 1 + else + kubectl -n "${NAMESPACE}" get "${CRD_NAME}" "${CLUSTER_NAME}" + break + fi +done + +disable_pooler_metrics_scraping "${CLUSTER_NAME}-pooler" +disable_pooler_metrics_scraping "${CLUSTER_NAME}-pooler-repl" diff --git a/bin/restart-airbyte-worker.sh b/bin/restart-airbyte-worker.sh new file mode 100755 index 0000000..cb6fdee --- /dev/null +++ b/bin/restart-airbyte-worker.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +# This hook restarts the airbyte-worker pods if they were not restarted +# during the latest airbyte's release sync. This helps to resolve an issue +# when the airbyte-worker uses a revoked airbyte-admin ServiceAccount (https://github.com/airbytehq/airbyte/issues/7211). +# This happens because the airbyte's charts are implemented in such a way that +# the airbyte-admin ServiceAccount is recreated each time even if there were no +# changes in any of the airbyte components. As a result we have the airbyte-worker pod +# using the old ServiceAccount that has been already revoked. This leads to crashes +# during the airbyte's operations when it tries to create new pods. After a restart +# the airbyte-worker pod begins using the actual ServiceAccount. +# The hook is intended to be used on the postsync event of the airbyte's release. + +set -e + +function get_os() { + UNAME_OUT="$(uname -s)" + case "${UNAME_OUT}" in + Linux*) MACHINE="Linux";; + Darwin*) MACHINE="Mac";; + *) MACHINE="UNKNOWN:${UNAME_OUT}" + esac + echo "${MACHINE}" +} + +OS="$(get_os)" +NAMESPACE="${1:-airbyte}" +RELEASE_NAME="${2:-airbyte}" +# default: 1min +ALLOWED_TIME_SEC="${3:-60}" +HAS_ROLLOUT="true" + +if [[ -z "${ALLOWED_TIME_SEC}" || "${ALLOWED_TIME_SEC}" == "0" ]] ; then + echo "ALLOWED_TIME_SEC must be more than 0"; +fi + +# GET SELECTOR OF PODS +SELECTORS="$(kubectl get deployment -n "${NAMESPACE}" "${RELEASE_NAME}-worker" --output="json" | jq -j '.spec.selector.matchLabels | to_entries | .[] | "\(.key)=\(.value),"')" +SELECTORS="$(echo "${SELECTORS}" | sed 's/,*$//g')" # TRIM SYMBOLS + +PODS_AIRBYTE_WORKERS="$(kubectl get pods -n "${NAMESPACE}" -o jsonpath="{.items[*].metadata.name}" --selector="${SELECTORS}" --field-selector="status.phase=Running")" + +# HAS RUN PROCESSING ROLLOUT +POD_CREATION_TIMESTAMP="" +for POD_NAME in ${PODS_AIRBYTE_WORKERS}; do + POD_EVENTS="$(kubectl get events -n "${NAMESPACE}" --field-selector="involvedObject.kind=Pod,involvedObject.name=${POD_NAME},type=Warning,reason=Failed" --chunk-size=1 -o jsonpath="{.items[*].reason}")" + if [[ ! -z "${POD_EVENTS}" ]]; then + HAS_ROLLOUT="false" + break + fi + + CURRENT_POD_CREATION_TIMESTAMP="$(kubectl get pods -n "${NAMESPACE}" "${POD_NAME}" -o jsonpath="{.metadata.creationTimestamp}")" + if [[ "${CURRENT_POD_CREATION_TIMESTAMP}" > "${POD_CREATION_TIMESTAMP}" ]]; then + POD_CREATION_TIMESTAMP="${CURRENT_POD_CREATION_TIMESTAMP}" + fi +done + +if [[ "${HAS_ROLLOUT}" == "true" ]] && [[ ! -z "${POD_CREATION_TIMESTAMP}" ]]; then + SA_CREATION_TIMESTAMP="$(kubectl get sa -n "${NAMESPACE}" "${RELEASE_NAME}-admin" -o jsonpath="{.metadata.creationTimestamp}")" + + if [[ "${OS}" == "Linux" ]]; then + POD_DATE="$(date -d "$(echo ${POD_CREATION_TIMESTAMP} | sed 's/T/ /; s/Z//')" "+%s")" + SA_DATE="$(date -d "$(echo ${SA_CREATION_TIMESTAMP} | sed 's/T/ /; s/Z//')" "+%s")" + elif [[ "${OS}" == "Mac" ]]; then + POD_DATE="$(date -jf "%Y-%m-%dT%H:%M:%SZ" "${POD_CREATION_TIMESTAMP}" "+%s")" + SA_DATE="$(date -jf "%Y-%m-%dT%H:%M:%SZ" "${SA_CREATION_TIMESTAMP}" "+%s")" + else + echo "Not supported OS ${OS}. 
Supported: (Mac|Linux)" + exit 1 + fi + + DIFF_SEC="$((SA_DATE - POD_DATE))" + echo "Diff of timestamps: ${DIFF_SEC} seconds" + + HAS_ROLLING="$((DIFF_SEC > ALLOWED_TIME_SEC))" + if [[ -z "${HAS_ROLLING}" || "${HAS_ROLLING}" == "1" ]]; then + echo "Forcing rolling update of ${RELEASE_NAME} resources ${RELEASE_NAME}-worker..." + kubectl -n "${NAMESPACE}" rollout restart deployment "${RELEASE_NAME}-worker" + kubectl -n "${NAMESPACE}" rollout status deployment "${RELEASE_NAME}-worker" + fi +fi diff --git a/bin/restore-volume-snapshot/inventory.yaml b/bin/restore-volume-snapshot/inventory.yaml new file mode 100644 index 0000000..4ff6da1 --- /dev/null +++ b/bin/restore-volume-snapshot/inventory.yaml @@ -0,0 +1,58 @@ +releases: + clickhouse: + cluster: + name: chi-clickhouse-cluster-0-0 + replicas: 1 + resourceType: statefulset + namespace: clickhouse + operator: + name: clickhouse-operator + replicas: 1 + resourceType: deployment + namespace: clickhouse + minio: + cluster: + name: minio + replicas: 4 + resourceType: statefulset + namespace: minio + mongodb: + cluster: + name: mongodb + replicas: 3 + resourceType: statefulset + namespace: mongodb + postgres: + cluster: + name: postgres-cluster + replicas: 2 + resourceType: statefulset + namespace: postgres + claimSelector: + matchLabels: + cluster-name: postgres-cluster + operator: + name: postgres-operator + replicas: 1 + resourceType: deployment + namespace: postgres + elt-postgres: + cluster: + name: elt-postgres-cluster + replicas: 2 + resourceType: statefulset + namespace: postgres + claimSelector: + matchLabels: + cluster-name: elt-postgres-cluster + operator: + name: postgres-operator + replicas: 1 + resourceType: deployment + namespace: postgres + zookeeper: + cluster: + name: zookeeper + replicas: 3 + resourceType: statefulset + namespace: zookeeper diff --git a/bin/restore-volume-snapshot/restore-volume-snapshot.sh b/bin/restore-volume-snapshot/restore-volume-snapshot.sh new file mode 100755 index 0000000..0784287 --- /dev/null +++ b/bin/restore-volume-snapshot/restore-volume-snapshot.sh @@ -0,0 +1,320 @@ +#!/usr/bin/env bash + +### RESTORE VOLUME SNAPSHOT script ### +# Requirements: +# - yq >= 4.28.* +# - Initialized tenant repo via rmk. +# - Previously installed and running snapshot scheduler for the required release. + +set -e + +readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly WORK_DIR="${SCRIPT_DIR}" +readonly PVC_DATA_FILE="${WORK_DIR}/pvc-data.yaml" +readonly PVC_PREPARE_FILE="${WORK_DIR}/pvc-prepare.yaml" +readonly PVC_RESTORE_FILE="${WORK_DIR}/pvc-restore.yaml" +readonly INVENTORY_FILE="${WORK_DIR}/inventory.yaml" +readonly RELEASE_NAME="${2}" +readonly SNAPSHOT_DATE="${3}" +readonly OLD_IFS="${IFS}" + +function clear_work_dir() { + rm -rf "${WORK_DIR}"/pvc-*.yaml +} + +function create_work_dir() { + mkdir -p "${WORK_DIR}" +} + +### +# Handling exceptions. +### +function check_release_name() { + if [[ -z "${RELEASE_NAME}" ]]; then + >&2 echo "ERROR: release name not specified." + return 1 + fi +} + +function check_snapshot_date() { + if [[ -z "${SNAPSHOT_DATE}" ]]; then + >&2 echo "ERROR: snapshot date not specified." + return 1 + fi +} + +function check_inventory() { + if [[ ! -f "${INVENTORY_FILE}" ]]; then + >&2 echo "ERROR: ${INVENTORY_FILE} - not exist." + return 1 + fi + + COUNT_RELEASES="$(yq '.releases | length' "${INVENTORY_FILE}")" + if ((COUNT_RELEASES == 0)); then + >&2 echo "ERROR: the inventory file does not contain the listed releases." 
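+    # For orientation, a minimal inventory.yaml that passes this check looks roughly like the
+    # bundled bin/restore-volume-snapshot/inventory.yaml, e.g. (illustrative excerpt):
+    #   releases:
+    #     postgres:
+    #       cluster:
+    #         name: postgres-cluster
+    #         replicas: 2
+    #         resourceType: statefulset
+    #         namespace: postgres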
+ return 1 + fi +} + +function check_inventory_release_resource() { + check_inventory + COUNT_RESOURCES="$(yq '.releases.'"${RELEASE_NAME}"' | length' "${INVENTORY_FILE}")" + if ((COUNT_RESOURCES == 0)); then + >&2 echo "ERROR: the inventory file does not contain the listed resources for selected release ${RELEASE_NAME}." + return 1 + fi +} + +### +# Reading and validating an inventory file. +### +function validate_inventory_release_options() { + KEY_OPTION="$(yq '.releases.'"${RELEASE_NAME}"'.'"${1}"' | has("'"${2}"'")' "${INVENTORY_FILE}")" + LEN_VALUE_OPTION="$(yq '.releases.'"${RELEASE_NAME}"'.'"${1}"'.'"${2}"'| length' "${INVENTORY_FILE}")" + if [[ "${KEY_OPTION}" == "false" ]] || ((LEN_VALUE_OPTION == 0)); then + >&2 echo "ERROR: the inventory file does not contain the option ${2} in resource ${1} for selected release ${RELEASE_NAME}." + return 1 + fi +} + +function get_inventory_release_options() { + check_inventory_release_resource + if [[ "$(yq '.releases.'"${RELEASE_NAME}"' | has("'"${1}"'")' "${INVENTORY_FILE}")" == "true" ]]; then + validate_inventory_release_options "${1}" resourceType + INVENTORY_RELEASE_RESOURCE_TYPE="$(yq '.releases.'"${RELEASE_NAME}"'.'"${1}"'.resourceType' "${INVENTORY_FILE}")" + validate_inventory_release_options "${1}" namespace + INVENTORY_RELEASE_NAMESPACE="$(yq '.releases.'"${RELEASE_NAME}"'.'"${1}"'.namespace' "${INVENTORY_FILE}")" + validate_inventory_release_options "${1}" replicas + INVENTORY_RELEASE_REPLICAS_COUNT="$(yq '.releases.'"${RELEASE_NAME}"'.'"${1}"'.replicas' "${INVENTORY_FILE}")" + validate_inventory_release_options "${1}" name + INVENTORY_RELEASE_RESOURCE_NAME="$(yq '.releases.'"${RELEASE_NAME}"'.'"${1}"'.name' "${INVENTORY_FILE}")" + + INVENTORY_RELEASE_CLAIM_SELECTOR_MATCH_LABELS="" + if [[ "$(yq '.releases.'"${RELEASE_NAME}"'.'"${1}"' | has("claimSelector")' "${INVENTORY_FILE}")" != "true" ]]; then + return 0 + fi + + validate_inventory_release_options "${1}.claimSelector" matchLabels + + LABELS_COUNT=0 + IFS=$'\n' + for ITEM in $(yq -r '.releases.'"${RELEASE_NAME}"'.'"${1}"'.claimSelector.matchLabels' "${INVENTORY_FILE}"); do + if ((LABELS_COUNT == 0)); then + INVENTORY_RELEASE_CLAIM_SELECTOR_MATCH_LABELS="${INVENTORY_RELEASE_CLAIM_SELECTOR_MATCH_LABELS}${ITEM/: /=}" + else + INVENTORY_RELEASE_CLAIM_SELECTOR_MATCH_LABELS="${INVENTORY_RELEASE_CLAIM_SELECTOR_MATCH_LABELS},${ITEM/: /=}" + fi + + ((++LABELS_COUNT)) + done + + INVENTORY_RELEASE_CLAIM_SELECTOR_MATCH_LABELS="-l ${INVENTORY_RELEASE_CLAIM_SELECTOR_MATCH_LABELS}" + else + false + fi +} + +### +# Processing snapshots and PVСs for the selected release. +### +function get_existing_pvc() { + clear_work_dir + eval kubectl get pvc -n "${INVENTORY_RELEASE_NAMESPACE}" -o yaml "${INVENTORY_RELEASE_CLAIM_SELECTOR_MATCH_LABELS}" | yq '. | + del(.items[].status, + .items[].spec.volumeMode, + .items[].spec.volumeName, + .items[].spec.dataSource, + .items[].metadata.annotations."kubectl.kubernetes.io/last-applied-configuration", + .items[].metadata.finalizers, + .items[].metadata.creationTimestamp, + .items[].metadata.uid, + .items[].metadata.resourceVersion) | .items' > "${PVC_DATA_FILE}" +} + +function add_separators() { + if ((${1} <= PVC_LENGTH-1)); then + echo "---" >> "${2}" + fi +} + +function get_current_pvc_data() { + if [[ ! -f "${PVC_DATA_FILE}" ]]; then + >&2 echo "ERROR: ${PVC_DATA_FILE} - not created." + return 1 + fi + + readonly PVC_DATA="$(yq '.' 
"${PVC_DATA_FILE}")" + readonly PVC_LENGTH="$(yq 'length' "${PVC_DATA_FILE}")" +} + +function prepare_pvc() { + COUNT=0 + touch "${PVC_PREPARE_FILE}" + while [ "${COUNT}" -lt "${PVC_LENGTH}" ]; do + PVC_NAME="$(echo "${PVC_DATA}" | yq '.['"${COUNT}"'].metadata.name')" + echo "Found PVC: ${PVC_NAME}" + if (kubectl get volumesnapshot "${PVC_NAME}-${RELEASE_NAME}-ebs-csi-snapshot-${SNAPSHOT_DATE}" -n "${INVENTORY_RELEASE_NAMESPACE}" 1> /dev/null); then + echo "${PVC_DATA}" | yq '.['"${COUNT}"'] | .spec += + {"dataSource": + {"apiGroup":"snapshot.storage.k8s.io", + "kind":"VolumeSnapshot", + "name":"'"${PVC_NAME}"'-'"${RELEASE_NAME}"'-ebs-csi-snapshot-'"${SNAPSHOT_DATE}"'"}}' >> "${PVC_PREPARE_FILE}" + fi + + ((++COUNT)) + add_separators "${COUNT}" "${PVC_PREPARE_FILE}" + done +} + +function restore_pvc() { + if [[ ! -f "${PVC_PREPARE_FILE}" ]]; then + >&2 echo "ERROR: ${PVC_PREPARE_FILE} - not created." + return 1 + fi + + COUNT=0 + touch "${PVC_RESTORE_FILE}" + while [ "${COUNT}" -lt "${PVC_LENGTH}" ]; do + PVC_NAME="$(echo "${PVC_DATA}" | yq '.['"${COUNT}"'].metadata.name')" + PV_NAME="$(kubectl get pv -o yaml | yq '.items[] | select(.spec.claimRef.name == "'"${PVC_NAME}"'") | .metadata.name')" + echo "Found PV: ${PV_NAME} for PVC: ${PVC_NAME}" + if [[ -n "${PV_NAME}" ]]; then + yq 'select(document_index == '"${COUNT}"') | .spec += + {"volumeMode": "Filesystem", "volumeName": "'"${PV_NAME}"'"}' "${PVC_PREPARE_FILE}" >> "${PVC_RESTORE_FILE}" + fi + + ((++COUNT)) + add_separators "${COUNT}" "${PVC_RESTORE_FILE}" + done +} + +### +# Downscale or upscale resources for the selected release. +### +function get_available_replicas() { + AVAILABLE_REPLICAS="$(kubectl get -n "${INVENTORY_RELEASE_NAMESPACE}" "${INVENTORY_RELEASE_RESOURCE_TYPE}" \ + "${INVENTORY_RELEASE_RESOURCE_NAME}" -o yaml | yq '.status.availableReplicas')" +} + +function scale_replicas() { + RESOURCES=("${1}") + COUNT=0 + IFS="${OLD_IFS}" + + for RESOURCE in ${RESOURCES[*]}; do + if get_inventory_release_options "${RESOURCE}"; then + if [[ "${3}" == "upscale" ]]; then + COUNT="${INVENTORY_RELEASE_REPLICAS_COUNT}" + fi + + get_available_replicas + echo "Inventory ${INVENTORY_RELEASE_RESOURCE_NAME} replicas count: ${INVENTORY_RELEASE_REPLICAS_COUNT}" + echo "Available ${INVENTORY_RELEASE_RESOURCE_NAME} replicas count: ${AVAILABLE_REPLICAS}" + if (("${2}")); then + kubectl -n "${INVENTORY_RELEASE_NAMESPACE}" scale "${INVENTORY_RELEASE_RESOURCE_TYPE}" "${INVENTORY_RELEASE_RESOURCE_NAME}" --replicas="${COUNT}" + echo -ne "Wait ${3} ${RELEASE_NAME} release for resource: ${INVENTORY_RELEASE_RESOURCE_TYPE}, name: ${INVENTORY_RELEASE_RESOURCE_NAME}" + while (("${4}")); do + get_available_replicas + echo -ne " . " + sleep 1 + done + + echo -en "\n" + echo "${5}" + fi + fi + done +} + +function downscale_release() { + # Required parameters + # 1 - resources list + # 2 - first check of the process start condition + # 3 - process + # 4 - condition to wait for execution + # 5 - final message + scale_replicas "operator cluster" \ + "INVENTORY_RELEASE_REPLICAS_COUNT == AVAILABLE_REPLICAS" \ + "downscale" \ + "AVAILABLE_REPLICAS > 0" \ + "Release: ${RELEASE_NAME}, reducing the number of replicas to 0." 
+} + +function upscale_release() { +  # Required parameters +  # 1 - resources list +  # 2 - first check of the process start condition +  # 3 - process +  # 4 - condition to wait for execution +  # 5 - final message +  scale_replicas "operator cluster" \ +    "AVAILABLE_REPLICAS == 0" \ +    "upscale" \ +    "AVAILABLE_REPLICAS < INVENTORY_RELEASE_REPLICAS_COUNT" \ +    "Release: ${RELEASE_NAME}, replicas scaled up according to the inventory file." +} + +### +# Calling main commands. +### +case ${1} in +help|h|-h|--help) +  HELP='RESTORE VOLUME SNAPSHOT script - automation of the snapshot restore process for a selected stateful Helm release. + +COMMANDS: +  list | l - listing all available Helm releases defined in the inventory file. +  list-snapshots | ls - listing all snapshots for the selected Helm release. +    args: +      1. - release name. +  prepare | p - preparation of an intermediate PVC manifest from the specified snapshot date and Helm release. +    args: +      1. - release name. +      2. - snapshot date in the format [202210130000] (