Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incremental version checks during migration #2399

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions bin/ck8s
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ usage() {
echo " upgrade <wc|sc|both> <vX.Y> apply runs all apply steps upgrading the environment" 1>&2
echo " upgrade <wc|sc|both> <vX.Y> prepare runs all prepare steps upgrading the configuration" 1>&2
echo " validate <wc|sc> validates config files" 1>&2
echo " version <wc|sc> shows apps version" 1>&2
exit 1
}

Expand Down Expand Up @@ -153,5 +154,17 @@ diagnostics)
shift
"${here}/diagnostics.bash" "${@}"
;;
version)
shift
case "${1:-both}" in
both)
log_info "sc version:" "$(get_version "sc")"
log_info "wc version:" "$(get_version "wc")"
;;
sc | wc)
log_info "${1} version:" "$(get_version "${1}")"
;;
esac
;;
*) usage ;;
esac
5 changes: 5 additions & 0 deletions bin/common.bash
Original file line number Diff line number Diff line change
Expand Up @@ -636,3 +636,8 @@ with_s3cfg() {
# bucket needs to be created.
sops_exec_file_no_fifo "${s3cfg}" 'S3COMMAND_CONFIG_FILE="{}" '"${*}"
}

# Retrieve apps version from configmap
get_version() {
"${here}/ops.bash" kubectl "${1}" get cm -n kube-system apps-meta -o jsonpath --template='{.data.version}'
}
55 changes: 55 additions & 0 deletions bin/upgrade.bash
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ prepare() {

snippets_check prepare "${snippets}"

# Create a configmap. This fails if done twice.
if [[ "${CK8S_CLUSTER:-}" =~ ^(sc|both)$ ]]; then
if ! record_migration_prepare_begin sc; then
log_fatal "prepare already started in sc (kubectl delete -n kube-system configmap apps-upgrade to try again)"
fi
fi
if [[ "${CK8S_CLUSTER:-}" =~ ^(wc|both)$ ]]; then
if ! record_migration_prepare_begin wc; then
log_fatal "prepare already started in wc (kubectl delete -n kube-system configmap apps-upgrade to try again)"
fi
Comment on lines +57 to +64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Move the log_fatal inside the function instead of assuming what a non-zero exit code means.

fi

for snippet in ${snippets}; do
if [[ "$(basename "${snippet}")" == "00-template.sh" ]]; then
continue
Expand All @@ -60,6 +72,12 @@ prepare() {
log_info "prepare snippet \"${snippet##"${ROOT}/migration/"}\":"
if "${snippet}"; then
log_info "prepare snippet success\n---"
if [[ "${CK8S_CLUSTER:-}" == "both" ]]; then
record_migration_prepare_step "sc" "${snippet}"
record_migration_prepare_step "wc" "${snippet}"
else
record_migration_prepare_step "${CK8S_CLUSTER}" "${snippet}"
fi
else
log_fatal "prepare snippet failure"
fi
Expand All @@ -68,9 +86,11 @@ prepare() {
config_validate secrets
if [[ "${CK8S_CLUSTER:-}" =~ ^(sc|both)$ ]]; then
config_validate sc
record_migration_prepare_done sc
fi
if [[ "${CK8S_CLUSTER:-}" =~ ^(wc|both)$ ]]; then
config_validate wc
record_migration_prepare_done wc
fi
}

Expand All @@ -93,6 +113,28 @@ apply() {

snippets_check apply "${snippets}"

local prepared_version
if [[ "${CK8S_CLUSTER:-}" =~ ^(sc|both)$ ]]; then
prepared_version="$(get_prepared_version sc || true)"
if [ -z "${prepared_version}" ]; then
log_fatal "'prepare' step does not appear to have been run, do so first"
fi

if [[ "${prepared_version}" != "${CK8S_TARGET_VERSION}" ]]; then
log_fatal "'prepare' step in sc appears to have been run for version ${prepared_version}, not ${CK8S_TARGET_VERSION}"
fi
Comment on lines +118 to +125
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Extract into a separate function and reuse for both sc and wc.

fi
if [[ "${CK8S_CLUSTER:-}" =~ ^(wc|both)$ ]]; then
prepared_version="$(get_prepared_version wc || true)"
if [ -z "${prepared_version}" ]; then
log_fatal "'prepare' step does not appear to have been run, do so first"
fi

if [[ "${prepared_version}" != "${CK8S_TARGET_VERSION}" ]]; then
log_fatal "'prepare' step in wc appears to have been run for version ${prepared_version}, not ${CK8S_TARGET_VERSION}"
fi
fi

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note from @Xartos - record beginning of apply step

for snippet in ${snippets}; do
if [[ "$(basename "${snippet}")" == "00-template.sh" ]]; then
continue
Expand All @@ -101,6 +143,12 @@ apply() {
log_info "apply snippet \"${snippet##"${ROOT}/migration/"}\":"
if "${snippet}" execute; then
log_info "apply snippet success\n---"
if [[ "${CK8S_CLUSTER:-}" == "both" ]]; then
record_migration_apply_step "sc" "${snippet}"
record_migration_apply_step "wc" "${snippet}"
else
record_migration_apply_step "${CK8S_CLUSTER}" "${snippet}"
fi
else
local return="${?}"
log_error "apply snippet execute failure"
Expand All @@ -118,6 +166,13 @@ apply() {
fi
fi
done

if [[ "${CK8S_CLUSTER:-}" =~ ^(sc|both)$ ]]; then
record_migration_done sc
fi
if [[ "${CK8S_CLUSTER:-}" =~ ^(wc|both)$ ]]; then
record_migration_done wc
fi
}

usage() {
Expand Down
69 changes: 69 additions & 0 deletions scripts/migration/lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ check_version() {
return
fi

# `--exit-status` can be used instead of comparing to "null"
common_override=$(yq4 '.global.ck8sVersion' "${CK8S_CONFIG_PATH}/common-config.yaml")
sc_wc_override=$(yq4 '.global.ck8sVersion' "${CK8S_CONFIG_PATH}/${1}-config.yaml")
if [ "$common_override" != "null" ] || [ "$sc_wc_override" != "null" ]; then
Expand All @@ -281,6 +282,14 @@ check_version() {
fi
fi

cluster_version="$(get_apps_version "${1}" >/dev/null 2>&1 || true)"
if [ -n "${cluster_version}" ]; then
log_info "Currently running ${cluster_version} according to cluster"
if [ "${common_override}" != "null" ] && [ "${cluster_version%.*}" != "${common_override}" ]; then
log_warn "Version mismatch, cluster ${cluster_version}, config ${common_override}"
fi
fi

if [[ ! "${VERSION["${1}-config"]}" =~ v[0-9]+\.[0-9]+\.[0-9]+ ]]; then
log_warn "reducing version validation of ${1}-config for version \"${VERSION["${1}-config"]}\""
else
Expand Down Expand Up @@ -367,6 +376,66 @@ append_trap() {
trap "$(new_trap)" "${signal}"
}

# usage: [[ "$(get_apps_version)" == "0.x" ]]
get_apps_version() {
kubectl_do "${1}" get cm -n kube-system apps-meta -o jsonpath --template="{.data.version}"
}

# Usage: record_migration_prepare_begin sc|wc
record_migration_prepare_begin() {
# This ConfigMap should only exist while doing an upgrade.
# Abort if it already exists
kubectl_do "${1}" create configmap --dry-run=client -o yaml \
-n kube-system apps-upgrade --from-literal "prepare=${CK8S_TARGET_VERSION}" 2>/dev/null |
yq4 '.metadata.labels["app.kubernetes.io/managed-by"] = "welck8s-apps-upgrade"' - |
kubectl_do "${1}" create -f - >/dev/null 2>/dev/null
}

# Usage: record_migration_prepare_done sc|wc
record_migration_prepare_done() {
# assert that the above begin has been done and from the expected version
# yq4 should trip pipefail on version mismatch
kubectl_do "${1}" get -n kube-system cm apps-upgrade -o yaml 2>/dev/null |
yq4 --exit-status 'select(.data.prepare == strenv(CK8S_TARGET_VERSION)) |
.data.prepared = strenv(CK8S_TARGET_VERSION) |
del(.data.last_prepare_step)' |
kubectl_do "${1}" replace -f - >/dev/null 2>/dev/null
Comment on lines +398 to +402
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: While one-lines are nice I think reducing the magic a bit here could help with readability and make it less error-prone, something like:

current_version=$(kubectl get configmap apps-upgrade -o jsonpath='{.data.prepare}')
[ $current_version == $CK8S_TARGET_VERSION ] || log_fatal "version mismatch"
kubectl patch configmap apps-upgrade --type=json -p='[{"op": "replace", "path": "/data/prepared", "value":"$CK8S_TARGET_VERSION"}]'
kubectl patch configmap apps-upgrade --type=json -p='[{"op": "remove", "path": "/data/last_prepare_step"}]'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does that resist TOCTOU issues? I was hoping that doing this as a single pipeline would ensure that the version could not be changed between the test and the patching. Sadly the JSON Patch subset used by kubernetes does not support "op":"test" 😭

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As long as record_migration_begin isn't suffering from it I think we should be fine, right? Since that is essentially acting as a lock.

If we aren't worried that the version is changed out of bounds that is, but then we have bigger issues than a race condition between the test and the patch. 😄

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Speaking of resisting issues. Tests would be nice!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests would be nice! Figuring out how to test whole migrations feels like a whole task in itself however, unless we already have it somewhere? Maybe @aarnq or @viktor-f have thoughts on that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually testing migrations would be awesome but feels like too big of an undertaking here.

I think something as simple as testing that running two migrations concurrently is prohibited and that migrations are actually being tracked. The migrations in the tests could just be dummies.

}

# Get currently prepared version
get_prepared_version() {
kubectl_do "${1}" get cm -n kube-system apps-upgrade -o jsonpath --template="{.data.prepared}" 2>/dev/null
}

# Usage: record_migration_prepare_step sc|wc step-description
record_migration_prepare_step() {
kubectl_do "${1}" get -n kube-system cm apps-upgrade -o yaml 2>/dev/null |
last_step="${2##*/}" \
yq4 --exit-status 'select(.data.prepare == strenv(CK8S_TARGET_VERSION)) |
.data.last_prepare_step = strenv(last_step)' |
kubectl_do "${1}" replace -f - >/dev/null 2>/dev/null
}

# Usage: record_migration_apply_step sc|wc step-description
record_migration_apply_step() {
kubectl_do "${1}" get -n kube-system cm apps-upgrade -o yaml 2>/dev/null |
last_step="${2##*/}" \
yq4 --exit-status 'select(.data.prepared == strenv(CK8S_TARGET_VERSION)) |
.data.last_apply_step = strenv(last_step)' |
kubectl_do "${1}" replace -f - >/dev/null 2>/dev/null
}

# Usage: record_migration_done sc|wc
record_migration_done() {
# Record the upgraded-to version. Create if it does not already exist.
kubectl_do "${1}" patch -n kube-system cm apps-meta --type=merge \
-p "$(yq4 --null-input --output-format json '.data.version = strenv(CK8S_TARGET_VERSION)')" 2>/dev/null ||
kubectl_do "${1}" create configmap -n kube-system apps-meta \
--from-literal "version=${CK8S_TARGET_VERSION}" 2>/dev/null
# Complete the migration.
kubectl_do "${1}" delete configmap -n kube-system apps-upgrade >/dev/null 2>/dev/null
}

# shellcheck source=scripts/migration/helm.sh
source "${ROOT}/scripts/migration/helm.sh"
# shellcheck source=scripts/migration/helmfile.sh
Expand Down
Loading