From 78224683c44ce6135f4d3c4d7995716c92f4b654 Mon Sep 17 00:00:00 2001 From: Eric Fried Date: Fri, 19 Feb 2021 13:33:32 -0600 Subject: [PATCH] Update deployment target and script Tidy up Makefile variables. Tighten up and document the deploy.sh script. Add some features, including the ability to skip RBAC and to do dry runs. --- Makefile | 66 ++++++++++++++++----------------------- README.md | 62 ++++++++++++++++++++++++++++--------- hack/deploy.sh | 83 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 131 insertions(+), 80 deletions(-) diff --git a/Makefile b/Makefile index dbc75a4..4c8b691 100644 --- a/Makefile +++ b/Makefile @@ -1,55 +1,41 @@ -IMAGE_URI ?= $(IMAGE_REPO)/$(IMAGE_ORG)/$(IMAGE_NAME) - # Project specific values -DOCKER_IMAGE_REGISTRY?=docker.io -QUAY_IMAGE_REGISTRY?=quay.io -IMAGE_REPOSITORY?=openshift-sre -IMAGE_NAME?=osd-cluster-ready -DOCKERFILE=./Dockerfile +IMAGE_REGISTRY ?= quay.io +IMAGE_USER ?= openshift-sre +IMAGE_NAME ?= osd-cluster-ready + +DOCKERFILE := ./Dockerfile -# Podman by default, fall back to docker -CONTAINER_ENGINE=$(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null) +# Podman by default, fall back to docker, allow override +CONTAINER_ENGINE ?= $(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null) # Gather commit number for Z and short SHA -COMMIT_NUMBER=$(shell git rev-list `git rev-list --parents HEAD | egrep "^[a-f0-9]{40}$$"`..HEAD --count) -CURRENT_COMMIT=$(shell git rev-parse --short=7 HEAD) +COMMIT_NUMBER := $(shell git rev-list `git rev-list --parents HEAD | egrep "^[a-f0-9]{40}$$"`..HEAD --count) +CURRENT_COMMIT := $(shell git rev-parse --short=7 HEAD) # Build container version -VERSION_MAJOR?=0 -VERSION_MINOR?=1 -CONTAINER_VERSION=$(VERSION_MAJOR).$(VERSION_MINOR).$(COMMIT_NUMBER)-$(CURRENT_COMMIT) +VERSION_MAJOR ?= 0 +VERSION_MINOR ?= 1 +IMAGE_VERSION := $(VERSION_MAJOR).$(VERSION_MINOR).$(COMMIT_NUMBER)-$(CURRENT_COMMIT) -# Quay.io image -QUAY_IMG?=$(QUAY_IMAGE_REGISTRY)/$(IMAGE_REPOSITORY)/$(IMAGE_NAME):v$(CONTAINER_VERSION) -QUAY_IMAGE_URI=${QUAY_IMG} -QUAY_IMAGE_URI_LATEST=$(QUAY_IMAGE_REGISTRY)/$(IMAGE_REPOSITORY)/$(IMAGE_NAME):latest - -# Docker image -DOCKER_IMG?=$(DOCKER_IMAGE_REGISTRY)/$(IMAGE_REPOSITORY)/$(IMAGE_NAME):v$(CONTAINER_VERSION) -DOCKER_IMAGE_URI=${DOCKER_IMG} -DOCKER_IMAGE_URI_LATEST=$(DOCKER_IMAGE_REGISTRY)/$(IMAGE_REPOSITORY)/$(IMAGE_NAME):latest +IMAGE_URI_BASE := $(IMAGE_REGISTRY)/$(IMAGE_USER)/$(IMAGE_NAME) +IMAGE_URI_LATEST=$(IMAGE_URI_BASE):latest +IMAGE_URI := $(IMAGE_URI_BASE):v$(IMAGE_VERSION) +# Used by deploy.sh +export IMAGE_URI .PHONY: build build: GOOS=linux go build -o ./bin/main main.go -.PHONY: docker-build -docker-build: build - # Build and tag images for quay.io - ${CONTAINER_ENGINE} build . -f $(DOCKERFILE) -t $(QUAY_IMAGE_URI) - ${CONTAINER_ENGINE} tag $(QUAY_IMAGE_URI) $(QUAY_IMAGE_URI_LATEST) - # Tag docker images - ${CONTAINER_ENGINE} tag $(QUAY_IMAGE_URI) $(DOCKER_IMAGE_URI) - ${CONTAINER_ENGINE} tag $(DOCKER_IMAGE_URI) $(DOCKER_IMAGE_URI_LATEST) - -.PHONY: docker-push -docker-push: - # Push Quay.io images - ${CONTAINER_ENGINE} push $(QUAY_IMAGE_URI) - ${CONTAINER_ENGINE} push $(QUAY_IMAGE_URI_LATEST) - # Push Docker images - # ${CONTAINER_ENGINE} push $(DOCKER_IMAGE_URI) - # ${CONTAINER_ENGINE} push $(DOCKER_IMAGE_URI_LATEST) +.PHONY: image-build +image-build: build + ${CONTAINER_ENGINE} build . -f $(DOCKERFILE) -t $(IMAGE_URI) + ${CONTAINER_ENGINE} tag $(IMAGE_URI) $(IMAGE_URI_LATEST) + +.PHONY: image-push +image-push: + ${CONTAINER_ENGINE} push $(IMAGE_URI) + ${CONTAINER_ENGINE} push $(IMAGE_URI_LATEST) .PHONY: deploy deploy: diff --git a/README.md b/README.md index fd2e7da..da8c3aa 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ # OSD Cluster Readiness Job - [OSD Cluster Readiness Job](#osd-cluster-readiness-job) - - [Deploying the Image](#deploying-the-image) - - [Deploying the Job](#deploying-the-job) + - [Deploying](#deploying) + - [Build the Image](#build-the-image) + - [Deploy the Job](#deploy-the-job) + - [Example](#example) - [Tunables](#tunables) - [`MAX_CLUSTER_AGE_MINUTES`](#max_cluster_age_minutes) - [`CLEAN_CHECK_RUNS`](#clean_check_runs) @@ -24,26 +26,57 @@ By default, we will clear any active silence and exit successfully if the cluste If the silence expires while health checks are failing, we reinstate it. (This means it is theoretically possible for alerts to fire for up to one minute if the silence expires right after a health check fails. [FIXME](#to-do).) -## Deploying the Image +## Deploying + +### Build the Image ``` -make build -make docker-build -make docker-push +make image-build +make image-push ``` -This builds the binary for linux, builds the docker image (which requires the binary to be built externally as of right now) and then pushes the updated image to quay. +This builds the binary for linux, builds the docker image, and then pushes the image to a repository. + +If you wish to push to a specific registry, repository, or image name, you may override the `IMAGE_REGISTRY`, `IMAGE_USER`, or `IMAGE_NAME` variables, respectively, when invoking the `image-build` and `image-push` targets. +For example, for development purposes, you may wish to `export IMAGE_USER=my_quay_user`. +See the [Makefile](Makefile) for the default values. + +### Deploy the Job + +``` +make deploy +``` -If you wish to push to a specific repository, org, or image name, you may override the `IMAGE_REPO`, `IMAGE_ORG`, or `IMAGE_NAME` variables, respectively, when invoking the `docker-build` and `docker-push` targets. -For example, for development purposes, you may wish to `export IMAGE_ORG=my_quay_namespace`. +This will do the following on your currently logged-in cluster. **NOTE:** You must have elevated permissions. +- Delete any existing `osd-cluster-ready` Job. +- Deploy each of the manifests in the [deploy/](deploy) folder in alphanumeric order, except the [Job](deploy/60-osd-ready.Job.yaml) itself. +- Create a temporary Job manifest with the following overrides, and deploy it: + - The `image` is set using any of the `IMAGE_*` overrides described above. + - The [`MAX_CLUSTER_AGE_MINUTES` environment variable](#max_cluster_age_minutes) is set to a high value to prevent the job from exiting early. ([FIXME: make this configurable](#to-do).) +- Wait for the Job's Pod to start and follow its logs. -## Deploying the Job +In addition to the `IMAGE_*` overrides, `make deploy` will also observe the following environment variables: +- `JOB_ONLY`: If set (to any `true`-ish value), only deploy the overridden Job manifest. + Use this to streamline the deployment process if the other manifests (RBAC, etc.) are already deployed and unchanged. +- `DRY_RUN`: Don't actually do anything to the cluster; just print the overridden Job manifest and the commands that _would_ have been run. -Deploy each of the manifests in the [deploy/](deploy) folder in alphanumeric order. +### Example -If you are overriding any of the `IMAGE_*` variables for development purposes, be sure to (temporarily) edit the [Job](deploy/60-osd-ready.Job.yaml), setting the `image` appropriately. +Build, push to, and deploy from my personal namespace, `i_am_a_docker`, in the docker.io registry, skipping RBAC manifests, and first doing a dry run: -You can iterate by deleting the Job (which will delete its Pod) and recreating it. +``` +# Set these in the environment to save passing them to each `make` command. +export IMAGE_REGISTRY=docker.io +export IMAGE_USER=i_am_a_docker + +make image-build image-push + +# Do a deploy dry run first +make JOB_ONLY=1 DRY_RUN=1 deploy + +# Now deploy for real +make JOB_ONLY=1 deploy +``` ## Tunables The following environment variables can be set in the container, e.g. by editing the [Job](deploy/60-osd-ready.Job.yaml) to include them in `spec.template.spec.containers[0].env`. @@ -90,4 +123,5 @@ Don't forget to [build](#deploying-the-image) and [test](#deploying-the-job) wit - [x] Look for existing active silences before creating a new one - [x] Implement _actual_ healthchecks (steal them from osde2e) to determine cluster stability - [ ] Find if there's a better and more secure way to talk to the alertmanager API using oauth and serviceaccount tokens. -- [ ] Make the default silence expiry shorter; and extend it when health checks fail ([OSD-6384](https://issues.redhat.com/browse/OSD-6384)). \ No newline at end of file +- [ ] Make the default silence expiry shorter; and extend it when health checks fail ([OSD-6384](https://issues.redhat.com/browse/OSD-6384)). +- [ ] Make [tunables](#tunables) configurable via `make deploy`. \ No newline at end of file diff --git a/hack/deploy.sh b/hack/deploy.sh index d88d5f2..c3b9232 100755 --- a/hack/deploy.sh +++ b/hack/deploy.sh @@ -1,34 +1,65 @@ -#!/bin/bash +#!/bin/bash -e -if [ -z "$IMAGE_REPOSITORY" ]; then - echo "Not set" -else - echo "$IMAGE_REPOSITORY" +usage() { + cat < $TMP_MANIFEST -sed -i "s/\/osd-cluster-ready/\/osd-cluster-ready:${CONTAINER_VERSION}/" $TMP_MANIFEST -sed -i "s/value: \"240\"/value: \"339860\"/" $TMP_MANIFEST + +TMP_MANIFEST=$(mktemp -t osd-cluster-ready-Job.XXXXX.yaml) trap "rm -fr $TMP_MANIFEST" EXIT +sed "s,\(^ *image: \).*,\1${IMAGE_URI}," deploy/60-osd-ready.Job.yaml > $TMP_MANIFEST +sed -i 's/value: "240"/value: "339860"/' $TMP_MANIFEST +echo "===== $TMP_MANIFEST =====" cat $TMP_MANIFEST +echo "=========================" + +# In case the job is already deleted, don't let -e fail this, and don't wait +# for the pod to go away. +WAIT_FOR_POD=yes +maybe oc delete job -n openshift-monitoring osd-cluster-ready || WAIT_FOR_POD=no + +if [[ -z "$JOB_ONLY" ]]; then + echo "Deploying all the things. Set JOB_ONLY=1 to deploy only the Job." + for manifest in $(ls deploy/ | grep -v "60-osd-ready.Job.yaml") + do + maybe oc apply -f deploy/${manifest} + done +else + echo "Deploying only the Job. To redeploy RBAC etc., unset JOB_ONLY." +fi -oc delete job -n openshift-monitoring osd-cluster-ready +# Before deploying the new job, make sure the pod from the old one is gone +if [[ $WAIT_FOR_POD == "yes" ]]; then + maybe oc wait --for=delete pod -l job-name=osd-cluster-ready --timeout=30s +fi -for manifest in $(ls deploy/ | grep -v "60-osd-ready.Job.yaml") -do - oc apply -f deploy/${manifest} -done +maybe oc create -f $TMP_MANIFEST -oc apply -f $TMP_MANIFEST +if [[ -z "$DRY_RUN" ]]; then + POD=$(oc get po -l job-name=osd-cluster-ready -o name) +else + POD=osd-cluster-ready-XXXXX +fi -# oc logs -f jobs/osd-cluster-ready -n openshift-monitoring +maybe oc wait --for=condition=Ready $POD --timeout=15s +if [[ $? -eq 0 ]]; then + maybe oc logs -f jobs/osd-cluster-ready -n openshift-monitoring +fi