Skip to content

Commit

Permalink
Merge pull request openshift#4 from 2uasimojo/deploy
Browse files Browse the repository at this point in the history
Update deployment target and script
  • Loading branch information
2uasimojo authored Feb 19, 2021
2 parents 614bf59 + 7822468 commit e083867
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 80 deletions.
66 changes: 26 additions & 40 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,55 +1,41 @@
IMAGE_URI ?= $(IMAGE_REPO)/$(IMAGE_ORG)/$(IMAGE_NAME)

# Project specific values
DOCKER_IMAGE_REGISTRY?=docker.io
QUAY_IMAGE_REGISTRY?=quay.io
IMAGE_REPOSITORY?=openshift-sre
IMAGE_NAME?=osd-cluster-ready
DOCKERFILE=./Dockerfile
IMAGE_REGISTRY ?= quay.io
IMAGE_USER ?= openshift-sre
IMAGE_NAME ?= osd-cluster-ready

DOCKERFILE := ./Dockerfile

# Podman by default, fall back to docker
CONTAINER_ENGINE=$(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null)
# Podman by default, fall back to docker, allow override
CONTAINER_ENGINE ?= $(shell command -v podman 2>/dev/null || command -v docker 2>/dev/null)

# Gather commit number for Z and short SHA
COMMIT_NUMBER=$(shell git rev-list `git rev-list --parents HEAD | egrep "^[a-f0-9]{40}$$"`..HEAD --count)
CURRENT_COMMIT=$(shell git rev-parse --short=7 HEAD)
COMMIT_NUMBER := $(shell git rev-list `git rev-list --parents HEAD | egrep "^[a-f0-9]{40}$$"`..HEAD --count)
CURRENT_COMMIT := $(shell git rev-parse --short=7 HEAD)

# Build container version
VERSION_MAJOR?=0
VERSION_MINOR?=1
CONTAINER_VERSION=$(VERSION_MAJOR).$(VERSION_MINOR).$(COMMIT_NUMBER)-$(CURRENT_COMMIT)
VERSION_MAJOR ?= 0
VERSION_MINOR ?= 1
IMAGE_VERSION := $(VERSION_MAJOR).$(VERSION_MINOR).$(COMMIT_NUMBER)-$(CURRENT_COMMIT)

# Quay.io image
QUAY_IMG?=$(QUAY_IMAGE_REGISTRY)/$(IMAGE_REPOSITORY)/$(IMAGE_NAME):v$(CONTAINER_VERSION)
QUAY_IMAGE_URI=${QUAY_IMG}
QUAY_IMAGE_URI_LATEST=$(QUAY_IMAGE_REGISTRY)/$(IMAGE_REPOSITORY)/$(IMAGE_NAME):latest

# Docker image
DOCKER_IMG?=$(DOCKER_IMAGE_REGISTRY)/$(IMAGE_REPOSITORY)/$(IMAGE_NAME):v$(CONTAINER_VERSION)
DOCKER_IMAGE_URI=${DOCKER_IMG}
DOCKER_IMAGE_URI_LATEST=$(DOCKER_IMAGE_REGISTRY)/$(IMAGE_REPOSITORY)/$(IMAGE_NAME):latest
# Image coordinates, derived from the registry/user/name/version values set
# earlier in this file. All three use simple expansion (:=) so each value is
# computed exactly once, at parse time, and they stay consistent with each other.
IMAGE_URI_BASE := $(IMAGE_REGISTRY)/$(IMAGE_USER)/$(IMAGE_NAME)
IMAGE_URI_LATEST := $(IMAGE_URI_BASE):latest
IMAGE_URI := $(IMAGE_URI_BASE):v$(IMAGE_VERSION)
# Used by deploy.sh
export IMAGE_URI

.PHONY: build
build:
GOOS=linux go build -o ./bin/main main.go

.PHONY: docker-build
docker-build: build
# Build and tag images for quay.io
${CONTAINER_ENGINE} build . -f $(DOCKERFILE) -t $(QUAY_IMAGE_URI)
${CONTAINER_ENGINE} tag $(QUAY_IMAGE_URI) $(QUAY_IMAGE_URI_LATEST)
# Tag docker images
${CONTAINER_ENGINE} tag $(QUAY_IMAGE_URI) $(DOCKER_IMAGE_URI)
${CONTAINER_ENGINE} tag $(DOCKER_IMAGE_URI) $(DOCKER_IMAGE_URI_LATEST)

.PHONY: docker-push
docker-push:
# Push Quay.io images
${CONTAINER_ENGINE} push $(QUAY_IMAGE_URI)
${CONTAINER_ENGINE} push $(QUAY_IMAGE_URI_LATEST)
# Push Docker images
# ${CONTAINER_ENGINE} push $(DOCKER_IMAGE_URI)
# ${CONTAINER_ENGINE} push $(DOCKER_IMAGE_URI_LATEST)
# image-build: run the `build` target first, then build the container image
# from $(DOCKERFILE), tag it with the versioned $(IMAGE_URI), and add the
# $(IMAGE_URI_LATEST) alias to the same image.
.PHONY: image-build
image-build: build
	$(CONTAINER_ENGINE) build . -f $(DOCKERFILE) -t $(IMAGE_URI)
	$(CONTAINER_ENGINE) tag $(IMAGE_URI) $(IMAGE_URI_LATEST)

# image-push: push both tags of the image -- the versioned $(IMAGE_URI) and
# the $(IMAGE_URI_LATEST) alias -- using the selected container engine.
.PHONY: image-push
image-push:
	$(CONTAINER_ENGINE) push $(IMAGE_URI)
	$(CONTAINER_ENGINE) push $(IMAGE_URI_LATEST)

.PHONY: deploy
deploy:
Expand Down
62 changes: 48 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# OSD Cluster Readiness Job

- [OSD Cluster Readiness Job](#osd-cluster-readiness-job)
- [Deploying the Image](#deploying-the-image)
- [Deploying the Job](#deploying-the-job)
- [Deploying](#deploying)
- [Build the Image](#build-the-image)
- [Deploy the Job](#deploy-the-job)
- [Example](#example)
- [Tunables](#tunables)
- [`MAX_CLUSTER_AGE_MINUTES`](#max_cluster_age_minutes)
- [`CLEAN_CHECK_RUNS`](#clean_check_runs)
Expand All @@ -24,26 +26,57 @@ By default, we will clear any active silence and exit successfully if the cluste
If the silence expires while health checks are failing, we reinstate it.
(This means it is theoretically possible for alerts to fire for up to one minute if the silence expires right after a health check fails. [FIXME](#to-do).)

## Deploying the Image
## Deploying

### Build the Image

```
make build
make docker-build
make docker-push
make image-build
make image-push
```

This builds the binary for linux, builds the docker image (which requires the binary to be built externally as of right now) and then pushes the updated image to quay.
This builds the binary for linux, builds the docker image, and then pushes the image to a repository.

If you wish to push to a specific registry, repository, or image name, you may override the `IMAGE_REGISTRY`, `IMAGE_USER`, or `IMAGE_NAME` variables, respectively, when invoking the `image-build` and `image-push` targets.
For example, for development purposes, you may wish to `export IMAGE_USER=my_quay_user`.
See the [Makefile](Makefile) for the default values.

### Deploy the Job

```
make deploy
```

If you wish to push to a specific repository, org, or image name, you may override the `IMAGE_REPO`, `IMAGE_ORG`, or `IMAGE_NAME` variables, respectively, when invoking the `docker-build` and `docker-push` targets.
For example, for development purposes, you may wish to `export IMAGE_ORG=my_quay_namespace`.
This will do the following on your currently logged-in cluster. **NOTE:** You must have elevated permissions.
- Delete any existing `osd-cluster-ready` Job.
- Deploy each of the manifests in the [deploy/](deploy) folder in alphanumeric order, except the [Job](deploy/60-osd-ready.Job.yaml) itself.
- Create a temporary Job manifest with the following overrides, and deploy it:
- The `image` is set using any of the `IMAGE_*` overrides described above.
- The [`MAX_CLUSTER_AGE_MINUTES` environment variable](#max_cluster_age_minutes) is set to a high value to prevent the job from exiting early. ([FIXME: make this configurable](#to-do).)
- Wait for the Job's Pod to start and follow its logs.

## Deploying the Job
In addition to the `IMAGE_*` overrides, `make deploy` will also observe the following environment variables:
- `JOB_ONLY`: If set (to any non-empty value), only deploy the overridden Job manifest.
Use this to streamline the deployment process if the other manifests (RBAC, etc.) are already deployed and unchanged.
- `DRY_RUN`: If set (to any non-empty value), don't actually do anything to the cluster; just print the overridden Job manifest and the commands that _would_ have been run.

Deploy each of the manifests in the [deploy/](deploy) folder in alphanumeric order.
### Example

If you are overriding any of the `IMAGE_*` variables for development purposes, be sure to (temporarily) edit the [Job](deploy/60-osd-ready.Job.yaml), setting the `image` appropriately.
Build, push to, and deploy from my personal namespace, `i_am_a_docker`, in the docker.io registry, skipping RBAC manifests, and first doing a dry run:

You can iterate by deleting the Job (which will delete its Pod) and recreating it.
```
# Set these in the environment to save passing them to each `make` command.
export IMAGE_REGISTRY=docker.io
export IMAGE_USER=i_am_a_docker
make image-build image-push
# Do a deploy dry run first
make JOB_ONLY=1 DRY_RUN=1 deploy
# Now deploy for real
make JOB_ONLY=1 deploy
```

## Tunables
The following environment variables can be set in the container, e.g. by editing the [Job](deploy/60-osd-ready.Job.yaml) to include them in `spec.template.spec.containers[0].env`.
Expand Down Expand Up @@ -90,4 +123,5 @@ Don't forget to [build](#deploying-the-image) and [test](#deploying-the-job) wit
- [x] Look for existing active silences before creating a new one
- [x] Implement _actual_ healthchecks (steal them from osde2e) to determine cluster stability
- [ ] Find if there's a better and more secure way to talk to the alertmanager API using oauth and serviceaccount tokens.
- [ ] Make the default silence expiry shorter; and extend it when health checks fail ([OSD-6384](https://issues.redhat.com/browse/OSD-6384)).
- [ ] Make the default silence expiry shorter; and extend it when health checks fail ([OSD-6384](https://issues.redhat.com/browse/OSD-6384)).
- [ ] Make [tunables](#tunables) configurable via `make deploy`.
83 changes: 57 additions & 26 deletions hack/deploy.sh
Original file line number Diff line number Diff line change
@@ -1,34 +1,65 @@
#!/bin/bash
#!/bin/bash -e

if [ -z "$IMAGE_REPOSITORY" ]; then
echo "Not set"
else
echo "$IMAGE_REPOSITORY"
# Print usage help to stderr and terminate the script with a failure status.
#
# Takes no arguments. Never returns: always exits with status 1.
usage() {
    # The EOF delimiter is unquoted on purpose so that $0 expands to the name
    # the script was invoked as. The text goes to stderr because this is only
    # reached on an error path.
    cat <<EOF >&2
Usage: $0
Environment:
IMAGE_URI (required): E.g. quay.io/my_repo/osd-cluster-ready:0.1.38-614bf59
JOB_ONLY (optional): If set, only deploy the Job manifest (skip RBAC etc.).
DRY_RUN (optional): If set, don't actually deploy anything, just print what would have happened.
EOF
    # Exit statuses must lie in 0-255; a negative value is out of range and is
    # rejected outright by some shells, so use a conventional failure code.
    exit 1
}

# Echo a command, then run it unless DRY_RUN is set to a non-empty value.
#
# Arguments: the command followed by its arguments, exactly as they should be
#            executed.
# Output:    a "+ <command>" trace line on stdout, then whatever the command
#            itself prints (nothing further in dry-run mode).
# Returns:   the command's exit status, or 0 when the command is skipped.
maybe() {
    # "$*" joins the arguments into one string for display.
    echo "+ $*"
    if [[ -z "$DRY_RUN" ]]; then
        # Quoted "$@" hands every argument to the command as a single word;
        # unquoted, arguments containing whitespace or glob characters would
        # be split or expanded a second time.
        "$@"
    fi
}

if [[ -z "$IMAGE_URI" ]]; then
echo "IMAGE_URI not set"
usage
fi
# Gather commit number for Z and short SHA
COMMIT_NUMBER=$(git rev-list `git rev-list --parents HEAD | egrep "^[a-f0-9]{40}$"`..HEAD --count)
CURRENT_COMMIT=$(git rev-parse --short=7 HEAD)

# Build container version
VERSION_MAJOR=0
VERSION_MINOR=1
CONTAINER_VERSION="v$VERSION_MAJOR.$VERSION_MINOR.$COMMIT_NUMBER-$CURRENT_COMMIT"

TMP_MANIFEST=$(mktemp)
echo "Created $TMP_MANIFEST"
cat deploy/60-osd-ready.Job.yaml | sed "s/openshift-sre/${IMAGE_REPOSITORY}/" > $TMP_MANIFEST
sed -i "s/\/osd-cluster-ready/\/osd-cluster-ready:${CONTAINER_VERSION}/" $TMP_MANIFEST
sed -i "s/value: \"240\"/value: \"339860\"/" $TMP_MANIFEST

TMP_MANIFEST=$(mktemp -t osd-cluster-ready-Job.XXXXX.yaml)
trap "rm -fr $TMP_MANIFEST" EXIT
sed "s,\(^ *image: \).*,\1${IMAGE_URI}," deploy/60-osd-ready.Job.yaml > $TMP_MANIFEST
sed -i 's/value: "240"/value: "339860"/' $TMP_MANIFEST
echo "===== $TMP_MANIFEST ====="
cat $TMP_MANIFEST
echo "========================="

# In case the job is already deleted, don't let -e fail this, and don't wait
# for the pod to go away.
WAIT_FOR_POD=yes
maybe oc delete job -n openshift-monitoring osd-cluster-ready || WAIT_FOR_POD=no

# Deploy the supporting manifests (everything in deploy/ except the Job
# manifest) unless JOB_ONLY is set to a non-empty value.
if [[ -z "$JOB_ONLY" ]]; then
    echo "Deploying all the things. Set JOB_ONLY=1 to deploy only the Job."
    # Iterate with a glob instead of parsing `ls` output: glob results are
    # already sorted, and file names with unusual characters stay intact.
    for manifest in deploy/*; do
        # An unmatched glob is left as the literal pattern; skip it rather
        # than handing a nonexistent path to `oc apply`.
        [[ -e "$manifest" ]] || continue
        # The Job manifest is excluded from this loop.
        if [[ "${manifest##*/}" == "60-osd-ready.Job.yaml" ]]; then
            continue
        fi
        maybe oc apply -f "$manifest"
    done
else
    echo "Deploying only the Job. To redeploy RBAC etc., unset JOB_ONLY."
fi

oc delete job -n openshift-monitoring osd-cluster-ready
# Before deploying the new job, make sure the pod from the old one is gone
if [[ $WAIT_FOR_POD == "yes" ]]; then
maybe oc wait --for=delete pod -l job-name=osd-cluster-ready --timeout=30s
fi

for manifest in $(ls deploy/ | grep -v "60-osd-ready.Job.yaml")
do
oc apply -f deploy/${manifest}
done
maybe oc create -f $TMP_MANIFEST

oc apply -f $TMP_MANIFEST
if [[ -z "$DRY_RUN" ]]; then
POD=$(oc get po -l job-name=osd-cluster-ready -o name)
else
POD=osd-cluster-ready-XXXXX
fi

# oc logs -f jobs/osd-cluster-ready -n openshift-monitoring
# Wait (up to 15s) for the Pod named in $POD to report the Ready condition,
# then follow the logs of the osd-cluster-ready Job in openshift-monitoring.
# NOTE(review): with the `-e` flag from the shebang, a failed wait aborts the
# script before the `$?` test below is reached, so the test only has an effect
# when the script is run without `-e` (e.g. `bash <script>`) -- confirm which
# behavior is intended.
maybe oc wait --for=condition=Ready $POD --timeout=15s
if [[ $? -eq 0 ]]; then
maybe oc logs -f jobs/osd-cluster-ready -n openshift-monitoring
fi

0 comments on commit e083867

Please sign in to comment.