From 96624db4d3cc9c67446d9174543d2985618e98c9 Mon Sep 17 00:00:00 2001
From: Jeremy Lewi
Date: Sun, 29 Oct 2017 18:57:40 -0700
Subject: [PATCH] Update release.py so we can run it continuously.

* release.py will regularly check for postsubmit results and cut new
  releases.
* Create a Docker image to run it and a Kubernetes spec file to deploy it
  on a K8s cluster.
---
 py/release.py              | 116 ++++++++++++++++++++++++++++++-------
 py/util.py                 |   4 +-
 release/Dockerfile.release |  81 ++++++++++++++++++++++++++
 release/Makefile           |  30 ++++++++++
 release/README.md          |   7 +++
 release/releaser.yaml      |  25 ++++++++
 6 files changed, 240 insertions(+), 23 deletions(-)
 create mode 100644 release/Dockerfile.release
 create mode 100644 release/Makefile
 create mode 100644 release/README.md
 create mode 100644 release/releaser.yaml

diff --git a/py/release.py b/py/release.py
index a3cda4719b..bbffaaf0fa 100644
--- a/py/release.py
+++ b/py/release.py
@@ -10,17 +10,18 @@
 import logging
 import os
 import tempfile
+import time
 import yaml
 
 from google.cloud import storage # pylint: disable=no-name-in-module
 
 from py import util
 
-REPO_ORG = "jlewi"
-REPO_NAME = "mlkube.io"
+REPO_ORG = "tensorflow"
+REPO_NAME = "k8s"
 
 RESULTS_BUCKET = "mlkube-testing-results"
-JOB_NAME = "mlkube-build-postsubmit"
+JOB_NAME = "tf-k8s-postsubmit"
 
 
 def get_latest_green_presubmit(gcs_client):
@@ -62,34 +63,77 @@ def update_chart(chart_file, version):
     yaml.dump(info, hf)
 
 
-def main(): # pylint: disable=too-many-locals
-  logging.getLogger().setLevel(logging.INFO) # pylint: disable=too-many-locals
-  parser = argparse.ArgumentParser(
-    description="Release artifacts for TfJob.")
+def get_last_release(bucket):
+  """Return the sha of the last release.
 
-  parser.add_argument(
-    "--releases_bucket",
-    default="tf-on-k8s-dogfood-releases",
-    type=str,
-    help="The bucket to publish releases to.")
+  Args:
+    bucket: A google cloud storage bucket object
 
-  # TODO(jlewi): Should pass along unknown arguments to build and push.
-  args, _ = parser.parse_known_args()
+  Returns:
+    sha: The sha of the latest release.
+  """
+
+  path = "latest_release.json"
+  blob = bucket.blob(path)
+
+  if not blob.exists():
+    logging.info("File %s doesn't exist.", util.to_gcs_uri(bucket.name, path))
+    return ""
+
+  contents = blob.download_as_string()
+
+  data = json.loads(contents)
+  return data.get("sha", "")
+
+def create_latest(bucket, sha, target):
+  """Create a file in GCS with information about the latest release.
+
+  Args:
+    bucket: A google cloud storage bucket object
+    sha: SHA of the release we just created
+    target: The GCS path of the release we just produced.
+  """
+  path = os.path.join("latest_release.json")
+
+  logging.info("Creating GCS output: %s", util.to_gcs_uri(bucket.name, path))
+
+  data = {
+    "sha": sha,
+    "target": target,
+  }
+  blob = bucket.blob(path)
+  blob.upload_from_string(json.dumps(data))
+
+def build_once(bucket_name): # pylint: disable=too-many-locals
   gcs_client = storage.Client()
   sha = get_latest_green_presubmit(gcs_client)
 
+  bucket = gcs_client.get_bucket(bucket_name)
+
+  logging.info("Latest passing postsubmit is %s", sha)
+
+  last_release_sha = get_last_release(bucket)
+  logging.info("Most recent release was for %s", last_release_sha)
+
+  if sha == last_release_sha:
+    logging.info("Already cut release for %s", sha)
+    return
+
   src_dir = tempfile.mkdtemp(prefix="tmpTfJobSrc")
   logging.info("src_dir: %s", src_dir)
 
   sha = util.clone_repo(src_dir, util.MASTER_REPO_OWNER, util.MASTER_REPO_NAME,
-                        sha)
+    sha)
 
   # TODO(jlewi): We should check if we've already done a push. We could
   # check if the .tar.gz for the helm package exists.
   build_info_file = os.path.join(src_dir, "build_info.yaml")
   util.run([os.path.join(src_dir, "images", "tf_operator", "build_and_push.py"),
-           "--output=" + build_info_file], cwd=src_dir)
+    "--output=" + build_info_file], cwd=src_dir)
 
   with open(build_info_file) as hf:
     build_info = yaml.load(hf)
@@ -113,21 +157,51 @@ def main(): # pylint: disable=too-many-locals
 
   release_path = version
 
-  bucket = gcs_client.get_bucket(args.releases_bucket)
-
   targets = [
     os.path.join(release_path, os.path.basename(chart_archive)),
-    "latest/tf-job-operator-chart-latest.tgz",
-  ]
+      "latest/tf-job-operator-chart-latest.tgz",
+      ]
   for t in targets:
     blob = bucket.blob(t)
-    gcs_path = util.to_gcs_uri(args.releases_bucket, t)
+    gcs_path = util.to_gcs_uri(bucket_name, t)
     if blob.exists() and not t.startswith("latest"):
       logging.warn("%s already exists", gcs_path)
       continue
     logging.info("Uploading %s to %s.", chart_archive, gcs_path)
     blob.upload_from_filename(chart_archive)
 
+  create_latest(bucket, sha, os.path.join(bucket_name, targets[0]))
+
+def main(): # pylint: disable=too-many-locals
+  logging.getLogger().setLevel(logging.INFO) # pylint: disable=too-many-locals
+  parser = argparse.ArgumentParser(
+    description="Release artifacts for TfJob.")
+
+  parser.add_argument(
+    "--releases_bucket",
+    default="tf-on-k8s-dogfood-releases",
+    type=str,
+    help="The bucket to publish releases to.")
+
+  parser.add_argument(
+    "--check_interval_secs",
+    default=0,
+    type=int,
+    help=("How often (in seconds) to check for a new passing postsubmit. "
+          "If set to 0 (the default), the script runs once and exits."))
+
+  # TODO(jlewi): Should pass along unknown arguments to build and push.
+  args, _ = parser.parse_known_args()
+
+  while True:
+    logging.info("Checking latest postsubmit results")
+    build_once(args.releases_bucket)
+
+    if args.check_interval_secs > 0:
+      logging.info("Sleeping %s seconds before checking for a new postsubmit.",
+                   args.check_interval_secs)
+      time.sleep(args.check_interval_secs)
+    else:
+      break
+
 
 if __name__ == "__main__":
   main()
diff --git a/py/util.py b/py/util.py
index 70b96e3041..e4e870b1ad 100644
--- a/py/util.py
+++ b/py/util.py
@@ -8,8 +8,8 @@
 
 # Default name for the repo organization and name.
 # This should match the values used in Go imports.
-MASTER_REPO_OWNER = "jlewi"
-MASTER_REPO_NAME = "mlkube.io"
+MASTER_REPO_OWNER = "tensorflow"
+MASTER_REPO_NAME = "k8s"
 
 
 def run(command, cwd=None):
diff --git a/release/Dockerfile.release b/release/Dockerfile.release
new file mode 100644
index 0000000000..5c5557e33f
--- /dev/null
+++ b/release/Dockerfile.release
@@ -0,0 +1,81 @@
+# Copyright 2017 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This Dockerfile is used to create a docker image suitable for building
+# and releasing the TfJob operator.
+FROM golang:1.8.2
+LABEL authors="Jeremy Lewi "
+
+WORKDIR /workspace
+RUN mkdir -p /workspace
+ENV WORKSPACE=/workspace \
+    TERM=xterm
+
+# common util tools
+# https://github.com/GoogleCloudPlatform/gsutil/issues/446 for python-openssl
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    wget \
+    curl \
+    file \
+    rsync \
+    ca-certificates \
+    build-essential \
+    openssh-client \
+    git \
+    pkg-config \
+    zip \
+    unzip \
+    xz-utils \
+    zlib1g-dev \
+    python \
+    python-setuptools \
+    python-openssl \
+    && apt-get clean
+
+RUN easy_install pip
+RUN pip install --upgrade six pyyaml google-api-python-client \
+    google-cloud-storage pylint
+
+# Install gcloud
+
+ENV PATH=/google-cloud-sdk/bin:/workspace:${PATH} \
+    CLOUDSDK_CORE_DISABLE_PROMPTS=1
+
+RUN wget -q https://dl.google.com/dl/cloudsdk/channels/rapid/google-cloud-sdk.tar.gz && \
+    tar xzf google-cloud-sdk.tar.gz -C / && \
+    rm google-cloud-sdk.tar.gz && \
+    /google-cloud-sdk/install.sh \
+        --disable-installation-options \
+        --bash-completion=false \
+        --path-update=false \
+        --usage-reporting=false && \
+    gcloud components install alpha beta kubectl && \
+    gcloud info | tee /workspace/gcloud-info.txt
+
+# Install glide
+RUN cd /tmp && \
+    wget -O glide-v0.13.0-linux-amd64.tar.gz \
+        https://github.com/Masterminds/glide/releases/download/v0.13.0/glide-v0.13.0-linux-amd64.tar.gz && \
+    tar -xvf glide-v0.13.0-linux-amd64.tar.gz && \
+    mv ./linux-amd64/glide /usr/local/bin/
+
+# Install docker
+# Docker is used when running locally to build the images.
+# Note: 1.11+ changes the tarball format
+RUN curl -L "https://get.docker.com/builds/Linux/x86_64/docker-1.9.1.tgz" \
+    | tar -C /usr/bin -xvzf- --strip-components=3 usr/local/bin/docker
+
+RUN mkdir -p /opt
+RUN git clone https://github.com/tensorflow/k8s.git \
+    /opt/git_tensorflow_k8s
\ No newline at end of file
diff --git a/release/Makefile b/release/Makefile
new file mode 100644
index 0000000000..849077c8d2
--- /dev/null
+++ b/release/Makefile
@@ -0,0 +1,30 @@
+# Copyright 2017 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+IMG = gcr.io/tf-on-k8s-releasing/releaser
+TAG = $(shell date +v%Y%m%d)-$(shell git describe --tags --always --dirty)
+
+all: build
+
+build:
+	@echo {\"image\": \"$(IMG):$(TAG)\"} > version.json
+	docker build -t $(IMG):$(TAG) -f Dockerfile.release .
+	docker tag $(IMG):$(TAG) $(IMG):latest
+	@echo Built $(IMG):$(TAG) and tagged with latest
+	rm -f version.json
+
+push: build
+	gcloud docker -- push $(IMG):$(TAG)
+	gcloud docker -- push $(IMG):latest
+	@echo Pushed $(IMG) with :latest and :$(TAG) tags
diff --git a/release/README.md b/release/README.md
new file mode 100644
index 0000000000..a2baa9c7bf
--- /dev/null
+++ b/release/README.md
@@ -0,0 +1,7 @@
+## Releasing
+
+* The script [release.py](../py/release.py) will build and push a release
+  based on the latest green postsubmit.
+
+* The script can be run continuously and will periodically check for new
+  postsubmit results.
diff --git a/release/releaser.yaml b/release/releaser.yaml
new file mode 100644
index 0000000000..34bd27a697
--- /dev/null
+++ b/release/releaser.yaml
@@ -0,0 +1,25 @@
+apiVersion: extensions/v1beta1
+kind: ReplicaSet
+metadata:
+  name: releaser
+  labels:
+    app: releaser
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: releaser
+  template:
+    metadata:
+      labels:
+        app: releaser
+    spec:
+      containers:
+      - name: releaser
+        image: gcr.io/tf-on-k8s-releasing/releaser:latest
+        workingDir: /opt/git_tensorflow_k8s
+        command:
+        - python
+        - -m
+        - py.release
+        - --check_interval_secs=3600
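Editor's note: the commands below are a usage sketch, not part of the patch. They assume
gcloud, docker, and kubectl are already configured with push access to
gcr.io/tf-on-k8s-releasing and access to the target cluster; the bucket name is simply the
default from release.py.

    # Build the releaser image from release/Dockerfile.release and push it
    # (the push target depends on build).
    cd release && make push

    # Deploy the ReplicaSet; release.py then polls for new green postsubmits every hour.
    kubectl apply -f releaser.yaml

    # Alternatively, run a single release pass from the repository root and exit.
    python -m py.release --releases_bucket=tf-on-k8s-dogfood-releases --check_interval_secs=0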