diff --git a/.eslintrc.js b/.eslintrc.js index 326d68cbd..a47c269ee 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -4,10 +4,7 @@ module.exports = { commonjs: true, es6: true }, - extends: [ - 'standard', - 'prettier' - ], + extends: ['standard', 'prettier'], globals: { Atomics: 'readonly', SharedArrayBuffer: 'readonly' @@ -15,15 +12,13 @@ module.exports = { parserOptions: { ecmaVersion: 2018 }, - ignorePatterns: ['assets/', 'dist/', 'node_modules/'], + ignorePatterns: ['assets/', 'dist/', 'node_modules/'], rules: { - 'camelcase': [1, {'properties': 'never'}], + camelcase: [1, { properties: 'never' }], 'prettier/prettier': 'error' }, - plugins: [ - 'prettier' - ], + plugins: ['prettier'], env: { jest: true } -} +}; diff --git a/.github/workflows/gitlab.yml b/.github/workflows/gitlab.yml index 24fe1732a..6e45ca8e6 100644 --- a/.github/workflows/gitlab.yml +++ b/.github/workflows/gitlab.yml @@ -1,8 +1,5 @@ name: GitLab -on: - workflow_dispatch: - pull_request: - push: +on: pull_request jobs: test: runs-on: ubuntu-latest diff --git a/.github/workflows/test-deploy.yml b/.github/workflows/test-deploy.yml index 75173f743..bbe2817f1 100644 --- a/.github/workflows/test-deploy.yml +++ b/.github/workflows/test-deploy.yml @@ -1,7 +1,7 @@ name: Test & Deploy on: schedule: - - cron: 0 0 * * * + - cron: 0 1 * * 6 # Sat 01:00 release: types: [published] pull_request_target: @@ -46,7 +46,7 @@ jobs: TEST_GITLAB_SHA: c4c13286e78dc252dd2611f31a755f10d343fbd4 TEST_BBCLOUD_TOKEN: ${{ secrets.TEST_BBCLOUD_TOKEN }} TEST_BBCLOUD_REPO: https://bitbucket.org/iterative-ai/cml-qa-tests-dummy - TEST_BBCLOUD_SHA: 9bb9131ce0af294fe1c6eedca1f2bce3983e80bd + TEST_BBCLOUD_SHA: b511535a89f76d3d311b1c15e3e712b15c0b94e3 packages: needs: [lint, test] runs-on: ubuntu-latest diff --git a/.restyled.yaml b/.restyled.yaml index 769c3aaa2..ce2652b7e 100644 --- a/.restyled.yaml +++ b/.restyled.yaml @@ -1,8 +1,4 @@ restylers: - name: prettier arguments: ['--write'] - include: - - 'src/**/*.js' - - 
'bin/**/*.js' - - 'assets/*.json' - - './**/*.{yaml,yml,md,json}' + include: ['**/*.{js,json,md,yaml,yml}'] diff --git a/Dockerfile b/Dockerfile index d97d5c74c..2e52f3b62 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,7 @@ ARG BASE_IMAGE=ubuntu:20.04 FROM ${BASE_IMAGE} -# TODO: consider using iterative.ai or something else -LABEL maintainer="dvcorg " +LABEL maintainer="CML " # CONFIGURE NON-INTERACTIVE APT ENV DEBIAN_FRONTEND=noninteractive diff --git a/README.md b/README.md index e098a499a..5aed6ed7e 100644 --- a/README.md +++ b/README.md @@ -5,28 +5,32 @@ [![GHA](https://img.shields.io/github/v/tag/iterative/setup-cml?label=GitHub%20Actions&logo=GitHub)](https://github.com/iterative/setup-cml) [![npm](https://img.shields.io/npm/v/@dvcorg/cml?logo=npm)](https://www.npmjs.com/package/@dvcorg/cml) -**What is CML?** Continuous Machine Learning (CML) is an open-source library for -implementing continuous integration & delivery (CI/CD) in machine learning -projects. Use it to automate parts of your development workflow, including model -training and evaluation, comparing ML experiments across your project history, -and monitoring changing datasets. +**What is CML?** Continuous Machine Learning (CML) is an open-source CLI tool +for implementing continuous integration & delivery (CI/CD) with a focus on +MLOps. Use it to automate development workflows — including machine +provisioning, model training and evaluation, comparing ML experiments across +project history, and monitoring changing datasets. -![](https://static.iterative.ai/img/cml/github_cloud_case_lessshadow.png) _On -every pull request, CML helps you automatically train and evaluate models, then -generates a visual report with results and metrics. Above, an example report for -a [neural style transfer model](https://github.com/iterative/cml_cloud_case)._ +CML can help train and evaluate models — and then generate a visual report with +results and metrics — automatically on every pull request. 
-We built CML with these principles in mind: +![](https://static.iterative.ai/img/cml/github_cloud_case_lessshadow.png) _An +example report for a +[neural style transfer model](https://github.com/iterative/cml_cloud_case)._ + +CML principles: - **[GitFlow](https://nvie.com/posts/a-successful-git-branching-model/) for data science.** Use GitLab or GitHub to manage ML experiments, track who trained ML models or modified data and when. Codify data and models with [DVC](#using-cml-with-dvc) instead of pushing to a Git repo. - **Auto reports for ML experiments.** Auto-generate reports with metrics and - plots in each Git Pull Request. Rigorous engineering practices help your team + plots in each Git pull request. Rigorous engineering practices help your team make informed, data-driven decisions. -- **No additional services.** Build your own ML platform using just GitHub or - GitLab and your favourite cloud services: AWS, Azure, GCP. No databases, +- **No additional services.** Build your own ML platform using GitLab, + Bitbucket, or GitHub. Optionally, use + [cloud storage](#configuring-cloud-storage-providers) as well as either + self-hosted or cloud runners (such as AWS EC2, Azure, or GCP). No databases, services or complex setup needed. :question: Need help? Just want to chat about continuous integration for ML? @@ -36,29 +40,40 @@ We built CML with these principles in mind: [YouTube video series](https://www.youtube.com/playlist?list=PL7WG7YrwYcnDBDuCkFbcyjnZQrdskFsBz) for hands-on MLOps tutorials using CML! -## Table of contents +## Table of Contents -1. [Usage](#usage) -2. [Getting started (tutorial)](#getting-started) -3. [Using CML with DVC](#using-cml-with-dvc) -4. [Using self-hosted runners](#using-self-hosted-runners) -5. [Install CML as a package](#install-cml-as-a-package) -6. [Example Projects](#see-also) +1. [Setup (GitLab, GitHub, Bitbucket)](#setup) +2. [Usage](#usage) +3. [Getting started (tutorial)](#getting-started) +4. 
[Using CML with DVC](#using-cml-with-dvc) +5. [Advanced Setup (Self-hosted, local package)](#advanced-setup) +6. [Example projects](#see-also) -## Usage +## Setup -You'll need a GitHub or GitLab account to begin. Users may wish to familiarize -themselves with [Github Actions](https://help.github.com/en/actions) or +You'll need a GitLab, GitHub, or Bitbucket account to begin. Users may wish to +familiarize themselves with [Github Actions](https://help.github.com/en/actions) +or [GitLab CI/CD](https://about.gitlab.com/stages-devops-lifecycle/continuous-integration). Here, will discuss the GitHub use case. -- **GitLab users**: Please see our - [docs about configuring CML with GitLab](https://github.com/iterative/cml/wiki/CML-with-GitLab). -- **Bitbucket Cloud users**: Please see our - [docs on CML with Bitbucket Cloud](https://github.com/iterative/cml/wiki/CML-with-Bitbucket-Cloud). - _Bitbucket Server support estimated to arrive by May 2021._ -- **GitHub Actions users**: The key file in any CML project is - `.github/workflows/cml.yaml`: +### GitLab + +Please see our docs on +[CML with GitLab CI/CD](https://github.com/iterative/cml/wiki/CML-with-GitLab) +and in particular the +[personal access token](https://github.com/iterative/cml/wiki/CML-with-GitLab#variables) +requirement. + +### Bitbucket + +Please see our docs on +[CML with Bitbucket Cloud](https://github.com/iterative/cml/wiki/CML-with-Bitbucket-Cloud). +_Bitbucket Server support estimated to arrive by mid 2021._ + +### GitHub + +The key file in any CML project is `.github/workflows/cml.yaml`: ```yaml name: your-workflow-name @@ -68,6 +83,7 @@ jobs: runs-on: [ubuntu-latest] # optionally use a convenient Ubuntu LTS + CUDA + DVC + CML image # container: docker://dvcorg/cml:0-dvc2-base1-gpu + # container: docker://ghcr.io/iterative/cml:0-dvc2-base1-gpu steps: - uses: actions/checkout@v2 # may need to setup NodeJS & Python3 on e.g. 
self-hosted @@ -92,38 +108,42 @@ jobs: cml-send-comment report.md ``` +## Usage + We helpfully provide CML and other useful libraries pre-installed on our [custom Docker images](https://github.com/iterative/cml/blob/master/Dockerfile). In the above example, uncommenting the field -`container: docker://dvcorg/cml:0-dvc2-base1-gpu` will make the GitHub Actions +`container: docker://dvcorg/cml:0-dvc2-base1-gpu` (or +`container: docker://ghcr.io/iterative/cml:0-dvc2-base1-gpu`) will make the runner pull the CML Docker image. The image already has NodeJS, Python 3, DVC and CML set up on an Ubuntu LTS base with CUDA libraries and [Terraform](https://www.terraform.io) installed for convenience. ### CML Functions -CML provides a number of helper functions to help package the outputs of ML -workflows (including numeric data and visualizations about model performance) -into a CML report. +CML provides a number of functions to help package the outputs of ML workflows +(including numeric data and visualizations about model performance) into a CML +report. Below is a table of CML functions for writing markdown reports and delivering -those reports to your CI system (GitHub Actions or GitLab CI). +those reports to your CI system. -| Function | Description | Inputs | -| ----------------------- | -------------------------------------------------------------- | ----------------------------------------------------------- | -| `cml-runner` | Starts a runner locally or in cloud providers | See [Arguments](https://github.com/iterative/cml#arguments) | -| `cml-publish` | Publish an image for writing to CML report. | ` --title --md` | -| `cml-send-comment` | Return CML report as a comment in your GitHub/GitLab workflow. | ` --head-sha ` | -| `cml-send-github-check` | Return CML report as a check in GitHub | ` --head-sha ` | -| `cml-pr` | Create a pull request. 
| TODO | -| `cml-tensorboard-dev` | Return a link to a Tensorboard.dev page | `--logdir --title --md` | +| Function | Description | Example Inputs | +| ----------------------- | ---------------------------------------------------------------- | ----------------------------------------------------------- | +| `cml-runner` | Launch a runner locally or hosted by a cloud provider | See [Arguments](https://github.com/iterative/cml#arguments) | +| `cml-publish` | Publicly host an image for displaying in a CML report | ` --title --md` | +| `cml-send-comment` | Return CML report as a comment in your GitLab/GitHub workflow | ` --head-sha ` | +| `cml-send-github-check` | Return CML report as a check in GitHub | ` --head-sha ` | +| `cml-pr` | Commit the given files to a new branch and create a pull request | `...` | +| `cml-tensorboard-dev` | Return a link to a Tensorboard.dev page | `--logdir --title --md` | -### Customizing your CML report +#### CML Reports -CML reports are written in -[GitHub Flavored Markdown](https://github.github.com/gfm/). That means they can -contain images, tables, formatted text, HTML blocks, code snippets and more — -really, what you put in a CML report is up to you. Some examples: +The `cml-send-comment` command can be used to post reports. CML reports are +written in [GitHub Flavored Markdown](https://github.github.com/gfm/). That +means they can contain images, tables, formatted text, HTML blocks, code +snippets and more — really, what you put in a CML report is up to you. Some +examples: :spiral_notepad: **Text** Write to your report using whatever method you prefer. For example, copy the contents of a text file containing the results of ML model @@ -142,7 +162,7 @@ report. For example, if `graph.png` is output by `python train.py`, run: cml-publish graph.png --md >> report.md ``` -## Getting Started +### Getting Started 1. Fork our [example project repository](https://github.com/iterative/example_cml). @@ -196,13 +216,13 @@ git add . 
&& git commit -m "modify forest depth" git push origin experiment ``` -5. In GitHub, open up a Pull Request to compare the `experiment` branch to +5. In GitHub, open up a pull request to compare the `experiment` branch to `master`. ![](https://static.iterative.ai/img/cml/make_pr.png) -Shortly, you should see a comment from `github-actions` appear in the Pull -Request with your CML report. This is a result of the `cml-send-comment` +Shortly, you should see a comment from `github-actions` appear in the pull +request with your CML report. This is a result of the `cml-send-comment` function in your workflow. ![](https://static.iterative.ai/img/cml/first_report.png) @@ -218,7 +238,7 @@ performance metrics and visualizations — in GitHub checks and comments. What kind of workflow you want to run, and want to put in your CML report, is up to you. -## Using CML with DVC +### Using CML with DVC In many ML projects, data isn't stored in a Git repository, but needs to be downloaded from external sources. [DVC](https://dvc.org) is a common way to @@ -235,7 +255,7 @@ on: [push] jobs: run: runs-on: [ubuntu-latest] - container: docker://dvcorg/cml:0-dvc2-base1 + container: docker://ghcr.io/iterative/cml:0-dvc2-base1 steps: - uses: actions/checkout@v2 - name: Train model @@ -273,7 +293,11 @@ jobs: > :warning: If you're using DVC with cloud storage, take note of environment > variables for your storage format. -### Environment variables for supported cloud providers +#### Configuring Cloud Storage Providers + +There are many +[supported cloud storage providers](https://dvc.org/doc/command-reference/remote/modify#available-parameters-per-storage-type). +Here are a few examples for some of the most frequently used providers:
@@ -356,7 +380,9 @@ env:
-## Using self-hosted runners +## Advanced Setup + +### Self-hosted Runners GitHub Actions are run on GitHub-hosted runners by default. However, there are many great reasons to use your own runners: to take advantage of GPUs; to @@ -367,7 +393,7 @@ data. > [official GitHub documentation](https://help.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners) > to get started setting up your own self-hosted runner. -### Allocating cloud resources with CML +#### Allocating Cloud Compute Resources with CML When a workflow requires computational resources (such as GPUs), CML can automatically allocate cloud instances using `cml-runner`. You can spin up @@ -400,8 +426,8 @@ jobs: cml-runner \ --cloud aws \ --cloud-region us-west \ - --cloud-type=t2.micro \ - --labels=cml-runner + --cloud-type t2.micro \ + --labels cml-runner model-training: needs: [deploy-runner] runs-on: [self-hosted, cml-runner] @@ -424,10 +450,12 @@ instance in the `us-west` region. The `model-training` step then runs on the newly-launched instance. > :tada: **Note that you can use any container with this workflow!** While you -> must [have CML and its dependencies set up](#install-cml-as-a-package) to use -> functions such `cml-send-comment` from your instance, you can create your -> favourite training environment in the cloud by pulling the Docker container of -> your choice. +> must [have CML and its dependencies set up](#local-package) to use functions +> such as `cml-send-comment` from your instance, you can create your favourite +> training environment in the cloud by pulling the Docker container of your +> choice. + +#### Docker Images We like the CML container (`docker://dvcorg/cml`) because it comes loaded with Python, CUDA, `git`, `node` and other essentials for full-stack data science. @@ -442,7 +470,7 @@ image tags. 
The tag convention is `{CML_VER}-dvc{DVC_VER}-base{BASE_VER}{-gpu}`: For example, `docker://dvcorg/cml:0-dvc2-base1-gpu`, or `docker://ghcr.io/iterative/cml:0-dvc2-base1`. -### Arguments +#### Arguments The `cml-runner` function accepts the following arguments: @@ -502,10 +530,10 @@ Options: -h Show help [boolean] ``` -### Environment variables +#### Environment Variables > :warning: You will need to -> [create a personal access token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line) +> [create a personal access token (PAT)](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line) > with repository read/write access and workflow privileges. In the example > workflow, this token is stored as `PERSONAL_ACCESS_TOKEN`. @@ -514,26 +542,25 @@ compute resources as secrets. In the above example, `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` are required to deploy EC2 instances. Please see our docs about -[environment variables needed to authenticate with supported cloud services](#environment-variables-for-supported-cloud-providers). +[configuring cloud storage providers](#configuring-cloud-storage-providers). -### On-premise (local) runners +#### On-premise (Local) Runners This means using on-premise machines as self-hosted runners. The `cml-runner` function is used to set up a local self-hosted runner. On your local machine or -on-premise GPU cluster, [install CML as a package](#install-cml-as-a-package) -and then run: +on-premise GPU cluster, [install CML as a package](#local-package) and then run: ```bash cml-runner \ --repo $your_project_repository_url \ - --token=$PERSONAL_ACCESS_TOKEN \ + --token $PERSONAL_ACCESS_TOKEN \ --labels tf \ --idle-timeout 180 ``` Now your machine will be listening for workflows from your project repository. 
-## Install CML as a package +### Local Package In the examples above, CML is installed by the `setup-cml` action, or comes pre-installed in a custom Docker image pulled by a CI runner. You can also @@ -555,11 +582,11 @@ npm install -g vega-cli vega-lite CML and Vega-Lite package installation require the NodeJS package manager (`npm`) which ships with NodeJS. Installation instructions are below. -### Install NodeJS in GitHub +#### Install NodeJS -This is probably not necessary when using GitHub's default containers or one of -CML's Docker containers. Self-hosted runners may need to use a set up action to -install NodeJS: +- **GitHub**: This is probably not necessary when using GitHub's default + containers or one of CML's Docker containers. Self-hosted runners may need to + use a set up action to install NodeJS: ```bash uses: actions/setup-node@v2 @@ -567,9 +594,7 @@ uses: actions/setup-node@v2 node-version: '12' ``` -### Install NodeJS in GitLab - -GitLab requires direct installation of NodeJS: +- **GitLab**: Requires direct installation. ```bash curl -sL https://deb.nodesource.com/setup_12.x | bash @@ -585,4 +610,7 @@ These are some example projects using CML. - [CML with DVC to pull data](https://github.com/iterative/cml_dvc_case) - [CML with Tensorboard](https://github.com/iterative/cml_tensorboard_case) - [CML with a small EC2 instance](https://github.com/iterative/cml-runner-base-case) -- [CML with EC2 GPU](https://github.com/iterative/cml_cloud_case) + :key: +- [CML with EC2 GPU](https://github.com/iterative/cml_cloud_case) :key: + +:key: needs a [PAT](#environment-variables). 
diff --git a/bin/cml-runner.js b/bin/cml-runner.js index 32a81a25c..ac280f4ad 100755 --- a/bin/cml-runner.js +++ b/bin/cml-runner.js @@ -50,6 +50,8 @@ const shutdown = async (opts) => { if (error) console.error(error); const unregisterRunner = async () => { + if (!RUNNER) return; + try { console.log(`Unregistering runner ${name}...`); await cml.unregisterRunner({ name }); @@ -60,7 +62,23 @@ const shutdown = async (opts) => { } }; - const shutdownDockerMachine = async () => { + const retryWorkflows = async () => { + try { + if (!noRetry && RUNNER_JOBS_RUNNING.length) { + await Promise.all( + RUNNER_JOBS_RUNNING.map( + async (job) => await cml.pipelineRestart({ jobId: job.id }) + ) + ); + } + } catch (err) { + console.log(err); + } + }; + + const destroyDockerMachine = async () => { + if (!DOCKER_MACHINE) return; + console.log('docker-machine destroy...'); console.log( 'Docker machine is deprecated and will be removed!! Check how to deploy using our tf provider.' @@ -74,6 +92,8 @@ const shutdown = async (opts) => { }; const destroyTerraform = async () => { + if (!tfResource) return; + try { console.log(await tf.destroy({ dir: tfPath })); } catch (err) { @@ -85,38 +105,20 @@ const shutdown = async (opts) => { if (cloud) { await destroyTerraform(); } else { - await sleep(RUNNER_DESTROY_DELAY); + await unregisterRunner(); + await retryWorkflows(); - try { - if (!noRetry && RUNNER_JOBS_RUNNING.length) { - await Promise.all( - RUNNER_JOBS_RUNNING.map( - async (job) => await cml.pipelineRestart({ jobId: job.id }) - ) - ); - } - } catch (err) { - console.log(err); - } - - RUNNER && (await unregisterRunner()); - - if (!tfResource) { - console.log(`\tNo TF resource found`); - } else { - await destroyTerraform(); - } + if (DOCKER_MACHINE || tfResource) await sleep(RUNNER_DESTROY_DELAY); + await destroyDockerMachine(); + await destroyTerraform(); - DOCKER_MACHINE && (await shutdownDockerMachine()); + RUNNER && RUNNER.kill('SIGINT'); } - RUNNER && RUNNER.kill('SIGINT'); 
process.exit(error ? 1 : 0); }; const runCloud = async (opts) => { - const { cloudSshPrivateVisible } = opts; - const runTerraform = async (opts) => { console.log('Terraform apply...'); @@ -185,21 +187,31 @@ const runCloud = async (opts) => { console.log('Deploying cloud runner plan...'); const tfstate = await runTerraform(opts); const { resources } = tfstate; - for (let i = 0; i < resources.length; i++) { - const resource = resources[i]; - + for (const resource of resources) { if (resource.type.startsWith('iterative_')) { - const { instances } = resource; - - for (let j = 0; j < instances.length; j++) { - const instance = instances[j]; - - if (!cloudSshPrivateVisible) { - instance.attributes.ssh_private = '[MASKED]'; - } - - instance.attributes.token = '[MASKED]'; - console.log(JSON.stringify(instance)); + for (const { attributes } of resource.instances) { + const nonSensitiveValues = { + awsSecurityGroup: attributes.aws_security_group, + cloud: attributes.cloud, + driver: attributes.driver, + id: attributes.id, + idleTimeout: attributes.idle_timeout, + image: attributes.image, + instanceGpu: attributes.instance_gpu, + instanceHddSize: attributes.instance_hdd_size, + instanceIp: attributes.instance_ip, + instanceLaunchTime: attributes.instance_launch_time, + instanceType: attributes.instance_type, + labels: attributes.labels, + name: attributes.name, + region: attributes.region, + repo: attributes.repo, + single: attributes.single, + spot: attributes.spot, + spotPrice: attributes.spot_price, + timeouts: attributes.timeouts + }; + console.log(JSON.stringify(nonSensitiveValues)); } } } @@ -293,7 +305,7 @@ const run = async (opts) => { cml = new CML({ driver, repo, token }); - await tf.checkMinVersion(); + if (cloud || tfResource) await tf.checkMinVersion(); // prepare tf if (tfResource) { @@ -325,8 +337,11 @@ const run = async (opts) => { process.exit(0); } - if (reuse && (await cml.runnersByLabels({ labels })).length > 0) { - console.log(`Reusing existing runners 
with the ${labels} labels...`); + if ( + reuse && + (await cml.runnersByLabels({ labels })).find((runner) => runner.online) + ) { + console.log(`Reusing existing online runners with the ${labels} labels...`); process.exit(0); } @@ -411,11 +426,7 @@ const opts = yargs 'cloud-ssh-private', 'Custom private RSA SSH key. If not provided an automatically generated throwaway key will be used' ) - .boolean('cloud-ssh-private-visible') - .describe( - 'cloud-ssh-private-visible', - 'Show the private SSH key in the output with the rest of the instance properties (not recommended)' - ) + .coerce('cloud-ssh-private', (val) => val.replace(/\n/g, '\\n')) .boolean('cloud-spot') .describe('cloud-spot', 'Request a spot instance') .default('cloud-spot-price', '-1') diff --git a/bin/cml-send-comment.js b/bin/cml-send-comment.js index 6ee5d1034..266e463cc 100755 --- a/bin/cml-send-comment.js +++ b/bin/cml-send-comment.js @@ -1,5 +1,6 @@ #!/usr/bin/env node +const print = console.log; console.log = console.error; const fs = require('fs').promises; @@ -11,7 +12,7 @@ const run = async (opts) => { const path = opts._[0]; const report = await fs.readFile(path, 'utf-8'); const cml = new CML(opts); - await cml.commentCreate({ ...opts, report }); + print(await cml.commentCreate({ ...opts, report })); }; const opts = yargs @@ -23,6 +24,11 @@ const opts = yargs 'Commit SHA linked to this comment. Defaults to HEAD.' ) .alias('commit-sha', 'head-sha') + .boolean('update') + .describe( + 'update', + 'Update the last CML comment (if any) instead of creating a new one' + ) .boolean('rm-watermark') .describe( 'rm-watermark', @@ -36,10 +42,10 @@ const opts = yargs .default('token') .describe( 'token', - 'Personal access token to be used. If not specified in extracted from ENV REPO_TOKEN.' + 'Personal access token to be used. If not specified is extracted from ENV REPO_TOKEN.' 
) .default('driver') - .choices('driver', ['github', 'gitlab']) + .choices('driver', ['github', 'gitlab', 'bitbucket']) .describe('driver', 'If not specify it infers it from the ENV.') .help('h') .demand(1).argv; diff --git a/bin/cml-send-comment.test.js b/bin/cml-send-comment.test.js index 1eb390206..ad914ca9b 100644 --- a/bin/cml-send-comment.test.js +++ b/bin/cml-send-comment.test.js @@ -21,15 +21,17 @@ describe('Comment integration tests', () => { Options: --version Show version number [boolean] --commit-sha, --head-sha Commit SHA linked to this comment. Defaults to HEAD. + --update Update the last CML comment (if any) instead of + creating a new one [boolean] --rm-watermark Avoid watermark. CML needs a watermark to be able to distinguish CML reports from other comments in order to provide extra functionality. [boolean] --repo Specifies the repo to be used. If not specified is extracted from the CI ENV. --token Personal access token to be used. If not specified - in extracted from ENV REPO_TOKEN. + is extracted from ENV REPO_TOKEN. --driver If not specify it infers it from the ENV. 
- [choices: \\"github\\", \\"gitlab\\"] + [choices: \\"github\\", \\"gitlab\\", \\"bitbucket\\"] -h Show help [boolean]" `); }); diff --git a/package-lock.json b/package-lock.json index 1037c5668..91fc324a4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "@dvcorg/cml", - "version": "0.4.5", + "version": "0.4.7", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index e2a0bb8bd..5dcd82138 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@dvcorg/cml", - "version": "0.4.5", + "version": "0.4.7", "author": { "name": "DVC", "url": "http://cml.dev" @@ -39,7 +39,7 @@ "cml-pr": "bin/cml-pr.js" }, "scripts": { - "lintfix": "eslint --fix ./", + "lintfix": "eslint --fix ./ && prettier --write '**/*.{js,json,md,yaml,yml}'", "lint": "eslint ./", "test": "jest --passWithNoTests", "do_snapshots": "jest --updateSnapshot" @@ -51,9 +51,10 @@ }, "lint-staged": { "*.js": [ - "eslint --fix" + "eslint --fix", + "prettier --write" ], - "*.{md,yaml,yml}": [ + "*.{json,md,yaml,yml}": [ "prettier --write" ] }, diff --git a/src/cml.js b/src/cml.js index 230aa2ef9..b3ba264d0 100644 --- a/src/cml.js +++ b/src/cml.js @@ -6,7 +6,7 @@ const git = require('simple-git/promise')('./'); const Gitlab = require('./drivers/gitlab'); const Github = require('./drivers/github'); -const BitBucketCloud = require('./drivers/bitbucket_cloud'); +const BitbucketCloud = require('./drivers/bitbucket_cloud'); const { upload, exec, watermarkUri } = require('./utils'); const { @@ -65,7 +65,7 @@ const getDriver = (opts) => { if (driver === GITHUB) return new Github({ repo, token }); if (driver === GITLAB) return new Gitlab({ repo, token }); - if (driver === BB) return new BitBucketCloud({ repo, token }); + if (driver === BB) return new BitbucketCloud({ repo, token }); throw new Error(`driver ${driver} unknown!`); }; @@ -96,8 +96,11 @@ class CML { const { report: userReport, commitSha = await this.headSha(), - 
rmWatermark + rmWatermark, + update } = opts; + if (rmWatermark && update) + throw new Error('watermarks are mandatory for updateable comments'); const watermark = rmWatermark ? '' : ' \n\n ![CML watermark](https://raw.githubusercontent.com/iterative/cml/master/assets/watermark.svg)'; @@ -106,7 +109,8 @@ class CML { return await getDriver(this).commentCreate({ ...opts, report, - commitSha + commitSha, + watermark }); } @@ -209,15 +213,26 @@ class CML { } async unregisterRunner(opts = {}) { - return await getDriver(this).unregisterRunner(opts); + const { id: runnerId } = await this.runnerByName(opts); + return await getDriver(this).unregisterRunner({ runnerId, ...opts }); + } + + async getRunners(opts = {}) { + return await getDriver(this).getRunners(opts); } async runnerByName(opts = {}) { - return await getDriver(this).runnerByName(opts); + const { name } = opts; + const runners = await this.getRunners(opts); + return runners.find((runner) => runner.name === name); } async runnersByLabels(opts = {}) { - return await getDriver(this).runnersByLabels(opts); + const { labels } = opts; + const runners = await this.getRunners(opts); + return runners.filter((runner) => + labels.split(',').every((label) => runner.labels.includes(label)) + ); } async repoTokenCheck() { @@ -297,6 +312,7 @@ class CML { } } + await exec(`git fetch ${remote} ${sha}`); await exec(`git checkout -B ${target} ${sha}`); await exec(`git checkout -b ${source}`); await exec(`git add ${paths.join(' ')}`); diff --git a/src/cml.test.js b/src/cml.test.js index b307b4923..ab7d6af3a 100644 --- a/src/cml.test.js +++ b/src/cml.test.js @@ -1,5 +1,6 @@ const CML = require('../src/cml').default; +jest.setTimeout(60000); describe('Github tests', () => { const OLD_ENV = process.env; diff --git a/src/drivers/bitbucket_cloud.js b/src/drivers/bitbucket_cloud.js index 29353a62a..cbb8b139d 100644 --- a/src/drivers/bitbucket_cloud.js +++ b/src/drivers/bitbucket_cloud.js @@ -2,7 +2,7 @@ const fetch = 
require('node-fetch'); const { URL } = require('url'); const { BITBUCKET_COMMIT, BITBUCKET_BRANCH } = process.env; -class BitBucketCloud { +class BitbucketCloud { constructor(opts = {}) { const { repo, token } = opts; @@ -22,71 +22,91 @@ class BitBucketCloud { async commentCreate(opts = {}) { const { projectPath } = this; - const { commitSha, report } = opts; - - // Make a comment in the commit - const commitEndpoint = `/repositories/${projectPath}/commit/${commitSha}/comments/`; - const commitBody = JSON.stringify({ content: { raw: report } }); - const commitOutput = await this.request({ - endpoint: commitEndpoint, - method: 'POST', - body: commitBody - }); + const { commitSha, report, update, watermark } = opts; // Check for a corresponding PR. If it exists, also put the comment there. - const getPrEndpt = `/repositories/${projectPath}/commit/${commitSha}/pullrequests`; - const { values: prs } = await this.request({ endpoint: getPrEndpt }); + let prs; + try { + const getPrEndpoint = `/repositories/${projectPath}/commit/${commitSha}/pullrequests`; + prs = await this.paginatedRequest({ endpoint: getPrEndpoint }); + } catch (err) { + if (err.message === 'Not Found Resource not found') + err.message = + "Click 'Go to pull request' on any commit details page to enable this API"; + throw err; + } if (prs && prs.length) { for (const pr of prs) { - try { - // Append a watermark to the report with a link to the commit - const commitLink = commitSha.substr(0, 7); - const longReport = `${commitLink} \n${report}`; - const prBody = JSON.stringify({ content: { raw: longReport } }); - - // Write a comment on the PR - const prEndpoint = `/repositories/${projectPath}/pullrequests/${pr.id}/comments`; - await this.request({ - endpoint: prEndpoint, - method: 'POST', - body: prBody - }); - } catch (err) { - console.debug(err.message); - } + // Append a watermark to the report with a link to the commit + const commitLink = commitSha.substr(0, 7); + const longReport = 
`${commitLink}\n\n${report}`; + const prBody = JSON.stringify({ content: { raw: longReport } }); + + // Write a comment on the PR + const prEndpoint = `/repositories/${projectPath}/pullrequests/${pr.id}/comments/`; + const existingPr = ( + await this.paginatedRequest({ endpoint: prEndpoint, method: 'GET' }) + ) + .filter((comment) => { + const { content: { raw = '' } = {} } = comment; + return raw.endsWith(watermark); + }) + .sort((first, second) => first.id < second.id) + .pop(); + await this.request({ + endpoint: prEndpoint + (update && existingPr ? existingPr.id : ''), + method: update && existingPr ? 'PUT' : 'POST', + body: prBody + }); } } - return commitOutput; + const commitEndpoint = `/repositories/${projectPath}/commit/${commitSha}/comments/`; + + const existingCommmit = ( + await this.paginatedRequest({ endpoint: commitEndpoint, method: 'GET' }) + ) + .filter((comment) => { + const { content: { raw = '' } = {} } = comment; + return raw.endsWith(watermark); + }) + .sort((first, second) => first.id < second.id) + .pop(); + + return ( + await this.request({ + endpoint: + commitEndpoint + + (update && existingCommmit ? existingCommmit.id : ''), + method: update && existingCommmit ? 
'PUT' : 'POST', + body: JSON.stringify({ content: { raw: report } }) + }) + ).links.html.href; } async checkCreate() { - throw new Error('BitBucket Cloud does not support check!'); + throw new Error('Bitbucket Cloud does not support check!'); } async upload(opts = {}) { - throw new Error('BitBucket Cloud does not support upload!'); + throw new Error('Bitbucket Cloud does not support upload!'); } async runnerToken() { - throw new Error('BitBucket Cloud does not support runnerToken!'); + throw new Error('Bitbucket Cloud does not support runnerToken!'); } async registerRunner(opts = {}) { - throw new Error('BitBucket Cloud does not support registerRunner!'); + throw new Error('Bitbucket Cloud does not support registerRunner!'); } async unregisterRunner(opts = {}) { - throw new Error('BitBucket Cloud does not support unregisterRunner!'); + throw new Error('Bitbucket Cloud does not support unregisterRunner!'); } - async runnerByName(opts = {}) { - throw new Error('BitBucket Cloud does not support runnerByName!'); - } - - async runnersByLabels(opts = {}) { - throw new Error('BitBucket Cloud does not support runnerByLabels!'); + async getRunners(opts = {}) { + throw new Error('Bitbucket Cloud does not support getRunners!'); } async prCreate(opts = {}) { @@ -154,15 +174,18 @@ class BitBucketCloud { async request(opts = {}) { const { token, api } = this; - const { endpoint, method = 'GET', body } = opts; - - if (!endpoint) throw new Error('BitBucket Cloud API endpoint not found'); + const { url, endpoint, method = 'GET', body } = opts; + if (!(url || endpoint)) + throw new Error('Bitbucket Cloud API endpoint not found'); const headers = { 'Content-Type': 'application/json', Authorization: 'Basic ' + `${token}` }; - const url = `${api}${endpoint}`; - const response = await fetch(url, { method, headers, body }); + const response = await fetch(url || `${api}${endpoint}`, { + method, + headers, + body + }); if (response.status > 300) { const { @@ -174,6 +197,22 @@ class 
BitBucketCloud { return await response.json(); } + async paginatedRequest(opts = {}) { + const { method = 'GET', body } = opts; + const { next, values } = await this.request(opts); + + if (next) { + const nextValues = await this.paginatedRequest({ + url: next, + method, + body + }); + values.push(...nextValues); + } + + return values; + } + get sha() { return BITBUCKET_COMMIT; } @@ -187,4 +226,4 @@ class BitBucketCloud { get userName() {} } -module.exports = BitBucketCloud; +module.exports = BitbucketCloud; diff --git a/src/drivers/bitbucket_cloud.test.js b/src/drivers/bitbucket_cloud.test.js index f619ea876..52143747e 100644 --- a/src/drivers/bitbucket_cloud.test.js +++ b/src/drivers/bitbucket_cloud.test.js @@ -1,12 +1,12 @@ -jest.setTimeout(20000); -const BitBucketCloud = require('./bitbucket_cloud'); +jest.setTimeout(120000); +const BitbucketCloud = require('./bitbucket_cloud'); const { TEST_BBCLOUD_TOKEN: TOKEN, TEST_BBCLOUD_REPO: REPO, TEST_BBCLOUD_SHA: SHA } = process.env; describe('Non Enviromental tests', () => { - const client = new BitBucketCloud({ repo: REPO, token: TOKEN }); + const client = new BitbucketCloud({ repo: REPO, token: TOKEN }); test('test repo and token', async () => { expect(client.repo).toBe(REPO); expect(client.token).toBe(TOKEN); @@ -19,18 +19,18 @@ describe('Non Enviromental tests', () => { }); test('Check', async () => { await expect(client.checkCreate()).rejects.toThrow( - 'BitBucket Cloud does not support check!' + 'Bitbucket Cloud does not support check!' ); }); test('Publish', async () => { const path = `${__dirname}/../../assets/logo.png`; await expect(client.upload({ path })).rejects.toThrow( - 'BitBucket Cloud does not support upload!' + 'Bitbucket Cloud does not support upload!' ); }); test('Runner token', async () => { await expect(client.runnerToken()).rejects.toThrow( - 'BitBucket Cloud does not support runnerToken!' + 'Bitbucket Cloud does not support runnerToken!' 
); }); }); diff --git a/src/drivers/github.js b/src/drivers/github.js index c4d8f6cbd..b14f4e90a 100644 --- a/src/drivers/github.js +++ b/src/drivers/github.js @@ -15,6 +15,7 @@ const { GITHUB_REPOSITORY, GITHUB_SHA, GITHUB_REF, + GITHUB_HEAD_REF, GITHUB_EVENT_NAME } = process.env; @@ -70,18 +71,40 @@ class Github { } async commentCreate(opts = {}) { - const { report: body, commitSha } = opts; + const { report: body, commitSha, update, watermark } = opts; - const { url: commitUrl } = await octokit( - this.token, - this.repo - ).repos.createCommitComment({ - ...ownerRepo({ uri: this.repo }), - body, - commit_sha: commitSha - }); + const { paginate, repos } = octokit(this.token, this.repo); - return commitUrl; + const existing = Object.values( + await paginate(repos.listCommentsForCommit, { + ...ownerRepo({ uri: this.repo }), + commit_sha: commitSha + }) + ) + .filter((comment) => { + const { body = '' } = comment; + return body.endsWith(watermark); + }) + .sort((first, second) => first.id < second.id) + .pop(); + + if (update && existing) { + return ( + await repos.updateCommitComment({ + ...ownerRepo({ uri: this.repo }), + comment_id: existing.id, + body + }) + ).data.html_url; + } else { + return ( + await repos.createCommitComment({ + ...ownerRepo({ uri: this.repo }), + commit_sha: commitSha, + body + }) + ).data.html_url; + } } async checkCreate(opts = {}) { @@ -141,21 +164,20 @@ class Github { } async unregisterRunner(opts) { - const { name } = opts; + const { runnerId } = opts; const { owner, repo } = ownerRepo({ uri: this.repo }); const { actions } = octokit(this.token, this.repo); - const { id } = await this.runnerByName({ name }); if (typeof repo !== 'undefined') { await actions.deleteSelfHostedRunnerFromRepo({ owner, repo, - runner_id: id + runner_id: runnerId }); } else { await actions.deleteSelfHostedRunnerFromOrg({ org: owner, - runner_id: id + runner_id: runnerId }); } } @@ -200,48 +222,27 @@ class Github { async getRunners(opts = {}) { const { owner, 
repo } = ownerRepo({ uri: this.repo }); - const { actions } = octokit(this.token, this.repo); - let runners = []; + const { paginate, actions } = octokit(this.token, this.repo); - if (typeof repo !== 'undefined') { - ({ - data: { runners } - } = await actions.listSelfHostedRunnersForRepo({ - owner, - repo, - per_page: 100 - })); + let runners; + if (typeof repo === 'undefined') { + runners = await paginate(actions.listSelfHostedRunnersForOrg, { + org: owner + }); } else { - ({ - data: { runners } - } = await actions.listSelfHostedRunnersForOrg({ - org: owner, - per_page: 100 - })); + runners = await paginate(actions.listSelfHostedRunnersForRepo, { + owner, + repo + }); } - return runners; - } - - async runnerByName(opts = {}) { - const { name } = opts; - const runners = await this.getRunners(opts); - const runner = runners.find((runner) => runner.name === name); - if (runner) return { id: runner.id, name: runner.name }; - } - - async runnersByLabels(opts = {}) { - const { labels } = opts; - const runners = await this.getRunners(opts); - return runners - .filter((runner) => - labels - .split(',') - .every((label) => - runner.labels.map(({ name }) => name).includes(label) - ) - ) - .map((runner) => ({ id: runner.id, name: runner.name })); + return runners.map(({ id, name, busy, status, labels }) => ({ + id, + name, + labels: labels.map(({ name }) => name), + online: status === 'online', + busy + })); } async prCreate(opts = {}) { @@ -402,7 +403,7 @@ class Github { } get branch() { - return branchName(GITHUB_REF); + return branchName(GITHUB_HEAD_REF || GITHUB_REF); } get userEmail() { diff --git a/src/drivers/github.test.js b/src/drivers/github.test.js index 6a9cf68f0..772648c43 100644 --- a/src/drivers/github.test.js +++ b/src/drivers/github.test.js @@ -1,4 +1,4 @@ -jest.setTimeout(20000); +jest.setTimeout(40000); const GithubClient = require('./github'); diff --git a/src/drivers/gitlab.js b/src/drivers/gitlab.js index dfaa3826b..7e641addd 100644 --- 
a/src/drivers/gitlab.js +++ b/src/drivers/gitlab.js @@ -59,9 +59,7 @@ class Gitlab { }) ); - this.detectedBase = possibleBases.find( - (base) => base.constructor !== Error - ); + this.detectedBase = possibleBases.find((base) => typeof base === 'string'); if (!this.detectedBase) { if (possibleBases.length) throw possibleBases[0]; throw new Error('Invalid repository address'); @@ -71,7 +69,9 @@ class Gitlab { } async commentCreate(opts = {}) { - const { commitSha, report } = opts; + const { commitSha, report, update } = opts; + + if (update) throw new Error('GitLab does not support comment updates!'); const projectPath = await this.projectPath(); const endpoint = `/projects/${projectPath}/repository/commits/${commitSha}/comments`; @@ -127,10 +127,8 @@ class Gitlab { } async unregisterRunner(opts = {}) { - const { name } = opts; - - const { id } = await this.runnerByName({ name }); - const endpoint = `/runners/${id}`; + const { runnerId } = opts; + const endpoint = `/runners/${runnerId}`; return await this.request({ endpoint, method: 'DELETE', raw: true }); } @@ -174,23 +172,20 @@ class Gitlab { } } - async runnerByName(opts = {}) { - const { name } = opts; - + async getRunners(opts = {}) { const endpoint = `/runners?per_page=100`; const runners = await this.request({ endpoint, method: 'GET' }); - const runner = runners.filter( - (runner) => runner.name === name || runner.description === name - )[0]; - - if (runner) return { id: runner.id, name: runner.name }; - } - - async runnersByLabels(opts = {}) { - const { labels } = opts; - const endpoint = `/runners?per_page=100?tag_list=${labels}`; - const runners = await this.request({ endpoint, method: 'GET' }); - return runners.map((runner) => ({ id: runner.id, name: runner.name })); + return await Promise.all( + runners.map(async ({ id, name, description, active, online }) => ({ + id, + name: description, + labels: ( + await this.request({ endpoint: `/runners/${id}`, method: 'GET' }) + ).tag_list, + online, + busy: active 
&& online + })) + ); } async prCreate(opts = {}) {