From 9e26856d71a7a54dffdb768ee3876d4972ecfcd0 Mon Sep 17 00:00:00 2001 From: Alex Eagle Date: Tue, 13 Aug 2019 14:32:40 -0700 Subject: [PATCH] feat(worker): new worker package --- WORKSPACE | 1 + commitlint.config.js | 1 + examples/BUILD.bazel | 21 +++++++++++ examples/worker/.bazelrc | 1 + examples/worker/BUILD.bazel | 35 ++++++++++++++++++ examples/worker/README.md | 25 +++++++++++++ examples/worker/WORKSPACE | 34 +++++++++++++++++ examples/worker/foo.js | 3 ++ examples/worker/package.json | 6 +++ examples/worker/tool.js | 52 ++++++++++++++++++++++++++ examples/worker/uses_workers.bzl | 39 ++++++++++++++++++++ packages/index.bzl | 1 + packages/worker/BUILD.bazel | 63 ++++++++++++++++++++++++++++++++ packages/worker/README.md | 23 ++++++++++++ packages/worker/package.json | 20 ++++++++++ 15 files changed, 325 insertions(+) create mode 100644 examples/worker/.bazelrc create mode 100644 examples/worker/BUILD.bazel create mode 100644 examples/worker/README.md create mode 100644 examples/worker/WORKSPACE create mode 100644 examples/worker/foo.js create mode 100644 examples/worker/package.json create mode 100644 examples/worker/tool.js create mode 100644 examples/worker/uses_workers.bzl create mode 100644 packages/worker/BUILD.bazel create mode 100644 packages/worker/README.md create mode 100644 packages/worker/package.json diff --git a/WORKSPACE b/WORKSPACE index bbe36fb479..cea347a6f6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -370,4 +370,5 @@ local_repository( "vendored_node_and_yarn", "web_testing", "webapp", + "worker", ]] diff --git a/commitlint.config.js b/commitlint.config.js index 7362e28c63..1b1a0901b9 100644 --- a/commitlint.config.js +++ b/commitlint.config.js @@ -18,6 +18,7 @@ module.exports = { 'stylus', 'rollup', 'typescript', + 'worker', ] ] } diff --git a/examples/BUILD.bazel b/examples/BUILD.bazel index 09306a1af5..b95572f4b2 100644 --- a/examples/BUILD.bazel +++ b/examples/BUILD.bazel @@ -228,3 +228,24 @@ bazel_integration_test( ], workspace_files = 
"@examples_vendored_node//:all_files", ) + +bazel_integration_test( + name = "examples_worker", + # There are no tests in this example + bazel_commands = [ + # By default this will build with worker enabled + "build //:do_work", + # Build again without the worker + "build --define=cache_bust=true --strategy=DoWork=standalone //:do_work", + ], + bazelrc_imports = { + "//:common.bazelrc": "import %workspace%/../../common.bazelrc", + }, + check_npm_packages = NPM_PACKAGES, + npm_packages = {"//packages/worker:npm_package": "@bazel/worker"}, + repositories = { + "//:release": "build_bazel_rules_nodejs", + }, + tags = ["examples"], + workspace_files = "@examples_worker//:all_files", +) diff --git a/examples/worker/.bazelrc b/examples/worker/.bazelrc new file mode 100644 index 0000000000..3431057af6 --- /dev/null +++ b/examples/worker/.bazelrc @@ -0,0 +1 @@ +import %workspace%/../../common.bazelrc diff --git a/examples/worker/BUILD.bazel b/examples/worker/BUILD.bazel new file mode 100644 index 0000000000..a79c3b70bd --- /dev/null +++ b/examples/worker/BUILD.bazel @@ -0,0 +1,35 @@ +load("@build_bazel_rules_nodejs//:defs.bzl", "nodejs_binary") +load(":uses_workers.bzl", "work") + +# This is our program that we want to run as a worker +# Imagine that it takes a long time to start, or benefits from caching work +nodejs_binary( + name = "tool", + # For the integration test, allow a second bazel build + # to explicitly be a cache miss, letting us test both + # worker and standalone modes. + configuration_env_vars = ["cache_bust"], + data = ["@npm//@bazel/worker"], + entry_point = ":tool.js", +) + +# How a user would call our rule that uses workers. 
+work( + name = "do_work", + src = "foo.js", +) + +# For running this example as a bazel_integration_test +# See //examples:BUILD.bazel +filegroup( + name = "all_files", + srcs = glob( + include = ["**/*"], + exclude = [ + "bazel-out/**/*", + "dist/**/*", + "node_modules/**/*", + ], + ), + visibility = ["//visibility:public"], +) diff --git a/examples/worker/README.md b/examples/worker/README.md new file mode 100644 index 0000000000..74a9be59c8 --- /dev/null +++ b/examples/worker/README.md @@ -0,0 +1,25 @@ +# Worker example + +This shows how to keep a tool running as a persistent worker. This is like a daemon process that Bazel will start and manage as needed to perform actions. + +Bazel's protocol for workers is: + +- start a pool of processes +- when an action needs to be run, it encodes the request as a protocol buffer and writes it to the worker's stdin +- the `@bazel/worker` package provides a utility to speak this protocol, and dispatches to a function you provide that performs the work of the tool. See /packages/worker/README.md for a description of that utility. +- the tool returns a response written as another protocol buffer to stdout (note this means you cannot log to stdout) + +## Files in the example + +`foo.js` is some arbitrary input to the rule. You can run `ibazel build :do_work` and then make edits to this JS input to observe how every change triggers the action to run, and it's quite fast because the worker process stays running. + +The `tool.js` file shows how to use the `@bazel/worker` package to implement the worker protocol. +Note that the main method first checks whether the tool is being run under the worker mode, or should just do the work once and exit. + +`uses_workers.bzl` shows how the tool is wrapped in a Bazel rule. When the action is declared, we mark it with attribute `execution_requirements = {"supports-workers": "1"}` which informs Bazel that it speaks the worker protocol.
Bazel will decide whether to actually keep the process running as a persistent worker. + +By also providing a `mnemonic` attribute to the action, users will be able to control the scheduling if desired. +Note the `--strategy=DoWork=standalone` flag passed to Bazel in the integration test in the /examples directory. This tells Bazel not to use workers. Similarly the user could set some other strategy like `--strategy=DoWork=worker` to explicitly opt in. + +`BUILD.bazel` defines the binary for the tool, then shows how it would be used by calling `work()`. Note that the usage site just calls the rule without knowing whether it uses workers for performance. + diff --git a/examples/worker/WORKSPACE b/examples/worker/WORKSPACE new file mode 100644 index 0000000000..b397676bb8 --- /dev/null +++ b/examples/worker/WORKSPACE @@ -0,0 +1,34 @@ +# Copyright 2019 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +workspace( + name = "examples_worker", + managed_directories = {"@npm": ["node_modules"]}, +) + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "build_bazel_rules_nodejs", + sha256 = "6625259f9f77ef90d795d20df1d0385d9b3ce63b6619325f702b6358abb4ab33", + urls = ["https://github.com/bazelbuild/rules_nodejs/releases/download/0.35.0/rules_nodejs-0.35.0.tar.gz"], +) + +load("@build_bazel_rules_nodejs//:defs.bzl", "yarn_install") + +yarn_install( + name = "npm", + package_json = "//:package.json", + yarn_lock = "//:yarn.lock", +) diff --git a/examples/worker/foo.js b/examples/worker/foo.js new file mode 100644 index 0000000000..9756f59993 --- /dev/null +++ b/examples/worker/foo.js @@ -0,0 +1,3 @@ +// This is an arbitrary file used as an input to the worker action +// Any time this file is changed, the action will need to re-run +export const num = 0; diff --git a/examples/worker/package.json b/examples/worker/package.json new file mode 100644 index 0000000000..b6dc074329 --- /dev/null +++ b/examples/worker/package.json @@ -0,0 +1,6 @@ +{ + "private": true, + "devDependencies": { + "@bazel/worker": "latest" + } +} diff --git a/examples/worker/tool.js b/examples/worker/tool.js new file mode 100644 index 0000000000..8b8917b13d --- /dev/null +++ b/examples/worker/tool.js @@ -0,0 +1,52 @@ +/** + * @fileoverview this program does a trivial job of writing a dummy string to an output + */ +const worker = require('@bazel/worker'); + +function runOneBuild(args, inputs) { + // IMPORTANT don't log with console.log - stdout is reserved for the worker protocol. + // This is true for any code running in the program, even if it comes from a third-party library.
+ worker.log('Performing a build with args', args); + if (inputs) { + // The inputs help you manage a cache within the worker process + // They are available only when run as a worker, not in standalone mode + worker.log('We were run as a worker so we also got a manifest of all the inputs', inputs); + } + + // Parse our arguments as usual. The worker library handles getting these out of the protocol + // buffer. + const [output] = args; + require('fs').writeFileSync(output, 'Dummy output', {encoding: 'utf-8'}); + + // Return true if the tool succeeded, false otherwise. + return true; +} + +if (require.main === module) { + // One reason to run a program under a worker is that it takes a long time to start + // Imagine that several seconds are spent here + + // Bazel will pass a special argument to the program when it's running us as a worker + if (worker.runAsWorker(process.argv)) { + worker.log('Running as a Bazel worker'); + + worker.runWorkerLoop(runOneBuild); + } else { + // Running standalone so stdout is available as usual + console.log('Running as a standalone process'); + + // Help our users get on the fast path + console.error( + 'Started a new process to perform this action. Your build might be misconfigured, try --strategy=DoWork=worker'); + + // The first argument to the program is prefixed with '@' + // because Bazel does that for param files. Strip it first. + const paramFile = process.argv[2].replace(/^@/, ''); + const args = require('fs').readFileSync(paramFile, 'utf-8').trim().split('\n'); + + // Bazel is just running the program as a single action, don't act like a worker + if (!runOneBuild(args)) { + process.exitCode = 1; + } + } +} diff --git a/examples/worker/uses_workers.bzl b/examples/worker/uses_workers.bzl new file mode 100644 index 0000000000..0456cdde57 --- /dev/null +++ b/examples/worker/uses_workers.bzl @@ -0,0 +1,39 @@ +"Shows how to define a bazel rule that runs its action as a persistent worker." 
+ +def _work(ctx): + output = ctx.actions.declare_file(ctx.label.name + ".out") + + # Bazel workers always get their arguments spilled into a params file + args = ctx.actions.args() + + # Bazel requires a flagfile for worker mode, + # either prefixed with @ or --flagfile= argument + args.use_param_file("@%s", use_always = True) + args.set_param_file_format("multiline") + + args.add(output.path) + + ctx.actions.run( + arguments = [args], + executable = ctx.executable.tool, + inputs = [ctx.file.src], + outputs = [output], + # Tell Bazel that this program speaks the worker protocol + execution_requirements = {"supports-workers": "1"}, + # The user can explicitly set the execution strategy + mnemonic = "DoWork", + ) + + return [DefaultInfo(files = depset([output]))] + +work = rule( + implementation = _work, + attrs = { + "src": attr.label(allow_single_file = True), + "tool": attr.label( + default = Label("//:tool"), + executable = True, + cfg = "host", + ), + }, +) diff --git a/packages/index.bzl b/packages/index.bzl index 1e6b25455a..74ea9e25a4 100644 --- a/packages/index.bzl +++ b/packages/index.bzl @@ -28,4 +28,5 @@ NESTED_PACKAGES = [ NPM_PACKAGES = [ "@bazel/create", "@bazel/hide-bazel-files", + "@bazel/worker", ] + ["@bazel/%s" % pkg for pkg in NESTED_PACKAGES] diff --git a/packages/worker/BUILD.bazel b/packages/worker/BUILD.bazel new file mode 100644 index 0000000000..448356a675 --- /dev/null +++ b/packages/worker/BUILD.bazel @@ -0,0 +1,63 @@ +# Copyright 2019 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +load("@build_bazel_rules_nodejs//:tools/defaults.bzl", "npm_package") + +# We reach inside the @bazel/typescript npm package to grab this one .js file +# This avoids a complex refactoring where we extract that .ts file from tsc_wrapped to a common library +_worker_path = "external/build_bazel_rules_typescript/internal/tsc_wrapped/worker" + +# Copy the proto file to a matching third_party/... nested directory +# so the runtime require() statements still work +_worker_proto_dir = "third_party/github.com/bazelbuild/bazel/src/main/protobuf" + +genrule( + name = "copy_worker_js", + srcs = ["@build_bazel_rules_typescript//:npm_bazel_typescript_package"], + outs = ["index.js"], + cmd = "cp $(location @build_bazel_rules_typescript//:npm_bazel_typescript_package)/%s.js $@" % _worker_path, +) + +genrule( + name = "copy_worker_dts", + srcs = ["@build_bazel_rules_typescript//:npm_bazel_typescript_package"], + outs = ["index.d.ts"], + cmd = "cp $(location @build_bazel_rules_typescript//:npm_bazel_typescript_package)/%s.d.ts $@" % _worker_path, +) + +genrule( + name = "copy_worker_proto", + srcs = ["@build_bazel_rules_typescript//%s:worker_protocol.proto" % _worker_proto_dir], + outs = ["%s/worker_protocol.proto" % _worker_proto_dir], + cmd = "cp $< $@", +) + +npm_package( + name = "npm_package", + srcs = [ + "README.md", + "package.json", + ], + replacements = { + # Fix the require() statement that loads the worker_protocol.proto file + # we are re-rooting the sources into the @bazel/worker package so it's no longer + # relative to the build_bazel_rules_typescript workspace. 
+ "build_bazel_rules_typescript": "@bazel/worker", + }, + deps = [ + ":copy_worker_dts", + ":copy_worker_js", + ":copy_worker_proto", + ], +) diff --git a/packages/worker/README.md b/packages/worker/README.md new file mode 100644 index 0000000000..b21b528ca3 --- /dev/null +++ b/packages/worker/README.md @@ -0,0 +1,23 @@ +# Bazel Worker support + +Bazel workers allow actions to be executed by a program that stays running. + +Learn more about Bazel workers from Mike Morearty's [medium article](https://medium.com/@mmorearty/how-to-create-a-persistent-worker-for-bazel-7738bba2cabb) + +## Typical usage + +Read `index.d.ts` for the worker API. Essentially you call `runWorkerLoop` passing it a function to call back when each build request arrives. + +See the [worker example] for a full example with comments. + +[worker example]: https://github.com/bazelbuild/rules_nodejs/tree/master/examples/worker + +## Restrictions on programs that run as a worker + +Accept arguments as a params file + +stdin and stdout of the process are reserved for the worker protocol with Bazel. +That means anything that does a `console.log` can cause an error. +Bazel prints a snippet of whatever was printed to stdout to help you track it down. +Writing to stderr is fine, for example with `console.error`. +In the future, we might improve this worker library to patch out the nodejs console.log function so that it doesn't interfere with the worker protocol. 
diff --git a/packages/worker/package.json b/packages/worker/package.json new file mode 100644 index 0000000000..18345b666e --- /dev/null +++ b/packages/worker/package.json @@ -0,0 +1,20 @@ +{ + "name": "@bazel/worker", + "dependencies": { + "protobufjs": "6.8.8" + }, + "description": "Adapt Node programs to run as a Bazel worker", + "license": "Apache-2.0", + "version": "0.0.0-PLACEHOLDER", + "repository": { + "type" : "git", + "url" : "https://github.com/bazelbuild/rules_nodejs.git", + "directory": "packages/worker" + }, + "bugs": { + "url": "https://github.com/bazelbuild/rules_nodejs/issues" + }, + "keywords": [ + "bazel" + ] +} \ No newline at end of file