diff --git a/.changelog/11791.txt b/.changelog/11791.txt new file mode 100644 index 00000000000..439eb5080e5 --- /dev/null +++ b/.changelog/11791.txt @@ -0,0 +1,3 @@ +```release-note:improvement +hcl: added support for using the `filebase64` function in jobspecs +``` diff --git a/.changelog/11864.txt b/.changelog/11864.txt new file mode 100644 index 00000000000..9ca1db038bd --- /dev/null +++ b/.changelog/11864.txt @@ -0,0 +1,3 @@ +```release-note:improvement +qemu: Added option to configure `drive_interface` +``` diff --git a/.changelog/12520.txt b/.changelog/12520.txt new file mode 100644 index 00000000000..80ce4c84d86 --- /dev/null +++ b/.changelog/12520.txt @@ -0,0 +1,3 @@ +```release-note:improvement +bootstrap: Added option to allow for an operator generated bootstrap token to be passed to the `acl bootstrap` command +``` \ No newline at end of file diff --git a/.changelog/12534.txt b/.changelog/12534.txt new file mode 100644 index 00000000000..e3055f09bef --- /dev/null +++ b/.changelog/12534.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: support Authorization Bearer header in lieu of X-Nomad-Token header +``` diff --git a/.changelog/12800.txt b/.changelog/12800.txt new file mode 100644 index 00000000000..b58a322a334 --- /dev/null +++ b/.changelog/12800.txt @@ -0,0 +1,3 @@ +```release-note:improvement +qemu: add support for guest agent socket +``` diff --git a/.changelog/12817.txt b/.changelog/12817.txt new file mode 100644 index 00000000000..e81b1c01cd7 --- /dev/null +++ b/.changelog/12817.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where the network.dns block was not interpolated +``` diff --git a/.changelog/12847.txt b/.changelog/12847.txt new file mode 100644 index 00000000000..a8e152b2da9 --- /dev/null +++ b/.changelog/12847.txt @@ -0,0 +1,3 @@ +```release-note:improvement +ui: change sort-order of evaluations to be reverse-chronological +``` diff --git a/.changelog/12862.txt b/.changelog/12862.txt new file mode 100644 index 00000000000..ec238e923f6 --- /dev/null +++ b/.changelog/12862.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: enable setting `?choose` parameter when querying services +``` diff --git a/.changelog/12916.txt b/.changelog/12916.txt new file mode 100644 index 00000000000..83056fc4d3a --- /dev/null +++ b/.changelog/12916.txt @@ -0,0 +1,3 @@ +```release-note:bug +event_stream: fixed a bug where dynamic port values would fail to serialize in the event stream +``` diff --git a/.changelog/12925.txt b/.changelog/12925.txt new file mode 100644 index 00000000000..83dfa34b7cc --- /dev/null +++ b/.changelog/12925.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Fixed a bug that prevented the UI task exec functionality from working behind a reverse proxy. +``` diff --git a/.changelog/12953.txt b/.changelog/12953.txt new file mode 100644 index 00000000000..82ed92b50a1 --- /dev/null +++ b/.changelog/12953.txt @@ -0,0 +1,3 @@ +```release-note:improvement +consul: Reduce load on Consul leader server by allowing stale results when listing namespaces. +``` diff --git a/.changelog/12955.txt b/.changelog/12955.txt new file mode 100644 index 00000000000..cc0dc3c5178 --- /dev/null +++ b/.changelog/12955.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core: On node updates, skip creating evaluations for jobs not in the node's datacenter.
+``` diff --git a/.changelog/12961.txt b/.changelog/12961.txt new file mode 100644 index 00000000000..93826a3f2de --- /dev/null +++ b/.changelog/12961.txt @@ -0,0 +1,3 @@ +```release-note:improvement +agent: emit a warning message if the agent starts with `bootstrap_expect` set to an even number. +``` diff --git a/.changelog/12962.txt b/.changelog/12962.txt new file mode 100644 index 00000000000..d8a2e2c81a8 --- /dev/null +++ b/.changelog/12962.txt @@ -0,0 +1,3 @@ +```release-note:bug +agent: fixed a panic on startup when the `server.protocol_version` config parameter was set +``` diff --git a/.changelog/13008.txt b/.changelog/13008.txt new file mode 100644 index 00000000000..b12e5294bc9 --- /dev/null +++ b/.changelog/13008.txt @@ -0,0 +1,3 @@ +```release-note:bug +volumes: Fixed a bug where additions, updates, or removals of host volumes or CSI volumes were not treated as destructive updates +``` diff --git a/.changelog/13012.txt b/.changelog/13012.txt new file mode 100644 index 00000000000..dd646bec560 --- /dev/null +++ b/.changelog/13012.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: fixed a bug where links to jobs with "@" in their name would mis-identify namespace and 404 +``` diff --git a/.changelog/13041.txt b/.changelog/13041.txt new file mode 100644 index 00000000000..53f0577f7a1 --- /dev/null +++ b/.changelog/13041.txt @@ -0,0 +1,3 @@ +```release-note:improvement +client: added more fault tolerant defaults for template configuration +``` diff --git a/.changelog/13044.txt b/.changelog/13044.txt new file mode 100644 index 00000000000..7f051f87adb --- /dev/null +++ b/.changelog/13044.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: update default redis and use nomad service discovery +``` diff --git a/.changelog/13045.txt b/.changelog/13045.txt new file mode 100644 index 00000000000..cb930bcf867 --- /dev/null +++ b/.changelog/13045.txt @@ -0,0 +1,7 @@ +```release-note:improvement +cli: Added `scheduler get-config` and `scheduler set-config` commands to the operator CLI +``` + +```release-note:improvement +core: Added the ability to pause and un-pause the eval broker and blocked eval broker +``` diff --git a/.changelog/13055.txt b/.changelog/13055.txt new file mode 100644 index 00000000000..f04454a1938 --- /dev/null +++ b/.changelog/13055.txt @@ -0,0 +1,3 @@ +```release-note:bug +lifecycle: fixed a bug where sidecar tasks were not being stopped last +``` diff --git a/.changelog/13057.txt b/.changelog/13057.txt new file mode 100644 index 00000000000..2eac63dcb07 --- /dev/null +++ b/.changelog/13057.txt @@ -0,0 +1,3 @@ +```release-note:security +A vulnerability was identified in the go-getter library that Nomad uses for its artifacts such that a specially crafted Nomad jobspec can be used for privilege escalation onto client agent hosts. 
[CVE-2022-30324](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-30324) +``` diff --git a/.changelog/13058.txt b/.changelog/13058.txt new file mode 100644 index 00000000000..8ed6bbe8004 --- /dev/null +++ b/.changelog/13058.txt @@ -0,0 +1,3 @@ +```release-note:bug +docker: Fixed a bug where cgroups-v1 parent was being set +``` diff --git a/.changelog/13065.txt b/.changelog/13065.txt new file mode 100644 index 00000000000..1f367db488c --- /dev/null +++ b/.changelog/13065.txt @@ -0,0 +1,3 @@ +```release-note:bug +api: Fixed a bug where Consul token was not respected for job revert API +``` diff --git a/.changelog/13070.txt b/.changelog/13070.txt new file mode 100644 index 00000000000..b072ff39adb --- /dev/null +++ b/.changelog/13070.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug where job validate did not respect vault token or namespace +``` diff --git a/.changelog/13076.txt b/.changelog/13076.txt new file mode 100644 index 00000000000..b388b8199ba --- /dev/null +++ b/.changelog/13076.txt @@ -0,0 +1,3 @@ +```release-note:improvement +agent: logs are no longer buffered at startup when logging in JSON format +``` diff --git a/.changelog/13103.txt b/.changelog/13103.txt new file mode 100644 index 00000000000..1f3a3d42369 --- /dev/null +++ b/.changelog/13103.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: warn destructive update only when count is greater than 1 +``` diff --git a/.changelog/13104.txt b/.changelog/13104.txt new file mode 100644 index 00000000000..3b383c00e96 --- /dev/null +++ b/.changelog/13104.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed a bug where blocked eval resources were incorrectly computed +``` diff --git a/.changelog/13125.txt b/.changelog/13125.txt new file mode 100644 index 00000000000..f0ffce55e94 --- /dev/null +++ b/.changelog/13125.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: Added missing ability to set the Connect upstream destination namespace +``` diff --git a/.changelog/13182.txt b/.changelog/13182.txt new file mode 100644 index 00000000000..31d0e9d03d4 --- /dev/null +++ b/.changelog/13182.txt @@ -0,0 +1,3 @@ +```release-note:improvement +fingerprint: add support for detecting kernel architecture of clients. (attribute: `kernel.arch`) +``` diff --git a/.changelog/13184.txt b/.changelog/13184.txt new file mode 100644 index 00000000000..ab657c2308b --- /dev/null +++ b/.changelog/13184.txt @@ -0,0 +1,3 @@ +```release-note:improvement +consul/connect: Allow configuring TLS settings for ingress. +``` diff --git a/.changelog/13205.txt b/.changelog/13205.txt new file mode 100644 index 00000000000..9882e74b660 --- /dev/null +++ b/.changelog/13205.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed a bug where an evicted batch job would not be rescheduled +``` diff --git a/.changelog/13265.txt b/.changelog/13265.txt new file mode 100644 index 00000000000..7e34eb38ae9 --- /dev/null +++ b/.changelog/13265.txt @@ -0,0 +1,3 @@ +```release-note:improvement +driver/docker: Eliminate excess Docker registry pulls for the `infra_image` when it already exists locally.
+``` diff --git a/.changelog/13274.txt b/.changelog/13274.txt new file mode 100644 index 00000000000..dc5f84f6087 --- /dev/null +++ b/.changelog/13274.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a scheduler bug where failed feasibility checks would return early and prevent processing additional nodes +``` diff --git a/.changelog/13301.txt b/.changelog/13301.txt new file mode 100644 index 00000000000..c6ee35f40bc --- /dev/null +++ b/.changelog/13301.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where volume claims on lost or garbage collected nodes could not be freed +``` diff --git a/.changelog/13340.txt b/.changelog/13340.txt new file mode 100644 index 00000000000..948ce639258 --- /dev/null +++ b/.changelog/13340.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a regression where a newly introduced 30s timeout prevented some plugins from running by marking them as unhealthy; the timeout is now configurable via a new `health_timeout` field +``` diff --git a/.changelog/13359.txt b/.changelog/13359.txt new file mode 100644 index 00000000000..d42c98f36ee --- /dev/null +++ b/.changelog/13359.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: Added `NewSysbatchJob` helper function to create a base sysbatch job object +``` diff --git a/.changelog/13364.txt b/.changelog/13364.txt new file mode 100644 index 00000000000..7a34eda8fc2 --- /dev/null +++ b/.changelog/13364.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug where the evaluation ID was not returned from `job revert` when the `-detach` flag was used +``` \ No newline at end of file diff --git a/.changelog/13405.txt b/.changelog/13405.txt new file mode 100644 index 00000000000..fac6e67f261 --- /dev/null +++ b/.changelog/13405.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed remaining bugs where the evaluation ID was not printed when the `-detach` flag was passed +``` diff --git a/.changelog/13407.txt b/.changelog/13407.txt new file mode 100644 index 00000000000..7a2fb340bac --- /dev/null +++ b/.changelog/13407.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed a bug where the plan applier could deadlock if the leader's state lagged behind the plan's creation index for more than 5 seconds. +``` diff --git a/.changelog/13421.txt b/.changelog/13421.txt new file mode 100644 index 00000000000..d37bb2d58a2 --- /dev/null +++ b/.changelog/13421.txt @@ -0,0 +1,7 @@ +```release-note:improvement +core: automatically mark clients with recurring plan rejections as ineligible +``` + +```release-note:improvement +metrics: emit `nomad.nomad.plan.rejection_tracker.node_score` metric for the number of times a node had a plan rejection within the past time window +``` diff --git a/.changelog/13428.txt b/.changelog/13428.txt new file mode 100644 index 00000000000..70203bdbc71 --- /dev/null +++ b/.changelog/13428.txt @@ -0,0 +1,3 @@ +```release-note:bug +cni: Fixed a bug where the loopback address was not set for all drivers +``` diff --git a/.changelog/13446.txt b/.changelog/13446.txt new file mode 100644 index 00000000000..7c10253f144 --- /dev/null +++ b/.changelog/13446.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where CSI hook validation would fail if all tasks didn't support CSI.
+``` diff --git a/.changelog/13472.txt b/.changelog/13472.txt new file mode 100644 index 00000000000..cec378d1061 --- /dev/null +++ b/.changelog/13472.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: `operator debug` now outputs current leader to debug bundle +``` diff --git a/.changelog/13473.txt b/.changelog/13473.txt new file mode 100644 index 00000000000..4404af1f69b --- /dev/null +++ b/.changelog/13473.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Fixed a bug where dispatched children of parameterized jobs wouldn't display metadata or payloads +``` diff --git a/.changelog/13491.txt b/.changelog/13491.txt new file mode 100644 index 00000000000..93b778d931c --- /dev/null +++ b/.changelog/13491.txt @@ -0,0 +1,3 @@ +```release-note:improvement +deps: Updated `github.com/hashicorp/go-discover` to latest to allow setting the AWS endpoint definition +``` diff --git a/.changelog/13492.txt b/.changelog/13492.txt new file mode 100644 index 00000000000..73e01837d2b --- /dev/null +++ b/.changelog/13492.txt @@ -0,0 +1,7 @@ +```release-note:improvement +cli: Added `delete` command to the eval CLI +``` + +```release-note:improvement +agent: Added delete support to the eval HTTP API +``` diff --git a/.changelog/13493.txt b/.changelog/13493.txt new file mode 100644 index 00000000000..0d19a993807 --- /dev/null +++ b/.changelog/13493.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a problem calculating a service's namespace +``` diff --git a/.changelog/13530.txt b/.changelog/13530.txt new file mode 100644 index 00000000000..d62572144bf --- /dev/null +++ b/.changelog/13530.txt @@ -0,0 +1,7 @@ +```release-note:bug +api: Fix listing evaluations with the wildcard namespace and an ACL token +``` + +```release-note:bug +ui: Fix a bug that prevented viewing the details of an evaluation in a non-default namespace +``` diff --git a/.changelog/13551.txt b/.changelog/13551.txt new file mode 100644 index 00000000000..e31859a7033 --- /dev/null +++ b/.changelog/13551.txt @@ -0,0 +1,3 @@ +```release-note:bug +state: Fix listing evaluations from all namespaces +``` diff --git a/.changelog/13581.txt b/.changelog/13581.txt new file mode 100644 index 00000000000..2d9fef106f0 --- /dev/null +++ b/.changelog/13581.txt @@ -0,0 +1,7 @@ +```release-note:improvement +cli: display namespace and node ID in the `eval list` command and when `eval status` matches multiple evals +``` + +```release-note:improvement +cli: always display job ID and namespace in the `eval status` command +``` diff --git a/.changelog/13588.txt b/.changelog/13588.txt new file mode 100644 index 00000000000..7f694e29576 --- /dev/null +++ b/.changelog/13588.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Fixed an issue where editing or running a job with a namespace via the UI would throw a 404 on redirect.
+``` diff --git a/.changelog/13608.txt b/.changelog/13608.txt new file mode 100644 index 00000000000..43100971517 --- /dev/null +++ b/.changelog/13608.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: refactor ACL check when using the all namespaces wildcard in the job and alloc list endpoints +``` diff --git a/.changelog/13621.txt b/.changelog/13621.txt new file mode 100644 index 00000000000..acdffdf07f9 --- /dev/null +++ b/.changelog/13621.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: HTTP server now returns a 429 error code when hitting the connection limit +``` diff --git a/.changelog/13626.txt b/.changelog/13626.txt new file mode 100644 index 00000000000..9c6f8670139 --- /dev/null +++ b/.changelog/13626.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where max_kill_timeout client config was ignored +``` diff --git a/.changelog/13651.txt b/.changelog/13651.txt new file mode 100644 index 00000000000..7cce7bb42f2 --- /dev/null +++ b/.changelog/13651.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed a bug where reserved ports on multiple node networks would be treated as a collision. `client.reserved.reserved_ports` is now merged into each `host_network`'s reserved ports instead of being treated as a collision. +``` diff --git a/.changelog/13656.txt b/.changelog/13656.txt new file mode 100644 index 00000000000..951a780f038 --- /dev/null +++ b/.changelog/13656.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug in the names of the `node drain` and `node status` sub-commands +``` diff --git a/.changelog/13658.txt b/.changelog/13658.txt new file mode 100644 index 00000000000..b196bc500aa --- /dev/null +++ b/.changelog/13658.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: `operator snapshot state` supports `-filter` expressions and avoids writing large temporary files +``` diff --git a/.changelog/13659.txt b/.changelog/13659.txt new file mode 100644 index 00000000000..c6ad896f565 --- /dev/null +++ b/.changelog/13659.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Allow running jobs from a namespace-limited token +``` diff --git a/.changelog/13670.txt b/.changelog/13670.txt new file mode 100644 index 00000000000..04e3541e3dd --- /dev/null +++ b/.changelog/13670.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Fixed a bug where task memory was reported as zero on systems using cgroups v2 +``` diff --git a/.changelog/13679.txt b/.changelog/13679.txt new file mode 100644 index 00000000000..2cef210de33 --- /dev/null +++ b/.changelog/13679.txt @@ -0,0 +1,3 @@ +```release-note:improvement +ui: Namespace filter query parameters are now isolated by route +``` diff --git a/.changelog/13737.txt b/.changelog/13737.txt new file mode 100644 index 00000000000..3130f221c01 --- /dev/null +++ b/.changelog/13737.txt @@ -0,0 +1,3 @@ +```release-note:bug +acl: Fixed a bug where the timestamp for expiring one-time tokens was not deterministic between servers +``` diff --git a/.changelog/13755.txt b/.changelog/13755.txt new file mode 100644 index 00000000000..3b1c2c051ae --- /dev/null +++ b/.changelog/13755.txt @@ -0,0 +1,3 @@ +```release-note:improvement +template: Templates support new uid/gid parameter pair +``` \ No newline at end of file diff --git a/.changelog/13786.txt b/.changelog/13786.txt new file mode 100644 index 00000000000..17d25b90bd1 --- /dev/null +++ b/.changelog/13786.txt @@ -0,0 +1,3 @@ +```release-note:bug +metrics: Fixed a bug where blocked evals with no class produced no dc:class scope metrics +``` diff --git a/.changelog/13845.txt b/.changelog/13845.txt
new file mode 100644 index 00000000000..88778bcc8f4 --- /dev/null +++ b/.changelog/13845.txt @@ -0,0 +1,3 @@ +```release-note:bug +api: Fixed a nil pointer dereference when periodic jobs are missing their periodic spec +``` diff --git a/.changelog/13865.txt b/.changelog/13865.txt new file mode 100644 index 00000000000..ae38cdf0f8f --- /dev/null +++ b/.changelog/13865.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Add missing breadcrumb in the Evaluations page. +``` diff --git a/.changelog/13866.txt b/.changelog/13866.txt new file mode 100644 index 00000000000..77d02d247f6 --- /dev/null +++ b/.changelog/13866.txt @@ -0,0 +1,3 @@ +```release-note:improvement +ui: Reorder and apply the same style to the Evaluations list page filters to match the Job list page. +``` diff --git a/.changelog/13869.txt b/.changelog/13869.txt new file mode 100644 index 00000000000..a29c7568bb8 --- /dev/null +++ b/.changelog/13869.txt @@ -0,0 +1,3 @@ +```release-note:bug +servicedisco: Fixed a bug where non-unique services would escape job validation +``` diff --git a/.changelog/13880.txt b/.changelog/13880.txt new file mode 100644 index 00000000000..31cdaca0fd7 --- /dev/null +++ b/.changelog/13880.txt @@ -0,0 +1,3 @@ +```release-note:bug +namespaces: Fixed a bug that allowed deleting a namespace that contained a CSI volume +``` diff --git a/.changelog/13907.txt b/.changelog/13907.txt new file mode 100644 index 00000000000..50dc4a0ce19 --- /dev/null +++ b/.changelog/13907.txt @@ -0,0 +1,3 @@ +```release-note:improvement +template: Expose consul-template configuration options at the client level for `nomad_retry`. +``` diff --git a/.changelog/13919.txt b/.changelog/13919.txt new file mode 100644 index 00000000000..559d948d863 --- /dev/null +++ b/.changelog/13919.txt @@ -0,0 +1,3 @@ +```release-note:improvement +csi: Add `stage_publish_base_dir` field to `csi_plugin` block to support plugins that require a specific staging/publishing directory for mounts +``` diff --git a/.changelog/13971.txt b/.changelog/13971.txt new file mode 100644 index 00000000000..3873e254e11 --- /dev/null +++ b/.changelog/13971.txt @@ -0,0 +1,3 @@ +```release-note:improvement +qemu: use shorter socket file names to reduce the chance of hitting the max path length +``` diff --git a/.changelog/13972.txt b/.changelog/13972.txt new file mode 100644 index 00000000000..330faea98a1 --- /dev/null +++ b/.changelog/13972.txt @@ -0,0 +1,3 @@ +```release-note:improvement +template: add script change_mode that allows scripts to be executed on template change +``` \ No newline at end of file diff --git a/.changelog/14000.txt b/.changelog/14000.txt new file mode 100644 index 00000000000..3db5ca3b648 --- /dev/null +++ b/.changelog/14000.txt @@ -0,0 +1,3 @@ +```release-note:bug +qemu: restore the monitor socket path when restoring a QEMU task. 
+``` diff --git a/.changelog/14001.txt b/.changelog/14001.txt new file mode 100644 index 00000000000..026c80f8136 --- /dev/null +++ b/.changelog/14001.txt @@ -0,0 +1,3 @@ +```release-note:bug +deployments: Fixed a bug that prevented auto-approval if canaries were marked as unhealthy during deployment +``` diff --git a/.changelog/14065.txt b/.changelog/14065.txt new file mode 100644 index 00000000000..eef7084eeb2 --- /dev/null +++ b/.changelog/14065.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug where job validation request was not sent to leader +``` diff --git a/.changelog/14069.txt b/.changelog/14069.txt new file mode 100644 index 00000000000..9076fc3d6b6 --- /dev/null +++ b/.changelog/14069.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug where the memory usage reported by Allocation Resource Utilization is zero on systems using cgroups v2 +``` diff --git a/.changelog/14071.txt b/.changelog/14071.txt new file mode 100644 index 00000000000..b4f729dcf23 --- /dev/null +++ b/.changelog/14071.txt @@ -0,0 +1,3 @@ +```release-note:improvement +ui: Display different message when trying to exec into a job with no task running. +``` diff --git a/.changelog/14088.txt b/.changelog/14088.txt new file mode 100644 index 00000000000..e8963029aaa --- /dev/null +++ b/.changelog/14088.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug where the Vault token was not respected in the plan command +``` diff --git a/.changelog/14089.txt b/.changelog/14089.txt new file mode 100644 index 00000000000..c2a0d3e5059 --- /dev/null +++ b/.changelog/14089.txt @@ -0,0 +1,3 @@ +```release-note:improvement +driver/docker: Added config option to disable container healthcheck +``` diff --git a/.changelog/14115.txt b/.changelog/14115.txt new file mode 100644 index 00000000000..e8a7f86b6ea --- /dev/null +++ b/.changelog/14115.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Removes duplicate breadcrumb header when navigating from child job back to parent. +``` diff --git a/.changelog/14127.txt b/.changelog/14127.txt new file mode 100644 index 00000000000..61c0368774e --- /dev/null +++ b/.changelog/14127.txt @@ -0,0 +1,7 @@ +```release-note:improvement +client: add option to restart all tasks of an allocation, regardless of lifecycle type or state. +``` + +```release-note:improvement +client: only start poststop tasks after poststart tasks are done.
+``` diff --git a/.changelog/14132.txt b/.changelog/14132.txt new file mode 100644 index 00000000000..a93b804ce49 --- /dev/null +++ b/.changelog/14132.txt @@ -0,0 +1,3 @@ +```release-note:improvement +build: update to go1.19 +``` diff --git a/.changelog/14138.txt b/.changelog/14138.txt new file mode 100644 index 00000000000..0978a97ce52 --- /dev/null +++ b/.changelog/14138.txt @@ -0,0 +1,3 @@ +```release-note:improvement +ui: add general keyboard navigation to the Nomad UI +``` \ No newline at end of file diff --git a/.changelog/14145.txt b/.changelog/14145.txt new file mode 100644 index 00000000000..5a543dac8de --- /dev/null +++ b/.changelog/14145.txt @@ -0,0 +1,3 @@ +```release-note:bug +api: cleanup whitespace from failed api response body +``` diff --git a/.changelog/14171.txt b/.changelog/14171.txt new file mode 100644 index 00000000000..ca84601d615 --- /dev/null +++ b/.changelog/14171.txt @@ -0,0 +1,3 @@ +```release-note:improvement +sentinel: add the ability to reference the namespace and Nomad acl token in policies +``` diff --git a/.changelog/14203.txt b/.changelog/14203.txt new file mode 100644 index 00000000000..f331d84c878 --- /dev/null +++ b/.changelog/14203.txt @@ -0,0 +1,3 @@ +```release-note:bug +template: Fixed a bug where job templates would use `uid` and `gid` 0 after upgrading to Nomad 1.3.3, causing tasks to fail with the error `failed looking up user: managing file ownership is not supported on Windows`. +``` diff --git a/.changelog/14223.txt b/.changelog/14223.txt new file mode 100644 index 00000000000..e8fceaa7c1c --- /dev/null +++ b/.changelog/14223.txt @@ -0,0 +1,3 @@ +```release-note:improvement +ui: Add button to restart all tasks in an allocation. +``` diff --git a/.changelog/14224.txt b/.changelog/14224.txt new file mode 100644 index 00000000000..8e05f384d30 --- /dev/null +++ b/.changelog/14224.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Fixed a bug that caused the allocation details page to display the stats bar chart even if the task was pending. +``` diff --git a/.changelog/14230.txt b/.changelog/14230.txt new file mode 100644 index 00000000000..7bb45f94528 --- /dev/null +++ b/.changelog/14230.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where cpuset initialization would not work on first agent startup +``` diff --git a/.changelog/14248.txt b/.changelog/14248.txt new file mode 100644 index 00000000000..32b4e40e9e0 --- /dev/null +++ b/.changelog/14248.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where user lookups would hang or panic +``` diff --git a/.changelog/14297.txt b/.changelog/14297.txt new file mode 100644 index 00000000000..207eb385890 --- /dev/null +++ b/.changelog/14297.txt @@ -0,0 +1,3 @@ +```release-note:bug +client/logmon: fixed a bug where logmon cannot find nomad executable +``` diff --git a/.changelog/14298.txt b/.changelog/14298.txt new file mode 100644 index 00000000000..1072f7bebf2 --- /dev/null +++ b/.changelog/14298.txt @@ -0,0 +1,7 @@ +```release-note:bug +vault: Fixed a bug where changing the Vault configuration `namespace` field was not detected as a change during server configuration reload. +``` + +```release-note:bug +vault: Fixed a bug where Vault clients were recreated when the server configuration was reloaded, even if there were no changes to the Vault configuration. 
+``` diff --git a/.changelog/14333.txt b/.changelog/14333.txt new file mode 100644 index 00000000000..7d9d69f69e0 --- /dev/null +++ b/.changelog/14333.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a bug where forcing a periodic job would fail if the job ID prefix-matched other periodic jobs +``` diff --git a/.changelog/14371.txt b/.changelog/14371.txt new file mode 100644 index 00000000000..b4bdc97236e --- /dev/null +++ b/.changelog/14371.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cgroups: use cgroup.kill interface file when using cgroups v2 +``` diff --git a/.changelog/14374.txt b/.changelog/14374.txt new file mode 100644 index 00000000000..ab33fd6d6a8 --- /dev/null +++ b/.changelog/14374.txt @@ -0,0 +1,3 @@ +```release-note:bug +template: Fixed a bug that could cause Nomad to panic when using `change_mode = "script"` +``` diff --git a/.changelog/14381.txt b/.changelog/14381.txt new file mode 100644 index 00000000000..60006c7ab3a --- /dev/null +++ b/.changelog/14381.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Revert a change that resulted in UI errors when ACLs were not used. +``` diff --git a/.changelog/14424.txt b/.changelog/14424.txt new file mode 100644 index 00000000000..2b93a039060 --- /dev/null +++ b/.changelog/14424.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Ignore Vault token when generating job diff. +``` diff --git a/.changelog/14426.txt b/.changelog/14426.txt new file mode 100644 index 00000000000..96b21b059d9 --- /dev/null +++ b/.changelog/14426.txt @@ -0,0 +1,7 @@ +```release-note:improvement +cli: ignore `-hcl2-strict` when -hcl1 is set. +``` + +```release-note:bug +cli: return exit code `255` when `nomad job plan` fails job validation. +``` diff --git a/.changelog/14431.txt b/.changelog/14431.txt new file mode 100644 index 00000000000..f6a1a7a5065 --- /dev/null +++ b/.changelog/14431.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed bug where clients could attempt to connect to servers with invalid addresses retrieved from Consul. +``` diff --git a/.changelog/14445.txt b/.changelog/14445.txt new file mode 100644 index 00000000000..611fddc2827 --- /dev/null +++ b/.changelog/14445.txt @@ -0,0 +1,3 @@ +```release-note:improvement +consul: Allow interpolation of task environment values into Consul Service Mesh configuration +``` diff --git a/.changelog/14457.txt b/.changelog/14457.txt new file mode 100644 index 00000000000..0bc5278a3ee --- /dev/null +++ b/.changelog/14457.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Remove extra space when displaying the version in the menu footer. +``` diff --git a/.changelog/14483.txt b/.changelog/14483.txt new file mode 100644 index 00000000000..07a3e141de1 --- /dev/null +++ b/.changelog/14483.txt @@ -0,0 +1,3 @@ +```release-note:bug +metrics: Update client `node_scheduling_eligibility` value with server heartbeats. +``` diff --git a/.changelog/14484.txt b/.changelog/14484.txt new file mode 100644 index 00000000000..305e9b065ad --- /dev/null +++ b/.changelog/14484.txt @@ -0,0 +1,11 @@ +```release-note:bug +csi: Fixed a bug where the server would not send controller unpublish for a failed allocation. +``` + +```release-note:bug +csi: Fixed a data race in the volume unpublish endpoint that could result in claims being incorrectly marked as freed before being persisted to raft. +``` + +```release-note:bug +api: Fixed a bug where the List Volume API did not include the `ControllerRequired` and `ResourceExhausted` fields. 
+``` diff --git a/.changelog/14497.txt b/.changelog/14497.txt new file mode 100644 index 00000000000..4b233f0acb0 --- /dev/null +++ b/.changelog/14497.txt @@ -0,0 +1,3 @@ +```release-note:bug +helpers: Fixed a bug where random stagger func did not protect against negative inputs +``` diff --git a/.changelog/14519.txt b/.changelog/14519.txt new file mode 100644 index 00000000000..8eae18caf7b --- /dev/null +++ b/.changelog/14519.txt @@ -0,0 +1,3 @@ +```release-note:bug +rpc: check for spec changes in all regions when registering multiregion jobs +``` diff --git a/.changelog/14629.txt b/.changelog/14629.txt new file mode 100644 index 00000000000..0ee3d074b1c --- /dev/null +++ b/.changelog/14629.txt @@ -0,0 +1,3 @@ +```release-note:improvement +api: return a more descriptive error when /v1/acl/bootstrap fails to decode request body +``` diff --git a/.changelog/14634.txt b/.changelog/14634.txt new file mode 100644 index 00000000000..ccab596c855 --- /dev/null +++ b/.changelog/14634.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: set content length on POST requests when using the `nomad operator api` command +``` diff --git a/.changelog/14635.txt b/.changelog/14635.txt new file mode 100644 index 00000000000..8830ed3e3be --- /dev/null +++ b/.changelog/14635.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: fixed a bug in the `operator api` command where the HTTPS scheme was not always correctly calculated +``` diff --git a/.changelog/14637.txt b/.changelog/14637.txt new file mode 100644 index 00000000000..7e01d444d33 --- /dev/null +++ b/.changelog/14637.txt @@ -0,0 +1,3 @@ +```release-note:bug +jobspec: Fixed a bug where an `artifact` with `headers` configuration would fail to parse when using HCLv1 +``` diff --git a/.changelog/14659.txt b/.changelog/14659.txt new file mode 100644 index 00000000000..a8de1aba1fb --- /dev/null +++ b/.changelog/14659.txt @@ -0,0 +1,3 @@ +```release-note:bug +scheduler: Fixed bug where the scheduler would treat multiregion jobs as paused for job types that don't use deployments +``` diff --git a/.changelog/14675.txt b/.changelog/14675.txt new file mode 100644 index 00000000000..2efa8ce2bb1 --- /dev/null +++ b/.changelog/14675.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where a volume that was successfully unmounted by the client but then failed controller unpublishing would not be marked free until garbage collection ran. +``` diff --git a/.changelog/14696.txt b/.changelog/14696.txt new file mode 100644 index 00000000000..f1b4af4b453 --- /dev/null +++ b/.changelog/14696.txt @@ -0,0 +1,3 @@ +```release-note:security +client: recover from panics caused by artifact download to prevent the Nomad client from crashing +``` diff --git a/.changelog/14749.txt b/.changelog/14749.txt new file mode 100644 index 00000000000..416bd366409 --- /dev/null +++ b/.changelog/14749.txt @@ -0,0 +1,3 @@ +```release-note:bug +template: Fixed a bug where the `splay` timeout was not being applied when `change_mode` was set to `script`. 
+``` diff --git a/.changelog/14798.txt b/.changelog/14798.txt new file mode 100644 index 00000000000..5a64b1ad640 --- /dev/null +++ b/.changelog/14798.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: always show ports on allocations page +``` diff --git a/.changelog/14875.txt b/.changelog/14875.txt new file mode 100644 index 00000000000..bef53c031f1 --- /dev/null +++ b/.changelog/14875.txt @@ -0,0 +1,3 @@ +```release-note:improvement +docker: improve memory usage for docker_logger +``` diff --git a/.changelog/14911.txt b/.changelog/14911.txt new file mode 100644 index 00000000000..4e8562800f8 --- /dev/null +++ b/.changelog/14911.txt @@ -0,0 +1,6 @@ +```release-note:bug +acl: Fixed a bug where Nomad version checking for one-time tokens was enforced across regions +``` +```release-note:bug +scheduler: Fixed a bug where version checking for disconnected client handling was enforced across regions +``` diff --git a/.changelog/14917.txt b/.changelog/14917.txt new file mode 100644 index 00000000000..502aafb7deb --- /dev/null +++ b/.changelog/14917.txt @@ -0,0 +1,3 @@ +```release-note:bug +consul: Fixed a bug where services continuously re-registered +``` diff --git a/.changelog/14924.txt b/.changelog/14924.txt new file mode 100644 index 00000000000..3e7d61ff842 --- /dev/null +++ b/.changelog/14924.txt @@ -0,0 +1,4 @@ +```release-note:bug +nomad native service discovery: Ensure all local servers meet the v1.3.0 minimum before service registrations can be written +``` + diff --git a/.changelog/14944.txt b/.changelog/14944.txt new file mode 100644 index 00000000000..0f5a6358b22 --- /dev/null +++ b/.changelog/14944.txt @@ -0,0 +1,3 @@ +```release-note:bug +consul: atomically register checks on initial service registration +``` diff --git a/.changelog/14983.txt b/.changelog/14983.txt new file mode 100644 index 00000000000..56b020b7b2c --- /dev/null +++ b/.changelog/14983.txt @@ -0,0 +1,3 @@ +```release-note:improvement +deps: update go-memdb for goroutine leak fix +``` diff --git a/.changelog/15021.txt b/.changelog/15021.txt new file mode 100644 index 00000000000..33671b10453 --- /dev/null +++ b/.changelog/15021.txt @@ -0,0 +1,3 @@ +```release-note:bug +deps: Update hashicorp/raft to v1.3.11; fixes unstable leadership on server removal +``` diff --git a/.changelog/15027.txt b/.changelog/15027.txt new file mode 100644 index 00000000000..9f5f5f0e5cc --- /dev/null +++ b/.changelog/15027.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where Nomad could not detect cores on recent RHEL systems +``` diff --git a/.changelog/15068.txt b/.changelog/15068.txt new file mode 100644 index 00000000000..4e587b83784 --- /dev/null +++ b/.changelog/15068.txt @@ -0,0 +1,7 @@ +```release-note:bug +scheduler: Fixed a bug that prevented disconnected allocations from being updated after they reconnect. +``` + +```release-note:bug +scheduler: Prevent unnecessary placements when disconnected allocations reconnect. +``` diff --git a/.changelog/15096.txt b/.changelog/15096.txt new file mode 100644 index 00000000000..695aac420e2 --- /dev/null +++ b/.changelog/15096.txt @@ -0,0 +1,3 @@ +```release-note:bug +drivers: pass missing `propagation_mode` configuration for volume mounts to external plugins +``` diff --git a/.changelog/15097.txt b/.changelog/15097.txt new file mode 100644 index 00000000000..15a495d9aa5 --- /dev/null +++ b/.changelog/15097.txt @@ -0,0 +1,3 @@ +```release-note:breaking-change +core: Ensure no leakage of evaluations for batch jobs.
Prior to this change, allocations and evaluations for batch jobs were never garbage collected until the batch job was explicitly stopped. The new `batch_eval_gc_threshold` server configuration controls how often they are collected. The default threshold is `24h`. +``` diff --git a/.changelog/15101.txt b/.changelog/15101.txt new file mode 100644 index 00000000000..c76126f7918 --- /dev/null +++ b/.changelog/15101.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a race condition that could cause a panic when a volume is garbage collected +``` diff --git a/.changelog/15125.txt b/.changelog/15125.txt new file mode 100644 index 00000000000..23cd7abd0cd --- /dev/null +++ b/.changelog/15125.txt @@ -0,0 +1,3 @@ +```release-note:bug +device: Fixed a bug where device plugins would not fingerprint on startup +``` diff --git a/.changelog/15134.txt b/.changelog/15134.txt new file mode 100644 index 00000000000..3e4f358e74a --- /dev/null +++ b/.changelog/15134.txt @@ -0,0 +1,3 @@ +```release-note:bug +cleanup: fixed missing timer.Reset for plan queue stat emitter +``` \ No newline at end of file diff --git a/.changelog/15140.txt b/.changelog/15140.txt new file mode 100644 index 00000000000..d6ffc32110d --- /dev/null +++ b/.changelog/15140.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: prevent allocations from failing on client reconnect by retrying RPC requests when no servers are available yet +``` diff --git a/.changelog/15180.txt b/.changelog/15180.txt new file mode 100644 index 00000000000..342a238c717 --- /dev/null +++ b/.changelog/15180.txt @@ -0,0 +1,3 @@ +```release-note:bug +drivers: Fixed a bug where one goroutine was leaked per task +``` diff --git a/.changelog/15192.txt b/.changelog/15192.txt new file mode 100644 index 00000000000..abe797edd61 --- /dev/null +++ b/.changelog/15192.txt @@ -0,0 +1,3 @@ +```release-note:bug +template: Fixed a bug where template could cause agent panic on startup +``` diff --git a/.changelog/15214.txt b/.changelog/15214.txt new file mode 100644 index 00000000000..222889a0151 --- /dev/null +++ b/.changelog/15214.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: fixed a bug where non-`docker` tasks with network isolation would leak network namespaces and iptables rules if the client was restarted while they were running +``` diff --git a/.changelog/15215.txt b/.changelog/15215.txt new file mode 100644 index 00000000000..4428ce62316 --- /dev/null +++ b/.changelog/15215.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where tasks would restart without waiting for interval +``` diff --git a/.changelog/15252.txt b/.changelog/15252.txt new file mode 100644 index 00000000000..dbc28731871 --- /dev/null +++ b/.changelog/15252.txt @@ -0,0 +1,3 @@ +```release-note:bug +api: Ensure all request body decode errors return a 400 status code +``` diff --git a/.changelog/15309.txt b/.changelog/15309.txt new file mode 100644 index 00000000000..73621fea253 --- /dev/null +++ b/.changelog/15309.txt @@ -0,0 +1,3 @@ +```release-note:bug +fingerprint: Ensure Nomad can correctly fingerprint Consul gRPC where the Consul agent is running v1.14.0 or greater +``` diff --git a/.changelog/15325.txt b/.changelog/15325.txt new file mode 100644 index 00000000000..1ecec04c308 --- /dev/null +++ b/.changelog/15325.txt @@ -0,0 +1,3 @@ +```release-note:bug +scheduler (Enterprise): Fixed a bug that prevented new allocations for multiregion jobs from being placed in situations where other regions are not involved, such as node updates.
+``` diff --git a/.changelog/15372.txt b/.changelog/15372.txt new file mode 100644 index 00000000000..e44c391da42 --- /dev/null +++ b/.changelog/15372.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where volumes in non-default namespaces could not be scheduled for system or sysbatch jobs +``` diff --git a/.changelog/15407.txt b/.changelog/15407.txt new file mode 100644 index 00000000000..65e776a46cd --- /dev/null +++ b/.changelog/15407.txt @@ -0,0 +1,3 @@ +```release-note:improvement +client: detect and cleanup leaked iptables rules +``` diff --git a/.changelog/15411.txt b/.changelog/15411.txt new file mode 100644 index 00000000000..369cede3e2e --- /dev/null +++ b/.changelog/15411.txt @@ -0,0 +1,3 @@ +```release-note:bug +consul: Fixed a bug where services would continuously re-register when using ipv6 +``` diff --git a/.changelog/15452.txt b/.changelog/15452.txt new file mode 100644 index 00000000000..5221daa9143 --- /dev/null +++ b/.changelog/15452.txt @@ -0,0 +1,3 @@ +```release-note:improvement +fingerprint: Detect CNI plugins and set versions as node attributes +``` diff --git a/.changelog/15495.txt b/.changelog/15495.txt new file mode 100644 index 00000000000..77b38170ce8 --- /dev/null +++ b/.changelog/15495.txt @@ -0,0 +1,3 @@ +```release-note:bug +event stream: Fixed a bug where undefined ACL policies on the request's ACL would result in incorrect authentication errors +``` diff --git a/.changelog/15518.txt b/.changelog/15518.txt new file mode 100644 index 00000000000..d081950b9b4 --- /dev/null +++ b/.changelog/15518.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where a crashing plugin could panic the Nomad client +``` diff --git a/.changelog/15541.txt b/.changelog/15541.txt new file mode 100644 index 00000000000..ed1e9a7c950 --- /dev/null +++ b/.changelog/15541.txt @@ -0,0 +1,11 @@ +```release-note:bug +api: Fixed a bug where exposeConfig field was not provided correctly when getting the jobs via the API +``` + +```release-note:deprecation +api: The connect `ConsulProxy.ExposeConfig` field is deprecated in favor of `ConsulProxy.Expose` +``` + +```release-note:deprecation +api: The connect `ConsulExposeConfig.Path` field is deprecated in favor of `ConsulExposeConfig.Paths` +``` diff --git a/.changelog/15605.txt b/.changelog/15605.txt new file mode 100644 index 00000000000..b8906939923 --- /dev/null +++ b/.changelog/15605.txt @@ -0,0 +1,3 @@ +```release-note:bug +api: Fix stale querystring parameter value as boolean +``` diff --git a/.changelog/15625.txt b/.changelog/15625.txt new file mode 100644 index 00000000000..2e5e82668ee --- /dev/null +++ b/.changelog/15625.txt @@ -0,0 +1,3 @@ +```release-note:bug +agent: Make agent syslog log level follow log_level config +``` diff --git a/.changelog/15626.txt b/.changelog/15626.txt new file mode 100644 index 00000000000..2eecd7f91b3 --- /dev/null +++ b/.changelog/15626.txt @@ -0,0 +1,3 @@ +```release-note:bug +fix: Add the missing option propagation_mode for volume_mount +``` diff --git a/.changelog/15670.txt b/.changelog/15670.txt new file mode 100644 index 00000000000..3d227f937cf --- /dev/null +++ b/.changelog/15670.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where secrets that include '=' were incorrectly rejected +``` diff --git a/.changelog/15701.txt b/.changelog/15701.txt new file mode 100644 index 00000000000..bc358d2b1fe --- /dev/null +++ b/.changelog/15701.txt @@ -0,0 +1,3 @@ +```release-note:improvement +consul: add client configuration for grpc_ca_file +``` diff --git 
a/.changelog/15732.txt b/.changelog/15732.txt new file mode 100644 index 00000000000..b9e285e0e94 --- /dev/null +++ b/.changelog/15732.txt @@ -0,0 +1,3 @@ +```release-note:bug +docker: configure restart policy for bridge network pause container +``` diff --git a/.changelog/15749.txt b/.changelog/15749.txt new file mode 100644 index 00000000000..f8dfda28567 --- /dev/null +++ b/.changelog/15749.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: ingress http/2/grpc listeners may exclude hosts +``` diff --git a/.changelog/15769.txt b/.changelog/15769.txt new file mode 100644 index 00000000000..5d05fbc1230 --- /dev/null +++ b/.changelog/15769.txt @@ -0,0 +1,3 @@ +```release-note:improvement +build: Update to go1.19.5 +``` diff --git a/.changelog/15770.txt b/.changelog/15770.txt new file mode 100644 index 00000000000..b60ae5b7a02 --- /dev/null +++ b/.changelog/15770.txt @@ -0,0 +1,3 @@ +```release-note:improvement +env/ec2: update cpu metadata +``` diff --git a/.changelog/15808.txt b/.changelog/15808.txt new file mode 100644 index 00000000000..36e2026b5f6 --- /dev/null +++ b/.changelog/15808.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: enforce strict ordering that node status updates are recorded after allocation updates for reconnecting clients +``` diff --git a/.changelog/15898.txt b/.changelog/15898.txt new file mode 100644 index 00000000000..64d96190d93 --- /dev/null +++ b/.changelog/15898.txt @@ -0,0 +1,3 @@ +```release-note:bug +docker: Fixed a bug where infra_image did not get alloc_id label +``` diff --git a/.changelog/15906.txt b/.changelog/15906.txt new file mode 100644 index 00000000000..0c330a76be4 --- /dev/null +++ b/.changelog/15906.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Fix navigation to pages for jobs that are not in the default namespace +``` diff --git a/.changelog/15909.txt b/.changelog/15909.txt new file mode 100644 index 00000000000..f0aef43dcf2 --- /dev/null +++ b/.changelog/15909.txt @@ -0,0 +1,3 @@ +```release-note:bug +ui: Fix allocation memory chart to display the same value as the CLI +``` diff --git a/.changelog/15915.txt b/.changelog/15915.txt new file mode 100644 index 00000000000..cb6ccb4ba2c --- /dev/null +++ b/.changelog/15915.txt @@ -0,0 +1,3 @@ +```release-note:bug +template: Fixed a bug that caused the change script to fail to run +``` diff --git a/.changelog/15928.txt b/.changelog/15928.txt new file mode 100644 index 00000000000..8d334b18463 --- /dev/null +++ b/.changelog/15928.txt @@ -0,0 +1,3 @@ +```release-note:bug +consul: Fixed a bug where an acceptable service identity on a Consul token was not accepted +``` diff --git a/.changelog/15962.txt b/.changelog/15962.txt new file mode 100644 index 00000000000..602aedc969d --- /dev/null +++ b/.changelog/15962.txt @@ -0,0 +1,3 @@ +```release-note:bug +docker: Fixed a bug where images referenced by multiple tags would not be GC'd +``` diff --git a/.changelog/15996.txt b/.changelog/15996.txt new file mode 100644 index 00000000000..5e3bc839d1c --- /dev/null +++ b/.changelog/15996.txt @@ -0,0 +1,3 @@ +```release-note:bug +consul: Fixed a bug where consul token was not respected when reverting a job +``` diff --git a/.changelog/16000.txt b/.changelog/16000.txt new file mode 100644 index 00000000000..1600e4865f3 --- /dev/null +++ b/.changelog/16000.txt @@ -0,0 +1,3 @@ +```release-note:bug +acl: Fixed a bug where creating/updating a policy which was invalid would return a 404 status code, not a 400 +``` diff --git a/.changelog/16011.txt b/.changelog/16011.txt new file mode 100644 index 00000000000..9e0e4f23ca6 ---
/dev/null +++ b/.changelog/16011.txt @@ -0,0 +1,3 @@ +```release-note:bug +cli: Fixed a panic in `deployment status` when rollback deployments are slow to appear +``` diff --git a/.changelog/16018.txt b/.changelog/16018.txt new file mode 100644 index 00000000000..2f3355538a9 --- /dev/null +++ b/.changelog/16018.txt @@ -0,0 +1,3 @@ +```release-note:bug +parser: Fixed a panic in the job spec parser when a variable validation block was missing its condition +``` diff --git a/.changelog/16029.txt b/.changelog/16029.txt new file mode 100644 index 00000000000..5e6d05904d3 --- /dev/null +++ b/.changelog/16029.txt @@ -0,0 +1,3 @@ +```release-note:improvement +build: Update to go1.20 +``` diff --git a/.changelog/16030.txt b/.changelog/16030.txt new file mode 100644 index 00000000000..4649a508c77 --- /dev/null +++ b/.changelog/16030.txt @@ -0,0 +1,3 @@ +```release-note:bug +volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index +``` diff --git a/.changelog/16126.txt b/.changelog/16126.txt new file mode 100644 index 00000000000..f6346a3f8a3 --- /dev/null +++ b/.changelog/16126.txt @@ -0,0 +1,3 @@ +```release-note:security +artifact: Provide mitigations against unbounded artifact decompression +``` diff --git a/.changelog/16166.txt b/.changelog/16166.txt new file mode 100644 index 00000000000..1347d8d7494 --- /dev/null +++ b/.changelog/16166.txt @@ -0,0 +1,3 @@ +```release-note:bug +api: Added missing node states to NodeStatus constants +``` diff --git a/.changelog/16180.txt b/.changelog/16180.txt new file mode 100644 index 00000000000..265d62404e7 --- /dev/null +++ b/.changelog/16180.txt @@ -0,0 +1,3 @@ +```release-note:bug +cgutil: handle panic coming from runc helper method +``` diff --git a/.changelog/16182.txt b/.changelog/16182.txt new file mode 100644 index 00000000000..bd2437dfa43 --- /dev/null +++ b/.changelog/16182.txt @@ -0,0 +1,3 @@ +```release-note:security +build: Update to go1.20.1 +``` diff --git a/.changelog/16217.txt b/.changelog/16217.txt new file mode 100644 index 00000000000..0ebbc293b23 --- /dev/null +++ b/.changelog/16217.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where clients used the serf advertise address to connect to servers when using Consul auto-discovery +``` diff --git a/.changelog/16256.txt b/.changelog/16256.txt new file mode 100644 index 00000000000..8564c9b5df2 --- /dev/null +++ b/.changelog/16256.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed potential state store corruption when garbage collecting CSI volume claims or checking whether it's safe to force-deregister a volume +``` diff --git a/.changelog/16287.txt b/.changelog/16287.txt new file mode 100644 index 00000000000..5496bb2b1db --- /dev/null +++ b/.changelog/16287.txt @@ -0,0 +1,11 @@ +```release-note:bug +server: Fixed a bug where deregistering a job that was already garbage collected would create a new evaluation +``` + +```release-note:bug +server: Fixed a bug where the `system reconcile summaries` command and API would not return any scheduler-related errors +``` + +```release-note:bug +server: Fixed a bug where node updates that produced errors from service discovery or CSI plugin updates were not logged +``` diff --git a/.changelog/16289.txt b/.changelog/16289.txt new file mode 100644 index 00000000000..f11e0dd4688 --- /dev/null +++ b/.changelog/16289.txt @@ -0,0 +1,3 @@ +```release-note:bug +services: Fixed a bug where a service would be deregistered twice +``` diff --git a/.changelog/16352.txt 
b/.changelog/16352.txt new file mode 100644 index 00000000000..e164d900307 --- /dev/null +++ b/.changelog/16352.txt @@ -0,0 +1,3 @@ +```release-note:bug +docker: Fixed a bug where pause containers would be erroneously removed +``` diff --git a/.changelog/16401.txt b/.changelog/16401.txt new file mode 100644 index 00000000000..a737f43e2b9 --- /dev/null +++ b/.changelog/16401.txt @@ -0,0 +1,3 @@ +```release-note:bug +scheduler: Fixed a bug where collisions in dynamic port offerings would result in spurious plan-for-node-rejected errors +``` diff --git a/.changelog/16417.txt b/.changelog/16417.txt new file mode 100644 index 00000000000..b60ae5b7a02 --- /dev/null +++ b/.changelog/16417.txt @@ -0,0 +1,3 @@ +```release-note:improvement +env/ec2: update cpu metadata +``` diff --git a/.changelog/16427.txt b/.changelog/16427.txt new file mode 100644 index 00000000000..5f5ffbcc4bd --- /dev/null +++ b/.changelog/16427.txt @@ -0,0 +1,3 @@ +```release-note:improvement +build: Update to go1.20.2 +``` diff --git a/.changelog/16434.txt b/.changelog/16434.txt new file mode 100644 index 00000000000..39b3031d47f --- /dev/null +++ b/.changelog/16434.txt @@ -0,0 +1,3 @@ +```release-note:bug +plugin: Add missing fields to `TaskConfig` so they can be accessed by external task drivers +``` diff --git a/.changelog/16467.txt b/.changelog/16467.txt new file mode 100644 index 00000000000..317bbe11c40 --- /dev/null +++ b/.changelog/16467.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where cpuset initialization fails after Client restart +``` diff --git a/.changelog/16490.txt b/.changelog/16490.txt new file mode 100644 index 00000000000..42584d3f746 --- /dev/null +++ b/.changelog/16490.txt @@ -0,0 +1,3 @@ +```release-note:bug +client: Fixed a bug where clients using Consul discovery to join the cluster would get permission denied errors +``` diff --git a/.changelog/16609.txt b/.changelog/16609.txt new file mode 100644 index 00000000000..61de306525c --- /dev/null +++ b/.changelog/16609.txt @@ -0,0 +1,3 @@ +```release-note:bug +scheduler: Fix reconciliation of reconnecting allocs when the replacement allocations are not running +``` diff --git a/.changelog/_839.txt b/.changelog/_839.txt new file mode 100644 index 00000000000..90e5933bf06 --- /dev/null +++ b/.changelog/_839.txt @@ -0,0 +1,3 @@ +```release-note:bug +quotas (Enterprise): Fixed a server-crashing panic when updating and checking a quota concurrently.
+``` diff --git a/.circleci/config.yml b/.circleci/config.yml index bb58b0ac7d0..16a02456c30 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -158,43 +158,6 @@ commands: fi jobs: - website-docker-image: - docker: - - image: docker.mirror.hashicorp.services/circleci/buildpack-deps - shell: /usr/bin/env bash -euo pipefail -c - steps: - - checkout - - run: - name: Skip building if nothing changed - command: | - # There is an edge case that would cause an issue here - if dependencies are updated to an exact copy - # of a previous version, for example if packge-lock.json is reverted, we need to manually push the new - # image to the "latest" tag - # Ignore job if running an enterprise build - IMAGE_TAG=$(cat website/Dockerfile website/package-lock.json | sha256sum | awk '{print $1;}') - echo "Using $IMAGE_TAG" - - if [ "$CIRCLE_REPOSITORY_URL" != "git@github.com:hashicorp/nomad.git" ]; then - echo "Not Nomad OSS Repo, not building website docker image" - circleci-agent step halt - elif curl https://hub.docker.com/v2/repositories/hashicorp/nomad-website/tags/$IMAGE_TAG -fsL > /dev/null; then - echo "Dependencies have not changed, not building a new website docker image." - circleci-agent step halt - fi - - - setup_remote_docker - - run: - name: Build Docker Image - command: | - IMAGE_TAG=$(cat website/Dockerfile website/package-lock.json | sha256sum | awk '{print $1;}') - echo "Using $IMAGE_TAG" - - cd website/ - docker build -t hashicorp/nomad-website:$IMAGE_TAG . - docker tag hashicorp/nomad-website:$IMAGE_TAG hashicorp/nomad-website:latest - docker login -u $WEBSITE_DOCKER_USER -p $WEBSITE_DOCKER_PASS - docker push hashicorp/nomad-website - test-windows: executor: go-windows @@ -208,7 +171,11 @@ jobs: mkdir -p $GOTESTSUM_PATH - install-golang: target_directory: "c:" - - run: go version + - run: + name: Show installed Go version + command: | + export PATH=/c/go/bin:/c/gopath/bin:$PATH + go version - install-vault: version: $VAULT_VERSION - run: vault version @@ -216,21 +183,21 @@ jobs: - run: name: Install golang dependencies command: | - export PATH=$PATH:/c/go/bin:/c/gopath/bin + export PATH=/c/go/bin:/c/gopath/bin:$PATH make deps - run: name: Pre-download docker test image - command: docker pull docker.mirror.hashicorp.services/hashicorpnomad/busybox-windows:server2016-0.1 + command: docker pull docker.mirror.hashicorp.services/hashicorpdev/busybox-windows:server2016-0.1 - run: name: Build nomad command: | - export PATH=$PATH:/c/go/bin:/c/gopath/bin + export PATH=/c/go/bin:/c/gopath/bin:$PATH go build -o $GOBIN\nomad.exe - run: name: Run tests with gotestsum command: | # Only test docker driver tests for now - export PATH=$PATH:/c/go/bin:/c/gopath/bin + export PATH=/c/go/bin:/c/gopath/bin:$PATH gotestsum --format=short-verbose \ --junitfile $GOTESTSUM_PATH/results.xml \ github.com/hashicorp/nomad/drivers/docker \ @@ -243,7 +210,7 @@ jobs: path: c:\tmp\test-reports test-ui: docker: - - image: docker.mirror.hashicorp.services/circleci/node:12-browsers + - image: docker.mirror.hashicorp.services/circleci/node:14-browsers environment: # See https://git.io/vdao3 for details. JOBS: 2 @@ -416,6 +383,7 @@ jobs: ./scripts/vagrant-linux-unpriv-ui.sh export PATH="$GOPATH/bin:/usr/local/go/bin:$PATH" + source ${BASH_ENV} . ~/.nvm/nvm.sh cd ui && yarn install --frozen-lockfile && cd .. 
@@ -489,27 +457,11 @@ jobs: - store_artifacts: path: /tmp/ui-assets destination: /ui-assets - algolia_index: - docker: - - image: docker.mirror.hashicorp.services/node:14 - steps: - - checkout - - run: - name: Push content to Algolia Index - command: | - if [ "$CIRCLE_REPOSITORY_URL" != "git@github.com:hashicorp/nomad.git" ]; then - echo "Not Nomad OSS Repo, not indexing Algolia" - exit 0 - fi - - cd website/ - npm install - node scripts/index_search_content.js executors: go: working_directory: /go/src/github.com/hashicorp/nomad docker: - - image: docker.mirror.hashicorp.services/golang:1.17.9 + - image: docker.mirror.hashicorp.services/golang:1.20.2 resource_class: medium environment: <<: *common_envs @@ -522,16 +474,16 @@ executors: resource_class: large environment: &machine_env <<: *common_envs - GOLANG_VERSION: 1.17.9 + GOLANG_VERSION: 1.20.2 go-macos: working_directory: ~/go/src/github.com/hashicorp/nomad macos: - xcode: 12.4.0 + xcode: 13.4.1 environment: <<: *common_envs GOPATH: /Users/distiller/go - GOLANG_VERSION: 1.17.9 + GOLANG_VERSION: 1.20.2 go-windows: machine: @@ -543,7 +495,7 @@ executors: GOPATH: c:\gopath GOBIN: c:\gopath\bin GOTESTSUM_PATH: c:\tmp\test-reports - GOLANG_VERSION: 1.17.9 + GOLANG_VERSION: 1.20.2 GOTESTSUM_VERSION: 1.7.0 VAULT_VERSION: 1.4.1 @@ -568,10 +520,6 @@ workflows: - /^backport/docs-.*/ - stable-website - # Note: comment-out this job in ENT - - build-darwin-binaries: - filters: *backend_check_branches_filter - - test-e2e: filters: *backend_check_branches_filter @@ -594,68 +542,3 @@ workflows: - /^backport/docs-.*/ - /^e2e-.*/ - stable-website - - - test-machine: - name: "test-client" - test_packages: "./client/..." - # test branches are the branches that can impact unit tests - filters: &backend_test_branches_filter - branches: - ignore: - - /^.-ui\b.*/ - - /^docs-.*/ - - /^backport/docs-.*/ - - /^e2e-.*/ - - stable-website - - test-machine: - name: "test-nomad" - test_packages: "./nomad/..." - filters: *backend_test_branches_filter - - test-machine: - # API Tests run in a VM rather than container due to the FS tests - # requiring `mount` priviliges. - name: "test-api" - test_module: "api" - filters: *backend_test_branches_filter - enable_race_testing: true - - test-machine: - name: "test-other" - exclude_packages: "./api|./client|./drivers/docker|./drivers/exec|./drivers/shared/executor|./nomad|./e2e" - filters: *backend_test_branches_filter - - test-machine: - name: "test-docker" - test_packages: "./drivers/docker" - executor: go-machine - filters: *backend_test_branches_filter - - test-machine: - name: "test-exec" - test_packages: "./drivers/exec" - filters: *backend_test_branches_filter - - test-machine: - name: "test-shared-exec" - test_packages: "./drivers/shared/executor" - filters: *backend_test_branches_filter - - test-machine: - name: "test-32bit" - # Currently we only explicitly test fingerprinting on 32bit - # architectures. 
- test_packages: "./client/fingerprint" - goarch: "386" - filters: *backend_test_branches_filter - website: - when: - equal: [ "https://github.com/hashicorp/nomad", << pipeline.project.git_url >> ] - jobs: - - website-docker-image: - context: static-sites - filters: - branches: - only: - - main - - - algolia_index: - context: static-sites - filters: - branches: - only: - - stable-website diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml index 98a4916236b..a6e0121ca40 100644 --- a/.github/workflows/backport.yml +++ b/.github/workflows/backport.yml @@ -15,33 +15,55 @@ jobs: steps: - name: Backport changes to stable-website run: | - backport-assistant backport -automerge + backport-assistant backport -merge-method=squash -automerge env: BACKPORT_LABEL_REGEXP: "backport/(?Pwebsite)" BACKPORT_TARGET_TEMPLATE: "stable-{{.target}}" GITHUB_TOKEN: ${{ secrets.ELEVATED_GITHUB_TOKEN }} - - name: Backport changes to latest release branch - run: | - resp=$(curl -f -s "https://api.github.com/repos/$GITHUB_REPOSITORY/labels?per_page=100") - ret="$?" - if [[ "$ret" -ne 0 ]]; then - echo "The GitHub API returned $ret" - exit $ret - fi - # get the latest backport label excluding any website labels, ex: `backport/0.3.x` and not `backport/website` - latest_backport_label=$(echo "$resp" | jq -r '.[] | select(.name | (startswith("backport/") and (contains("website") | not))) | .name' | sort -rV | head -n1) - echo "Latest backport label: $latest_backport_label" - # set BACKPORT_TARGET_TEMPLATE for backport-assistant - # trims backport/ from the beginning with parameter substitution - export BACKPORT_TARGET_TEMPLATE="release/${latest_backport_label#backport/}" - backport-assistant backport -automerge - env: - BACKPORT_LABEL_REGEXP: "backport/(?Pwebsite)" - GITHUB_TOKEN: ${{ secrets.ELEVATED_GITHUB_TOKEN }} - name: Backport changes to targeted release branch run: | - backport-assistant backport -automerge + backport-assistant backport -merge-method=squash -automerge env: BACKPORT_LABEL_REGEXP: "backport/(?P\\d+\\.\\d+\\.[+\\w]+)" BACKPORT_TARGET_TEMPLATE: "release/{{.target}}" GITHUB_TOKEN: ${{ secrets.ELEVATED_GITHUB_TOKEN }} + handle-failure: + needs: + - backport + if: always() && needs.backport.result == 'failure' + runs-on: ubuntu-latest + steps: + - name: Send slack notification on failure + uses: slackapi/slack-github-action@v1.23.0 + with: + payload: | + { + "text": ":x::arrow_right_hook::nomad-sob: Backport run *FAILED*", + "attachments": [ + { + "color": "#C41E3A", + "blocks": [ + { + "type": "section", + "fields": [ + { + "type": "mrkdwn", + "text": "*Pull Request:*\n<${{ github.event.pull_request.html_url}}|${{ github.repository }}#${{ github.event.pull_request.number}}>" + }, + { + "type": "mrkdwn", + "text": "*From:*\n@${{ github.event.sender.login }}" + }, + { + "type": "mrkdwn", + "text": "*Run:*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ github.run_id }}>" + } + ] + } + ] + } + ] + } + env: + SLACK_WEBHOOK_URL: ${{ secrets.BACKPORT_ASSISTANT_FAILURE_SLACK }} + SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e9d0033ebe3..dcfb802732a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,7 +3,8 @@ name: build on: push: branches: - - "main" + - main + - release/** workflow_dispatch: inputs: build-ref: @@ -23,7 +24,7 @@ env: jobs: get-go-version: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 outputs: go-version: ${{ 
steps.get-go-version.outputs.go-version }} steps: @@ -36,9 +37,9 @@ jobs: # version, because "goenv" can react to it automatically. run: | echo "Building with Go $(cat .go-version)" - echo "::set-output name=go-version::$(cat .go-version)" + echo "go-version=$(cat .go-version)" >> $GITHUB_OUTPUT get-product-version: - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 outputs: product-version: ${{ steps.get-product-version.outputs.product-version }} steps: @@ -49,10 +50,10 @@ jobs: id: get-product-version run: | make version - echo "::set-output name=product-version::$(make version)" + echo "product-version=$(make version)" >> $GITHUB_OUTPUT generate-metadata-file: needs: get-product-version - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 outputs: filepath: ${{ steps.generate-metadata-file.outputs.filepath }} steps: @@ -72,26 +73,10 @@ jobs: with: name: metadata.json path: ${{ steps.generate-metadata-file.outputs.filepath }} - generate-ld-flags: - needs: get-product-version - runs-on: ubuntu-latest - outputs: - ldflags: ${{ steps.generate-ld-flags.outputs.ldflags }} - steps: - - uses: actions/checkout@v2 - with: - ref: ${{ github.event.inputs.build-ref }} - - name: "Generate ld flags" - id: generate-ld-flags - run: | - project="$(go list -m)" - sha="$(git rev-parse --short HEAD)" - echo "::set-output name=ldflags::"-s -w -X \'$project/version.Name=${{ env.PKG_NAME }}\' \ - -X \'$project/version.GitDescribe=v$(make version base=1)\'"" build-other: - needs: [get-go-version, get-product-version, generate-ld-flags] - runs-on: ubuntu-latest + needs: [get-go-version, get-product-version] + runs-on: [ custom, linux, xxl, 20.04 ] strategy: matrix: goos: [windows] @@ -131,7 +116,6 @@ jobs: GOOS: ${{ matrix.goos }} GOARCH: ${{ matrix.goarch }} GO_TAGS: ${{ env.GO_TAGS }} - GO_LDFLAGS: ${{ needs.generate-ld-flags.outputs.ldflags }} CGO_ENABLED: 1 run: | make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip @@ -142,8 +126,8 @@ jobs: path: ${{ env.PKG_NAME }}_${{ needs.get-product-version.outputs.product-version }}_${{ matrix.goos }}_${{ matrix.goarch }}.zip build-linux: - needs: [get-go-version, get-product-version, generate-ld-flags] - runs-on: ubuntu-latest + needs: [get-go-version, get-product-version] + runs-on: [ custom, linux, xxl, 20.04 ] strategy: matrix: goos: [linux] @@ -208,7 +192,6 @@ jobs: GOOS: ${{ matrix.goos }} GOARCH: ${{ matrix.goarch }} GO_TAGS: ${{ env.GO_TAGS }} - GO_LDFLAGS: ${{ needs.generate-ld-flags.outputs.ldflags }} CGO_ENABLED: 1 run: | make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip @@ -233,6 +216,7 @@ jobs: rpm_depends: "openssl" config_dir: ".release/linux/package/" preinstall: ".release/linux/preinst" + postinstall: ".release/linux/postinst" postremove: ".release/linux/postrm" - name: Set Package Names @@ -251,7 +235,7 @@ jobs: path: out/${{ env.DEB_PACKAGE }} build-darwin: - needs: [get-go-version, get-product-version, generate-ld-flags] + needs: [get-go-version, get-product-version] runs-on: macos-latest strategy: matrix: @@ -293,7 +277,6 @@ jobs: GOOS: ${{ matrix.goos }} GOARCH: ${{ matrix.goarch }} GO_TAGS: "${{ env.GO_TAGS }} netcgo" - GO_LDFLAGS: ${{ needs.generate-ld-flags.outputs.ldflags }} CGO_ENABLED: 1 run: | make pkg/${{ matrix.goos }}_${{ matrix.goarch }}.zip @@ -311,7 +294,7 @@ jobs: # needs: # - get-product-version # - build - # runs-on: ubuntu-latest + # runs-on: [ custom, linux, xxl, 20.04 ] # strategy: # matrix: # arch: ["arm", "arm64", "386", "amd64"] diff --git a/.github/workflows/ember-assets.yml b/.github/workflows/ember-assets.yml index 
87cb1df846c..7aabe38804f 100644 --- a/.github/workflows/ember-assets.yml +++ b/.github/workflows/ember-assets.yml @@ -17,7 +17,7 @@ jobs: - name: Use Node.js uses: actions/setup-node@v2 with: - node-version: '12' + node-version: '14' - uses: backspace/ember-asset-size-action@edit-comment with: repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/ember-test-audit.yml b/.github/workflows/ember-test-audit.yml index 05e34ffb64e..f7f29a28be4 100644 --- a/.github/workflows/ember-test-audit.yml +++ b/.github/workflows/ember-test-audit.yml @@ -21,7 +21,7 @@ jobs: - name: Use Node.js uses: actions/setup-node@v2 with: - node-version: '12' + node-version: '14' - run: yarn --frozen-lockfile - run: mkdir -p /tmp/test-reports - run: npx ember-test-audit 3 --json --output ../base-audit.json @@ -38,7 +38,7 @@ jobs: - name: Use Node.js uses: actions/setup-node@v2 with: - node-version: '12' + node-version: '14' - run: yarn --frozen-lockfile - run: mkdir -p /tmp/test-reports - run: npx ember-test-audit 3 --json --output ../pr-audit.json diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 91e8d18d850..9de1eef7632 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,7 +11,7 @@ on: description: 'Update CHANGELOG' required: true type: boolean - default: true + default: false notification-channel: description: 'Slack channel to use for notifications' required: false @@ -22,12 +22,47 @@ env: GO_TAGS: "release" jobs: - get-go-version: - runs-on: ubuntu-latest + prepare-release: + runs-on: ubuntu-20.04 outputs: - go-version: ${{ steps.get-go-version.outputs.go-version }} + build-ref: ${{ steps.commit-change-push.outputs.build-ref }} steps: + - name: Prevent running from main + if: ${{ github.ref_name == 'main' }} + run: | + echo "::error::Workflow not allowed to run from ${{ github.ref_name }}" + exit 1 + + - name: Print release info + run: | + echo "::notice::Release v${{ github.event.inputs.version }} from branch ${{ github.ref_name }}" + + - name: Install semver CLI + run: | + local_bin="${HOME}/.local/bin" + mkdir -p "${local_bin}" + curl -L --output "${local_bin}/semver" \ + https://raw.githubusercontent.com/fsaintjacques/semver-tool/3.3.0/src/semver + chmod +x "${local_bin}/semver" + echo "${local_bin}" >> $GITHUB_PATH + + - name: Validate release version + run: | + if [ "$(semver validate ${{ github.event.inputs.version }})" == "invalid" ]; then + echo "::error::Version ${{ github.event.inputs.version }} is invalid" + exit 1 + fi + - uses: actions/checkout@v2 + + - name: Setup Git + run: | + if [ "${{ secrets.ELEVATED_GITHUB_TOKEN }}" ]; then + git config --global url."https://${{ secrets.ELEVATED_GITHUB_TOKEN }}:@github.com/".insteadOf "https://github.com" + fi + git config --global user.email "github-team-nomad-core@hashicorp.com" + git config --global user.name "hc-github-team-nomad-core" + - name: Determine Go version id: get-go-version # We use .go-version as our source of truth for current Go @@ -36,18 +71,10 @@ jobs: echo "Building with Go $(cat .go-version)" echo "::set-output name=go-version::$(cat .go-version)" - prepare-release: - needs: get-go-version - runs-on: ubuntu-latest - outputs: - build-ref: ${{ steps.commit-change-push.outputs.build-ref }} - steps: - - uses: actions/checkout@v2 - - name: Setup go uses: actions/setup-go@v2 with: - go-version: ${{ needs.get-go-version.outputs.go-version }} + go-version: ${{ steps.get-go-version.outputs.go-version }} - name: Setup node and yarn uses: actions/setup-node@v2 @@ 
-64,6 +91,7 @@ jobs: make deps - name: Update notification channel + id: notification-channel if: ${{ github.event.inputs.notification-channel != '' }} run: | sed -i.bak -e 's|\(notification_channel * = *"\)[^"]*|\1${{ github.event.inputs.notification-channel }}|g' .release/ci.hcl @@ -73,8 +101,8 @@ jobs: - name: Update version file run: | NOMAD_VERSION="${{ github.event.inputs.version }}" - NOMAD_MAIN_VERSION=$(echo "$NOMAD_VERSION" | cut -d- -f1) - NOMAD_PRERELEASE_VERSION=$(echo "$NOMAD_VERSION" | sed 's|^[^-]*-\{0,1\}||g') + NOMAD_MAIN_VERSION=$(semver get release "$NOMAD_VERSION") + NOMAD_PRERELEASE_VERSION=$(semver get prerel "$NOMAD_VERSION") echo "updating version to ${NOMAD_MAIN_VERSION}-${NOMAD_PRERELEASE_VERSION}" @@ -103,10 +131,7 @@ jobs: run: | git add -A . find . -name '*.generated.go' -not -path './vendor/*' -exec git add -f '{}' \; - if ! git diff-index --quiet HEAD --; - then - git config --global user.email "github-team-nomad-core@hashicorp.com" - git config --global user.name "hc-github-team-nomad-core" + if ! git diff-index --quiet HEAD --; then git commit --message "Generate files for ${{ github.event.inputs.version }} release" git push origin "$(git rev-parse --abbrev-ref HEAD)" echo "committing generated files" @@ -122,3 +147,61 @@ jobs: token: ${{ secrets.ELEVATED_GITHUB_TOKEN}} inputs: '{"build-ref": "${{ steps.commit-change-push.outputs.build-ref }}", "make-prerelease": "false"}' ref: ${{ needs.prepare-release.outputs.build-ref }} + + - name: Revert notification channel + if: ${{ github.event.inputs.notification-channel != '' }} + run: | + git reset ${{ github.sha }} -- .release/ci.hcl + + # git reset will place the original file content in the staging area + # and leave the changes since then unstaged, so call git restore to + # discard these changes and use --cached to display the diff in the + # staging area. + git restore .release/ci.hcl + git diff --cached --color=always .release/ci.hcl + + - name: Update version file + run: | + # Only bump the Version value if this is not a pre-release. + # For final releases we want `nomad -version` to display the next + # version to indicate that the current release is done. + if [ -z "$(semver get prerel ${{ github.event.inputs.version }})" ]; then + next_version=$(semver bump patch ${{ github.event.inputs.version }}) + sed -i.bak -e "s|\(Version * = *\"\)[^\"]*|\1${next_version}|g" version/version.go + fi + # Set the VersionPrerelease variable back to dev. + sed -i.bak -e "s|\(VersionPrerelease * = *\"\)[^\"]*|\1dev|g" version/version.go + rm -rf version/version.go.bak + git diff --color=always version/version.go + + - name: Update LAST_RELEASE + run: | + # LAST_RELEASE is used to generate the new CHANGELOG entries, so it's + # only updated for final releases. + if [ -z "$(semver get prerel ${{ github.event.inputs.version }})" ]; then + sed -i.bak -re "s|^(LAST_RELEASE\s+\?=\s+v).*$|\1${{ github.event.inputs.version }}|g" GNUmakefile + rm -fr GNUmakefile.bak + git diff --color=always GNUmakefile + else + echo "Version ${{ github.event.inputs.version }} is a prerelease, skipping update of LAST_RELEASE" + fi + + - name: Remove generated files + run: | + # These generated files are only needed when building the final + # binary and should be not be present in the repository afterwards. + find . -name '*.generated.go' | xargs git rm + git status + + - name: Commit post-release changes + run: | + # Display staged and unstaged diffs, skipping deleted files to avoid + # cluttering the output with the generated files. 
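All of the version parsing in this release workflow is delegated to the `semver` helper installed earlier from fsaintjacques/semver-tool (pinned at 3.3.0). The following is only a rough sketch of the subcommands the workflow relies on; the version `1.4.0-beta.1` is a hypothetical input and the outputs are assumptions based on semver-tool's documented behaviour, not values taken from this diff:

```bash
# Sketch: semver-tool 3.3.0 subcommands as used by release.yml (assumed outputs).
semver validate 1.4.0-beta.1     # prints "valid"; the workflow aborts when this prints "invalid"
semver get release 1.4.0-beta.1  # prints "1.4.0"  -> becomes NOMAD_MAIN_VERSION
semver get prerel 1.4.0-beta.1   # prints "beta.1" -> becomes NOMAD_PRERELEASE_VERSION (empty for a final release)
semver bump patch 1.4.0          # prints "1.4.1"  -> next dev version written back to version/version.go
```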
+ git diff --diff-filter=d --color=always HEAD + git add -A . + if ! git diff-index --quiet HEAD --; then + git commit --message 'Prepare for next release' + git push origin "$(git rev-parse --abbrev-ref HEAD)" + else + echo "no files were updated" + fi diff --git a/.github/workflows/test-core.yaml b/.github/workflows/test-core.yaml index 553efdf4766..04cbcae88f7 100644 --- a/.github/workflows/test-core.yaml +++ b/.github/workflows/test-core.yaml @@ -1,9 +1,25 @@ name: Core CI Tests on: + pull_request: + paths-ignore: + - 'README.md' + - 'CHANGELOG.md' + - '.changelog/**' + - '.tours/**' + - 'contributing/**' + - 'demo/**' + - 'dev/**' + - 'e2e/terraform/**' + - 'integrations/**' + - 'pkg/**' + - 'scripts/**' + - 'terraform/**' + - 'ui/**' + - 'website/**' push: - branches-ignore: + branches: - main - - release-** + - release/** paths-ignore: - 'README.md' - 'CHANGELOG.md' @@ -13,66 +29,68 @@ on: - 'demo/**' - 'dev/**' - 'e2e/terraform/**' + - 'e2e/ui/**' - 'integrations/**' - 'pkg/**' - 'scripts/**' - 'terraform/**' - 'ui/**' - 'website/**' + env: VERBOSE: 1 - GO_VERSION: 1.17.9 - GOBIN: /usr/local/bin GOTESTARCH: amd64 - CONSUL_VERSION: 1.11.3 - VAULT_VERSION: 1.9.3 + CONSUL_VERSION: 1.12.6 + VAULT_VERSION: 1.12.0 NOMAD_SLOW_TEST: 0 NOMAD_TEST_LOG_LEVEL: OFF jobs: + mods: + runs-on: ubuntu-22.04 + timeout-minutes: 10 + steps: + - uses: actions/checkout@v3 + - uses: hashicorp/setup-golang@v1 + - name: Get Go modules + run: | + make tidy + make bootstrap checks: - runs-on: ubuntu-20.04 + needs: [mods] + runs-on: ubuntu-22.04 timeout-minutes: 10 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: fetch-depth: 0 # needs tags for checkproto - - uses: magnetikonline/action-golang-cache@v1 - with: - go-version: ${{env.GO_VERSION}} - cache-key-suffix: -checks + - uses: hashicorp/setup-golang@v1 - name: Run make check run: | make missing make bootstrap make check compile: + needs: [mods, checks] strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macos-11, windows-2019] + os: [ubuntu-22.04, macos-11, windows-2019] runs-on: ${{matrix.os}} timeout-minutes: 20 steps: - - uses: actions/checkout@v2 - - uses: magnetikonline/action-golang-cache@v1 - with: - go-version: ${{env.GO_VERSION}} - cache-key-suffix: -compile + - uses: actions/checkout@v3 + - uses: hashicorp/setup-golang@v1 - name: Run make dev - env: - GOBIN: ${{env.GOROOT}}/bin # windows kludge run: | make bootstrap make dev tests-api: - runs-on: ubuntu-20.04 + needs: [mods] + runs-on: [custom, xl, 22.04] timeout-minutes: 30 steps: - - uses: actions/checkout@v2 - - uses: magnetikonline/action-golang-cache@v1 - with: - go-version: ${{env.GO_VERSION}} - cache-key-suffix: -api + - uses: actions/checkout@v3 + - uses: hashicorp/setup-golang@v1 - name: Run API tests env: GOTEST_MOD: api @@ -81,72 +99,30 @@ jobs: make generate-all sudo sed -i 's!Defaults!#Defaults!g' /etc/sudoers sudo -E env "PATH=$PATH" make test-nomad-module - tests-pkgs: - runs-on: ubuntu-20.04 + tests-groups: + needs: [mods] + runs-on: ubuntu-22.04 timeout-minutes: 30 strategy: fail-fast: false matrix: - pkg: - - acl/... + groups: + - nomad - client - - client/allocdir/... - - client/allochealth/... - - client/allocrunner/... - - client/allocwatcher/... - - client/config/... - - client/consul/... - - client/devicemanager/... - - client/dynamicplugins/... - - client/fingerprint/... - - client/interfaces/... - - client/lib/... - - client/logmon/... - - client/pluginmanager/... - - client/servers/... - - client/serviceregistration/... - - client/state/... 
- - client/stats/... - - client/structs/... - - client/taskenv/... - command - - command/agent/... - - command/raft_tools/... - - drivers/docker/... - - drivers/exec/... - - drivers/java/... - - drivers/mock/... - - drivers/rawexec/... - - drivers/shared/... - - drivers/qemu/... - - helper/... - - internal/... - - jobspec/... - - lib/... - - nomad - - nomad/deploymentwatcher/... - - nomad/drainer/... - - nomad/state/... - - nomad/stream/... - - nomad/structs/... - - nomad/volumewatcher/... - - plugins/... - - scheduler/... - - testutil/... + - drivers + - quick steps: - - uses: actions/checkout@v2 - - uses: magnetikonline/action-golang-cache@v1 - with: - go-version: ${{env.GO_VERSION}} - cache-key-suffix: -pkgs + - uses: actions/checkout@v3 + - uses: hashicorp/setup-golang@v1 - name: Run Matrix Tests env: - GOTEST_PKGS: ./${{matrix.pkg}} + GOTEST_GROUP: ${{matrix.groups}} run: | make bootstrap make generate-all - hc-install vault ${{env.VAULT_VERSION}} - hc-install consul ${{env.CONSUL_VERSION}} + make dev + hc-install install -version ${{env.VAULT_VERSION}} -path ${{env.GOBIN}} vault + hc-install install -version ${{env.CONSUL_VERSION}} -path ${{env.GOBIN}} consul sudo sed -i 's!Defaults!#Defaults!g' /etc/sudoers sudo -E env "PATH=$PATH" make test-nomad - diff --git a/.gitignore b/.gitignore index f19433b97d3..280c5141136 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,10 @@ ui/dist/ website/.bundle website/vendor +# Go work files +go.work +go.work.sum + # init outputs example.nomad spec.hcl @@ -74,8 +78,10 @@ GNUMakefile.local rkt-* +# Common editor config ./idea *.iml +.vscode # UI rules @@ -125,3 +131,6 @@ e2e/remotetasks/input/ecs.vars # local terraform overrides *.auto.tfvars + +# Tools files +tools/missing/missing diff --git a/.go-version b/.go-version index 19fb7bd9dff..769e37e159d 100644 --- a/.go-version +++ b/.go-version @@ -1 +1 @@ -1.17.9 +1.20.2 diff --git a/.golangci.yml b/.golangci.yml index 46c48ff61ee..da61f51e6ed 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,10 +1,11 @@ run: - # timeout for analysis, e.g. 30s, 5m, default is 1m + # Timeout for analysis. deadline: 10m - # Modules download mode. If not empty, passed as -mod= to go tools - module-download-mode: vendor + # Modules download mode (do not modify go.mod) + module-download-mode: readonly + # Exclude test files tests: false # which dirs to skip: they won't be analyzed; @@ -84,16 +85,19 @@ linters: - gocritic - misspell - govet - - deadcode - - varcheck - ineffassign - - structcheck - unconvert - gofmt - gosimple - depguard - staticcheck - # Stretch Goal - #- maligned + - asasalint + - asciicheck + - bidichk + - bodyclose + - dogsled + - durationcheck + # - errchkjson (todo) + # - errorlint (todo) + - exportloopref fast: false - diff --git a/.release/ci.hcl b/.release/ci.hcl index c9a62492ac2..4423b810e7f 100644 --- a/.release/ci.hcl +++ b/.release/ci.hcl @@ -3,31 +3,19 @@ schema = "1" project "nomad" { team = "nomad" slack { - // #feed-nomad-releases notification_channel = "C03B5EWFW01" - // #proj-nomad-releases - // notification_channel = "CUYKT2A73" } github { organization = "hashicorp" repository = "nomad" release_branches = [ "main", - "release/1.0.x", - "release/1.1.x", - "release/1.2.x", - "release/1.3.x", + "release/**", ] } } -event "merge" { - // "entrypoint" to use if build is not run automatically - // i.e. 
send "merge" complete signal to orchestrator to trigger build -} - event "build" { - depends = ["merge"] action "build" { organization = "hashicorp" repository = "nomad" @@ -35,52 +23,14 @@ event "build" { } } -event "upload-dev" { +event "prepare" { depends = ["build"] - action "upload-dev" { - organization = "hashicorp" - repository = "crt-workflows-common" - workflow = "upload-dev" - } - - notification { - on = "fail" - } -} - -event "security-scan-binaries" { - depends = ["upload-dev"] - action "security-scan-binaries" { - organization = "hashicorp" - repository = "crt-workflows-common" - workflow = "security-scan-binaries" - config = "security-scan.hcl" - } - - notification { - on = "fail" - } -} - -event "notarize-darwin-amd64" { - depends = ["security-scan-binaries"] - action "notarize-darwin-amd64" { - organization = "hashicorp" - repository = "crt-workflows-common" - workflow = "notarize-darwin-amd64" - } - notification { - on = "fail" - } -} - -event "notarize-darwin-arm64" { - depends = ["notarize-darwin-amd64"] - action "notarize-darwin-arm64" { + action "prepare" { organization = "hashicorp" repository = "crt-workflows-common" - workflow = "notarize-darwin-arm64" + workflow = "prepare" + depends = ["build"] } notification { @@ -88,73 +38,8 @@ event "notarize-darwin-arm64" { } } -event "notarize-windows-386" { - depends = ["notarize-darwin-arm64"] - action "notarize-windows-386" { - organization = "hashicorp" - repository = "crt-workflows-common" - workflow = "notarize-windows-386" - } - - notification { - on = "fail" - } -} - -event "notarize-windows-amd64" { - depends = ["notarize-windows-386"] - action "notarize-windows-amd64" { - organization = "hashicorp" - repository = "crt-workflows-common" - workflow = "notarize-windows-amd64" - } - - notification { - on = "fail" - } -} - -event "sign" { - depends = ["notarize-windows-amd64"] - action "sign" { - organization = "hashicorp" - repository = "crt-workflows-common" - workflow = "sign" - } - - notification { - on = "fail" - } -} - -event "sign-linux-rpms" { - depends = ["sign"] - action "sign-linux-rpms" { - organization = "hashicorp" - repository = "crt-workflows-common" - workflow = "sign-linux-rpms" - } - - notification { - on = "fail" - } -} - -event "verify" { - depends = ["sign-linux-rpms"] - action "verify" { - organization = "hashicorp" - repository = "crt-workflows-common" - workflow = "verify" - } - - notification { - on = "always" - } -} - ## These are promotion and post-publish events -## they should be added to the end of the file after the verify event stanza. +## they should be added to the end of the file after the prepare event stanza. 
event "trigger-staging" { // This event is dispatched by the bob trigger-promotion command @@ -167,6 +52,7 @@ event "promote-staging" { organization = "hashicorp" repository = "crt-workflows-common" workflow = "promote-staging" + config = "release-metadata.hcl" } notification { @@ -204,3 +90,17 @@ event "promote-production-packaging" { on = "always" } } + +event "post-publish-website" { + depends = ["promote-production-packaging"] + + action "post-publish-website" { + organization = "hashicorp" + repository = "crt-workflows-common" + workflow = "post-publish-website" + } + + notification { + on = "always" + } +} diff --git a/.release/linux/package/etc/nomad.d/nomad.hcl b/.release/linux/package/etc/nomad.d/nomad.hcl index 8b3204e960c..73893ae0125 100644 --- a/.release/linux/package/etc/nomad.d/nomad.hcl +++ b/.release/linux/package/etc/nomad.d/nomad.hcl @@ -4,8 +4,8 @@ data_dir = "/opt/nomad/data" bind_addr = "0.0.0.0" server { - # license_path is required as of Nomad v1.1.1+ - #license_path = "/etc/nomad.d/nomad.hcl" + # license_path is required for Nomad Enterprise as of Nomad v1.1.1+ + #license_path = "/etc/nomad.d/license.hclic" enabled = true bootstrap_expect = 1 } diff --git a/.release/linux/postrm b/.release/linux/postrm index 8fa5dcffb50..f203cd0c2f9 100644 --- a/.release/linux/postrm +++ b/.release/linux/postrm @@ -4,4 +4,9 @@ if [ "$1" = "purge" ]; then userdel nomad fi +if [ "$1" == "upgrade" ] && [ -d /run/systemd/system ]; then + systemctl --system daemon-reload >/dev/null || true + systemctl restart nomad >/dev/null || true +fi + exit 0 diff --git a/.semgrep/api_errorf.yml b/.semgrep/api_errorf.yml new file mode 100644 index 00000000000..703c5bf29f0 --- /dev/null +++ b/.semgrep/api_errorf.yml @@ -0,0 +1,11 @@ +rules: + - id: "fmt_errorf_unformatted_use" + patterns: + - pattern: fmt.Errorf("...") + message: "Use of fmt.Errorf without formatting. Please use errors.New" + languages: + - "go" + severity: "WARNING" + paths: + include: + - "./api/*" diff --git a/.semgrep/protect_globals.yml b/.semgrep/protect_globals.yml new file mode 100644 index 00000000000..0dc5fa60fde --- /dev/null +++ b/.semgrep/protect_globals.yml @@ -0,0 +1,13 @@ +rules: + - id: "no-overriding-struct-globals" + patterns: + - pattern: | + structs.$A = ... + message: "Mutating global structs is never safe" + languages: + - "go" + severity: "ERROR" + fix: " " + paths: + # including tests! 
+ include: ["*"] diff --git a/CHANGELOG.md b/CHANGELOG.md index 59bf24bf166..6ef977c3cf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,307 @@ -## 1.3.0 (Unreleased) +## 1.3.12 (March 21, 2023) + +IMPROVEMENTS: + +* build: Update to go1.20.2 [[GH-16427](https://github.com/hashicorp/nomad/issues/16427)] + +BUG FIXES: + +* client: Fixed a bug where clients using Consul discovery to join the cluster would get permission denied errors [[GH-16490](https://github.com/hashicorp/nomad/issues/16490)] +* client: Fixed a bug where cpuset initialization fails after Client restart [[GH-16467](https://github.com/hashicorp/nomad/issues/16467)] +* plugin: Add missing fields to `TaskConfig` so they can be accessed by external task drivers [[GH-16434](https://github.com/hashicorp/nomad/issues/16434)] +* services: Fixed a bug where a service would be deregistered twice [[GH-16289](https://github.com/hashicorp/nomad/issues/16289)] + +## 1.3.11 (March 10, 2023) + +IMPROVEMENTS: + +* env/ec2: update cpu metadata [[GH-16417](https://github.com/hashicorp/nomad/issues/16417)] + +BUG FIXES: + +* client: Fixed a bug where clients used the serf advertise address to connect to servers when using Consul auto-discovery [[GH-16217](https://github.com/hashicorp/nomad/issues/16217)] +* docker: Fixed a bug where pause containers would be erroneously removed [[GH-16352](https://github.com/hashicorp/nomad/issues/16352)] +* scheduler: Fixed a bug where collisions in dynamic port offerings would result in spurious plan-for-node-rejected errors [[GH-16401](https://github.com/hashicorp/nomad/issues/16401)] +* server: Fixed a bug where deregistering a job that was already garbage collected would create a new evaluation [[GH-16287](https://github.com/hashicorp/nomad/issues/16287)] +* server: Fixed a bug where node updates that produced errors from service discovery or CSI plugin updates were not logged [[GH-16287](https://github.com/hashicorp/nomad/issues/16287)] +* server: Fixed a bug where the `system reconcile summaries` command and API would not return any scheduler-related errors [[GH-16287](https://github.com/hashicorp/nomad/issues/16287)] + +## 1.3.10 (March 01, 2023) + +BREAKING CHANGES: + +* core: Ensure no leakage of evaluations for batch jobs. Prior to this change allocations and evaluations for batch jobs were never garbage collected until the batch job was explicitly stopped. The new `batch_eval_gc_threshold` server configuration controls how often they are collected. The default threshold is `24h`. 
[[GH-15097](https://github.com/hashicorp/nomad/issues/15097)] + +IMPROVEMENTS: + +* build: Update to go1.19.5 [[GH-15769](https://github.com/hashicorp/nomad/issues/15769)] +* build: Update to go1.20 [[GH-16029](https://github.com/hashicorp/nomad/issues/16029)] +* client: detect and cleanup leaked iptables rules [[GH-15407](https://github.com/hashicorp/nomad/issues/15407)] +* consul: add client configuration for grpc_ca_file [[GH-15701](https://github.com/hashicorp/nomad/issues/15701)] +* env/ec2: update cpu metadata [[GH-15770](https://github.com/hashicorp/nomad/issues/15770)] +* fingerprint: Detect CNI plugins and set versions as node attributes [[GH-15452](https://github.com/hashicorp/nomad/issues/15452)] + +DEPRECATIONS: + +* api: The connect `ConsulExposeConfig.Path` field is deprecated in favor of `ConsulExposeConfig.Paths` [[GH-15541](https://github.com/hashicorp/nomad/issues/15541)] +* api: The connect `ConsulProxy.ExposeConfig` field is deprecated in favor of `ConsulProxy.Expose` [[GH-15541](https://github.com/hashicorp/nomad/issues/15541)] + +BUG FIXES: + +* acl: Fixed a bug where creating/updating a policy which was invalid would return a 404 status code, not a 400 [[GH-16000](https://github.com/hashicorp/nomad/issues/16000)] +* agent: Make agent syslog log level follow log_level config [[GH-15625](https://github.com/hashicorp/nomad/issues/15625)] +* api: Added missing node states to NodeStatus constants [[GH-16166](https://github.com/hashicorp/nomad/issues/16166)] +* api: Fix stale querystring parameter value as boolean [[GH-15605](https://github.com/hashicorp/nomad/issues/15605)] +* api: Fixed a bug where exposeConfig field was not provided correctly when getting the jobs via the API [[GH-15541](https://github.com/hashicorp/nomad/issues/15541)] +* api: Fixed a nil pointer dereference when periodic jobs are missing their periodic spec [[GH-13845](https://github.com/hashicorp/nomad/issues/13845)] +* cgutil: handle panic coming from runc helper method [[GH-16180](https://github.com/hashicorp/nomad/issues/16180)] +* cli: Fixed a panic in `deployment status` when rollback deployments are slow to appear [[GH-16011](https://github.com/hashicorp/nomad/issues/16011)] +* connect: ingress http/2/grpc listeners may exclude hosts [[GH-15749](https://github.com/hashicorp/nomad/issues/15749)] +* consul: Fixed a bug where acceptable service identity on Consul token was not accepted [[GH-15928](https://github.com/hashicorp/nomad/issues/15928)] +* consul: Fixed a bug where consul token was not respected when reverting a job [[GH-15996](https://github.com/hashicorp/nomad/issues/15996)] +* consul: Fixed a bug where services would continuously re-register when using ipv6 [[GH-15411](https://github.com/hashicorp/nomad/issues/15411)] +* core: enforce strict ordering that node status updates are recorded after allocation updates for reconnecting clients [[GH-15808](https://github.com/hashicorp/nomad/issues/15808)] +* csi: Fixed a bug where a crashing plugin could panic the Nomad client [[GH-15518](https://github.com/hashicorp/nomad/issues/15518)] +* csi: Fixed a bug where secrets that include '=' were incorrectly rejected [[GH-15670](https://github.com/hashicorp/nomad/issues/15670)] +* csi: Fixed a bug where volumes in non-default namespaces could not be scheduled for system or sysbatch jobs [[GH-15372](https://github.com/hashicorp/nomad/issues/15372)] +* csi: Fixed potential state store corruption when garbage collecting CSI volume claims or checking whether it's safe to force-deregister a volume 
[[GH-16256](https://github.com/hashicorp/nomad/issues/16256)] +* docker: Fixed a bug where images referenced by multiple tags would not be GC'd [[GH-15962](https://github.com/hashicorp/nomad/issues/15962)] +* docker: Fixed a bug where infra_image did not get alloc_id label [[GH-15898](https://github.com/hashicorp/nomad/issues/15898)] +* docker: configure restart policy for bridge network pause container [[GH-15732](https://github.com/hashicorp/nomad/issues/15732)] +* event stream: Fixed a bug where undefined ACL policies on the request's ACL would result in incorrect authentication errors [[GH-15495](https://github.com/hashicorp/nomad/issues/15495)] +* fix: Add the missing option propagation_mode for volume_mount [[GH-15626](https://github.com/hashicorp/nomad/issues/15626)] +* parser: Fixed a panic in the job spec parser when a variable validation block was missing its condition [[GH-16018](https://github.com/hashicorp/nomad/issues/16018)] +* scheduler (Enterprise): Fixed a bug that prevented new allocations from multiregion jobs from being placed in situations where other regions are not involved, such as node updates. [[GH-15325](https://github.com/hashicorp/nomad/issues/15325)] +* template: Fixed a bug that caused the change script to fail to run [[GH-15915](https://github.com/hashicorp/nomad/issues/15915)] +* ui: Fix allocation memory chart to display the same value as the CLI [[GH-15909](https://github.com/hashicorp/nomad/issues/15909)] +* ui: Fix navigation to pages for jobs that are not in the default namespace [[GH-15906](https://github.com/hashicorp/nomad/issues/15906)] +* volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index [[GH-16030](https://github.com/hashicorp/nomad/issues/16030)] + +## 1.3.9 (February 14, 2023) + +SECURITY: + +* artifact: Provide mitigations against unbounded artifact decompression [[GH-16126](https://github.com/hashicorp/nomad/issues/16126)] +* build: Update to go1.20.1 [[GH-16182](https://github.com/hashicorp/nomad/issues/16182)] + +## 1.3.8 (November 21, 2022) + +BUG FIXES: + +* api: Ensure all request body decode errors return a 400 status code [[GH-15252](https://github.com/hashicorp/nomad/issues/15252)] +* cleanup: fixed missing timer.Reset for plan queue stat emitter [[GH-15134](https://github.com/hashicorp/nomad/issues/15134)] +* client: Fixed a bug where tasks would restart without waiting for interval [[GH-15215](https://github.com/hashicorp/nomad/issues/15215)] +* client: fixed a bug where non-`docker` tasks with network isolation would leak network namespaces and iptables rules if the client was restarted while they were running [[GH-15214](https://github.com/hashicorp/nomad/issues/15214)] +* client: prevent allocations from failing on client reconnect by retrying RPC requests when no servers are available yet [[GH-15140](https://github.com/hashicorp/nomad/issues/15140)] +* csi: Fixed race condition that can cause a panic when volume is garbage collected [[GH-15101](https://github.com/hashicorp/nomad/issues/15101)] +* device: Fixed a bug where device plugins would not fingerprint on startup [[GH-15125](https://github.com/hashicorp/nomad/issues/15125)] +* drivers: Fixed a bug where one goroutine was leaked per task [[GH-15180](https://github.com/hashicorp/nomad/issues/15180)] +* drivers: pass missing `propagation_mode` configuration for volume mounts to external plugins [[GH-15096](https://github.com/hashicorp/nomad/issues/15096)] +* event_stream: fixed a bug where
dynamic port values would fail to serialize in the event stream [[GH-12916](https://github.com/hashicorp/nomad/issues/12916)] +* fingerprint: Ensure Nomad can correctly fingerprint Consul gRPC where the Consul agent is running v1.14.0 or greater [[GH-15309](https://github.com/hashicorp/nomad/issues/15309)] +* scheduler: Fixed a bug that prevented disconnected allocations to be updated after they reconnect. [[GH-15068](https://github.com/hashicorp/nomad/issues/15068)] +* scheduler: Prevent unnecessary placements when disconnected allocations reconnect. [[GH-15068](https://github.com/hashicorp/nomad/issues/15068)] +* template: Fixed a bug where template could cause agent panic on startup [[GH-15192](https://github.com/hashicorp/nomad/issues/15192)] + +## 1.3.7 (October 26, 2022) + +IMPROVEMENTS: + +* deps: update go-memdb for goroutine leak fix [[GH-14983](https://github.com/hashicorp/nomad/issues/14983)] +* docker: improve memory usage for docker_logger [[GH-14875](https://github.com/hashicorp/nomad/issues/14875)] + +BUG FIXES: + +* acl: Fixed a bug where Nomad version checking for one-time tokens was enforced across regions [[GH-14911](https://github.com/hashicorp/nomad/issues/14911)] +* client: Fixed a bug where Nomad could not detect cores on recent RHEL systems [[GH-15027](https://github.com/hashicorp/nomad/issues/15027)] +* consul: Fixed a bug where services continuously re-registered [[GH-14917](https://github.com/hashicorp/nomad/issues/14917)] +* consul: atomically register checks on initial service registration [[GH-14944](https://github.com/hashicorp/nomad/issues/14944)] +* deps: Update hashicorp/raft to v1.3.11; fixes unstable leadership on server removal [[GH-15021](https://github.com/hashicorp/nomad/issues/15021)] +* nomad native service discovery: Ensure all local servers meet v.1.3.0 minimum before service registrations can be written [[GH-14924](https://github.com/hashicorp/nomad/issues/14924)] +* scheduler: Fixed a bug where version checking for disconnected clients handling was enforced across regions [[GH-14911](https://github.com/hashicorp/nomad/issues/14911)] + +## 1.3.6 (October 04, 2022) + +SECURITY: + +* client: recover from panics caused by artifact download to prevent the Nomad client from crashing [[GH-14696](https://github.com/hashicorp/nomad/issues/14696)] + +IMPROVEMENTS: + +* api: return a more descriptive error when /v1/acl/bootstrap fails to decode request body [[GH-14629](https://github.com/hashicorp/nomad/issues/14629)] +* cli: ignore `-hcl2-strict` when -hcl1 is set. [[GH-14426](https://github.com/hashicorp/nomad/issues/14426)] +* cli: warn destructive update only when count is greater than 1 [[GH-13103](https://github.com/hashicorp/nomad/issues/13103)] +* consul: Allow interpolation of task environment values into Consul Service Mesh configuration [[GH-14445](https://github.com/hashicorp/nomad/issues/14445)] +* ui: Display different message when trying to exec into a job with no task running. [[GH-14071](https://github.com/hashicorp/nomad/issues/14071)] + +BUG FIXES: + +* api: Fixed a bug where the List Volume API did not include the `ControllerRequired` and `ResourceExhausted` fields. [[GH-14484](https://github.com/hashicorp/nomad/issues/14484)] +* cli: Ignore Vault token when generating job diff. 
[[GH-14424](https://github.com/hashicorp/nomad/issues/14424)] +* cli: fixed a bug in the `operator api` command where the HTTPS scheme was not always correctly calculated [[GH-14635](https://github.com/hashicorp/nomad/issues/14635)] +* cli: return exit code `255` when `nomad job plan` fails job validation. [[GH-14426](https://github.com/hashicorp/nomad/issues/14426)] +* cli: set content length on POST requests when using the `nomad operator api` command [[GH-14634](https://github.com/hashicorp/nomad/issues/14634)] +* client: Fixed bug where clients could attempt to connect to servers with invalid addresses retrieved from Consul. [[GH-14431](https://github.com/hashicorp/nomad/issues/14431)] +* csi: Fixed a bug where a volume that was successfully unmounted by the client but then failed controller unpublishing would not be marked free until garbage collection ran. [[GH-14675](https://github.com/hashicorp/nomad/issues/14675)] +* csi: Fixed a bug where the server would not send controller unpublish for a failed allocation. [[GH-14484](https://github.com/hashicorp/nomad/issues/14484)] +* csi: Fixed a data race in the volume unpublish endpoint that could result in claims being incorrectly marked as freed before being persisted to raft. [[GH-14484](https://github.com/hashicorp/nomad/issues/14484)] +* helpers: Fixed a bug where random stagger func did not protect against negative inputs [[GH-14497](https://github.com/hashicorp/nomad/issues/14497)] +* jobspec: Fixed a bug where an `artifact` with `headers` configuration would fail to parse when using HCLv1 [[GH-14637](https://github.com/hashicorp/nomad/issues/14637)] +* metrics: Update client `node_scheduling_eligibility` value with server heartbeats. [[GH-14483](https://github.com/hashicorp/nomad/issues/14483)] +* quotas (Enterprise): Fixed a server crashing panic when updating and checking a quota concurrently. +* rpc: check for spec changes in all regions when registering multiregion jobs [[GH-14519](https://github.com/hashicorp/nomad/issues/14519)] +* scheduler: Fixed bug where the scheduler would treat multiregion jobs as paused for job types that don't use deployments [[GH-14659](https://github.com/hashicorp/nomad/issues/14659)] +* template: Fixed a bug where the `splay` timeout was not being applied when `change_mode` was set to `script`. [[GH-14749](https://github.com/hashicorp/nomad/issues/14749)] +* ui: Remove extra space when displaying the version in the menu footer. [[GH-14457](https://github.com/hashicorp/nomad/issues/14457)] + +## 1.3.5 (August 31, 2022) + +IMPROVEMENTS: + +* cgroups: use cgroup.kill interface file when using cgroups v2 [[GH-14371](https://github.com/hashicorp/nomad/issues/14371)] +* consul: Reduce load on Consul leader server by allowing stale results when listing namespaces. [[GH-12953](https://github.com/hashicorp/nomad/issues/12953)] + +BUG FIXES: + +* cli: Fixed a bug where forcing a periodic job would fail if the job ID prefix-matched other periodic jobs [[GH-14333](https://github.com/hashicorp/nomad/issues/14333)] +* template: Fixed a bug that could cause Nomad to panic when using `change_mode = "script"` [[GH-14374](https://github.com/hashicorp/nomad/issues/14374)] +* ui: Revert a change that resulted in UI errors when ACLs were not used. 
[[GH-14381](https://github.com/hashicorp/nomad/issues/14381)] + +## 1.3.4 (August 25, 2022) + +IMPROVEMENTS: + +* api: HTTP server now returns a 429 error code when hitting the connection limit [[GH-13621](https://github.com/hashicorp/nomad/issues/13621)] +* build: update to go1.19 [[GH-14132](https://github.com/hashicorp/nomad/issues/14132)] +* cli: `operator debug` now outputs current leader to debug bundle [[GH-13472](https://github.com/hashicorp/nomad/issues/13472)] +* cli: `operator snapshot state` supports `-filter` expressions and avoids writing large temporary files [[GH-13658](https://github.com/hashicorp/nomad/issues/13658)] +* client: add option to restart all tasks of an allocation, regardless of lifecycle type or state. [[GH-14127](https://github.com/hashicorp/nomad/issues/14127)] +* client: only start poststop tasks after poststart tasks are done. [[GH-14127](https://github.com/hashicorp/nomad/issues/14127)] +* deps: Updated `github.com/hashicorp/go-discover` to latest to allow setting the AWS endpoint definition [[GH-13491](https://github.com/hashicorp/nomad/issues/13491)] +* driver/docker: Added config option to disable container healthcheck [[GH-14089](https://github.com/hashicorp/nomad/issues/14089)] +* qemu: Added option to configure `drive_interface` [[GH-11864](https://github.com/hashicorp/nomad/issues/11864)] +* sentinel: add the ability to reference the namespace and Nomad acl token in policies [[GH-14171](https://github.com/hashicorp/nomad/issues/14171)] +* template: add script change_mode that allows scripts to be executed on template change [[GH-13972](https://github.com/hashicorp/nomad/issues/13972)] +* ui: Add button to restart all tasks in an allocation. [[GH-14223](https://github.com/hashicorp/nomad/issues/14223)] +* ui: add general keyboard navigation to the Nomad UI [[GH-14138](https://github.com/hashicorp/nomad/issues/14138)] + +BUG FIXES: + +* api: cleanup whitespace from failed api response body [[GH-14145](https://github.com/hashicorp/nomad/issues/14145)] +* cli: Fixed a bug where job validation request was not sent to leader [[GH-14065](https://github.com/hashicorp/nomad/issues/14065)] +* cli: Fixed a bug where the memory usage reported by Allocation Resource Utilization is zero on systems using cgroups v2 [[GH-14069](https://github.com/hashicorp/nomad/issues/14069)] +* cli: Fixed a bug where the Vault token was not respected in the plan command [[GH-14088](https://github.com/hashicorp/nomad/issues/14088)] +* client/logmon: fixed a bug where logmon cannot find nomad executable [[GH-14297](https://github.com/hashicorp/nomad/issues/14297)] +* client: Fixed a bug where cpuset initialization would not work on first agent startup [[GH-14230](https://github.com/hashicorp/nomad/issues/14230)] +* client: Fixed a bug where user lookups would hang or panic [[GH-14248](https://github.com/hashicorp/nomad/issues/14248)] +* client: Fixed a problem calculating a service's namespace [[GH-13493](https://github.com/hashicorp/nomad/issues/13493)] +* csi: Fixed a bug where volume claims on lost or garbage collected nodes could not be freed [[GH-13301](https://github.com/hashicorp/nomad/issues/13301)] +* template: Fixed a bug where job templates would use `uid` and `gid` 0 after upgrading to Nomad 1.3.3, causing tasks to fail with the error `failed looking up user: managing file ownership is not supported on Windows`.
[[GH-14203](https://github.com/hashicorp/nomad/issues/14203)] +* ui: Fixed a bug that caused the allocation details page to display the stats bar chart even if the task was pending. [[GH-14224](https://github.com/hashicorp/nomad/issues/14224)] +* ui: Removes duplicate breadcrumb header when navigating from child job back to parent. [[GH-14115](https://github.com/hashicorp/nomad/issues/14115)] +* vault: Fixed a bug where Vault clients were recreated when the server configuration was reloaded, even if there were no changes to the Vault configuration. [[GH-14298](https://github.com/hashicorp/nomad/issues/14298)] +* vault: Fixed a bug where changing the Vault configuration `namespace` field was not detected as a change during server configuration reload. [[GH-14298](https://github.com/hashicorp/nomad/issues/14298)] + +## 1.3.3 (August 05, 2022) + +IMPROVEMENTS: + +* csi: Add `stage_publish_base_dir` field to `csi_plugin` block to support plugins that require a specific staging/publishing directory for mounts [[GH-13919](https://github.com/hashicorp/nomad/issues/13919)] +* qemu: use shorter socket file names to reduce the chance of hitting the max path length [[GH-13971](https://github.com/hashicorp/nomad/issues/13971)] +* template: Expose consul-template configuration options at the client level for `nomad_retry`. [[GH-13907](https://github.com/hashicorp/nomad/issues/13907)] +* template: Templates support new uid/gid parameter pair [[GH-13755](https://github.com/hashicorp/nomad/issues/13755)] +* ui: Reorder and apply the same style to the Evaluations list page filters to match the Job list page. [[GH-13866](https://github.com/hashicorp/nomad/issues/13866)] + +BUG FIXES: + +* acl: Fixed a bug where the timestamp for expiring one-time tokens was not deterministic between servers [[GH-13737](https://github.com/hashicorp/nomad/issues/13737)] +* deployments: Fixed a bug that prevented auto-approval if canaries were marked as unhealthy during deployment [[GH-14001](https://github.com/hashicorp/nomad/issues/14001)] +* metrics: Fixed a bug where blocked evals with no class produced no dc:class scope metrics [[GH-13786](https://github.com/hashicorp/nomad/issues/13786)] +* namespaces: Fixed a bug that allowed deleting a namespace that contained a CSI volume [[GH-13880](https://github.com/hashicorp/nomad/issues/13880)] +* qemu: restore the monitor socket path when restoring a QEMU task. [[GH-14000](https://github.com/hashicorp/nomad/issues/14000)] +* servicedisco: Fixed a bug where non-unique services would escape job validation [[GH-13869](https://github.com/hashicorp/nomad/issues/13869)] +* ui: Add missing breadcrumb in the Evaluations page. [[GH-13865](https://github.com/hashicorp/nomad/issues/13865)] +* ui: Fixed a bug where task memory was reported as zero on systems using cgroups v2 [[GH-13670](https://github.com/hashicorp/nomad/issues/13670)] + +## 1.3.2 (July 13, 2022) + +IMPROVEMENTS: + +* agent: Added delete support to the eval HTTP API [[GH-13492](https://github.com/hashicorp/nomad/issues/13492)] +* agent: emit a warning message if the agent starts with `bootstrap_expect` set to an even number. 
[[GH-12961](https://github.com/hashicorp/nomad/issues/12961)] +* agent: logs are no longer buffered at startup when logging in JSON format [[GH-13076](https://github.com/hashicorp/nomad/issues/13076)] +* api: enable setting `?choose` parameter when querying services [[GH-12862](https://github.com/hashicorp/nomad/issues/12862)] +* api: refactor ACL check when using the all namespaces wildcard in the job and alloc list endpoints [[GH-13608](https://github.com/hashicorp/nomad/issues/13608)] +* api: support Authorization Bearer header in lieu of X-Nomad-Token header [[GH-12534](https://github.com/hashicorp/nomad/issues/12534)] +* bootstrap: Added option to allow for an operator generated bootstrap token to be passed to the `acl bootstrap` command [[GH-12520](https://github.com/hashicorp/nomad/issues/12520)] +* cli: Added `delete` command to the eval CLI [[GH-13492](https://github.com/hashicorp/nomad/issues/13492)] +* cli: Added `scheduler get-config` and `scheduler set-config` commands to the operator CLI [[GH-13045](https://github.com/hashicorp/nomad/issues/13045)] +* cli: always display job ID and namespace in the `eval status` command [[GH-13581](https://github.com/hashicorp/nomad/issues/13581)] +* cli: display namespace and node ID in the `eval list` command and when `eval status` matches multiple evals [[GH-13581](https://github.com/hashicorp/nomad/issues/13581)] +* cli: update default redis and use nomad service discovery [[GH-13044](https://github.com/hashicorp/nomad/issues/13044)] +* client: added more fault tolerant defaults for template configuration [[GH-13041](https://github.com/hashicorp/nomad/issues/13041)] +* core: Added the ability to pause and un-pause the eval broker and blocked eval broker [[GH-13045](https://github.com/hashicorp/nomad/issues/13045)] +* core: On node updates skip creating evaluations for jobs not in the node's datacenter. [[GH-12955](https://github.com/hashicorp/nomad/issues/12955)] +* core: automatically mark clients with recurring plan rejections as ineligible [[GH-13421](https://github.com/hashicorp/nomad/issues/13421)] +* driver/docker: Eliminate excess Docker registry pulls for the `infra_image` when it already exists locally. [[GH-13265](https://github.com/hashicorp/nomad/issues/13265)] +* fingerprint: add support for detecting kernel architecture of clients. 
(attribute: `kernel.arch`) [[GH-13182](https://github.com/hashicorp/nomad/issues/13182)] +* hcl: added support for using the `filebase64` function in jobspecs [[GH-11791](https://github.com/hashicorp/nomad/issues/11791)] +* metrics: emit `nomad.nomad.plan.rejection_tracker.node_score` metric for the number of times a node had a plan rejection within the past time window [[GH-13421](https://github.com/hashicorp/nomad/issues/13421)] +* qemu: add support for guest agent socket [[GH-12800](https://github.com/hashicorp/nomad/issues/12800)] +* ui: Namespace filter query parameters are now isolated by route [[GH-13679](https://github.com/hashicorp/nomad/issues/13679)] + +BUG FIXES: + +* api: Fix listing evaluations with the wildcard namespace and an ACL token [[GH-13530](https://github.com/hashicorp/nomad/issues/13530)] +* api: Fixed a bug where Consul token was not respected for job revert API [[GH-13065](https://github.com/hashicorp/nomad/issues/13065)] +* cli: Fixed a bug in the names of the `node drain` and `node status` sub-commands [[GH-13656](https://github.com/hashicorp/nomad/issues/13656)] +* cli: Fixed a bug where job validate did not respect vault token or namespace [[GH-13070](https://github.com/hashicorp/nomad/issues/13070)] +* client: Fixed a bug where max_kill_timeout client config was ignored [[GH-13626](https://github.com/hashicorp/nomad/issues/13626)] +* client: Fixed a bug where network.dns block was not interpolated [[GH-12817](https://github.com/hashicorp/nomad/issues/12817)] +* cni: Fixed a bug where loopback address was not set for all drivers [[GH-13428](https://github.com/hashicorp/nomad/issues/13428)] +* connect: Added missing ability to set Connect upstream destination namespace [[GH-13125](https://github.com/hashicorp/nomad/issues/13125)] +* core: Fixed a bug where an evicted batch job would not be rescheduled [[GH-13205](https://github.com/hashicorp/nomad/issues/13205)] +* core: Fixed a bug where blocked eval resources were incorrectly computed [[GH-13104](https://github.com/hashicorp/nomad/issues/13104)] +* core: Fixed a bug where reserved ports on multiple node networks would be treated as a collision. `client.reserved.reserved_ports` is now merged into each `host_network`'s reserved ports instead of being treated as a collision. [[GH-13651](https://github.com/hashicorp/nomad/issues/13651)] +* core: Fixed a bug where the plan applier could deadlock if leader's state lagged behind plan's creation index for more than 5 seconds.
[[GH-13407](https://github.com/hashicorp/nomad/issues/13407)] +* csi: Fixed a regression where a timeout was introduced that prevented some plugins from running by marking them as unhealthy after 30s by introducing a configurable `health_timeout` field [[GH-13340](https://github.com/hashicorp/nomad/issues/13340)] +* csi: Fixed a scheduler bug where failed feasibility checks would return early and prevent processing additional nodes [[GH-13274](https://github.com/hashicorp/nomad/issues/13274)] +* docker: Fixed a bug where cgroups-v1 parent was being set [[GH-13058](https://github.com/hashicorp/nomad/issues/13058)] +* lifecycle: fixed a bug where sidecar tasks were not being stopped last [[GH-13055](https://github.com/hashicorp/nomad/issues/13055)] +* state: Fix listing evaluations from all namespaces [[GH-13551](https://github.com/hashicorp/nomad/issues/13551)] +* ui: Allow running jobs from a namespace-limited token [[GH-13659](https://github.com/hashicorp/nomad/issues/13659)] +* ui: Fix a bug that prevented viewing the details of an evaluation in a non-default namespace [[GH-13530](https://github.com/hashicorp/nomad/issues/13530)] +* ui: Fixed a bug that prevented the UI task exec functionality to work from behind a reverse proxy. [[GH-12925](https://github.com/hashicorp/nomad/issues/12925)] +* ui: Fixed an issue where editing or running a job with a namespace via the UI would throw a 404 on redirect. [[GH-13588](https://github.com/hashicorp/nomad/issues/13588)] +* ui: fixed a bug where links to jobs with "@" in their name would mis-identify namespace and 404 [[GH-13012](https://github.com/hashicorp/nomad/issues/13012)] +* volumes: Fixed a bug where additions, updates, or removals of host volumes or CSI volumes were not treated as destructive updates [[GH-13008](https://github.com/hashicorp/nomad/issues/13008)] + +## 1.3.1 (May 19, 2022) + +SECURITY: + +* A vulnerability was identified in the go-getter library that Nomad uses for its artifacts such that a specially crafted Nomad jobspec can be used for privilege escalation onto client agent hosts. [CVE-2022-30324](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-30324) [[GH-13057](https://github.com/hashicorp/nomad/issues/13057)] + +BUG FIXES: + +* agent: fixed a panic on startup when the `server.protocol_version` config parameter was set [[GH-12962](https://github.com/hashicorp/nomad/issues/12962)] + +## 1.3.0 (May 11, 2022) FEATURES: +* **Edge compute improvements**: Added support for reconnecting healthy allocations when disconnected clients reconnect. [[GH-12476](https://github.com/hashicorp/nomad/issues/12476)] * **Native service discovery**: Register and discover services using builtin simple service discovery. [[GH-12368](https://github.com/hashicorp/nomad/issues/12368)] -* core: Added support for reconnecting healthy allocations when disconnected clients reconnect. [[GH-12476](https://github.com/hashicorp/nomad/issues/12476)] BREAKING CHANGES: * agent: The state database on both clients and servers will automatically migrate its underlying database on startup. Downgrading to a previous version of an agent after upgrading it to Nomad 1.3 is not supported. [[GH-12107](https://github.com/hashicorp/nomad/issues/12107)] * client: The client state store will be automatically migrated to a new schema version when upgrading a client. Downgrading to a previous version of the client after upgrading it to Nomad 1.3 is not supported. To downgrade safely, users should erase the Nomad client's data directory. 
[[GH-12078](https://github.com/hashicorp/nomad/issues/12078)] +* connect: Consul Service Identity ACL tokens automatically generated for Connect services are now +created as Local rather than Global tokens. Nomad clusters with Connect services making cross-Consul +datacenter requests will need to ensure their Consul agents are configured with anonymous ACL tokens +of sufficient node and service read permissions. [[GH-8068](https://github.com/hashicorp/nomad/issues/8068)] +* connect: The minimum Consul version supported by Nomad's Connect integration is now Consul v1.8.0. [[GH-8068](https://github.com/hashicorp/nomad/issues/8068)] * csi: The client filesystem layout for CSI plugins has been updated to correctly handle the lifecycle of multiple allocations serving the same plugin. Running plugin tasks will not be updated after upgrading the client, but it is recommended to redeploy CSI plugin jobs after upgrading the cluster. [[GH-12078](https://github.com/hashicorp/nomad/issues/12078)] * raft: The default raft protocol version is now 3 so you must follow the [Upgrading to Raft Protocol 3](https://www.nomadproject.io/docs/upgrade#upgrading-to-raft-protocol-3) guide when upgrading an existing cluster to Nomad 1.3.0. Downgrading the raft protocol version is not supported. [[GH-11572](https://github.com/hashicorp/nomad/issues/11572)] @@ -21,7 +314,9 @@ IMPROVEMENTS: * agent: Switch from boltdb/bolt to go.etcd.io/bbolt [[GH-12107](https://github.com/hashicorp/nomad/issues/12107)] * api: Add `related` query parameter to the Evaluation details endpoint [[GH-12305](https://github.com/hashicorp/nomad/issues/12305)] * api: Add support for filtering and pagination to the jobs and volumes list endpoint [[GH-12186](https://github.com/hashicorp/nomad/issues/12186)] +* api: Add support for filtering and pagination to the node list endpoint [[GH-12727](https://github.com/hashicorp/nomad/issues/12727)] * api: Add support for filtering, sorting, and pagination to the ACL tokens and allocations list endpoint [[GH-12186](https://github.com/hashicorp/nomad/issues/12186)] +* api: Added ParseHCLOpts helper func to ease parsing HCLv1 jobspecs [[GH-12777](https://github.com/hashicorp/nomad/issues/12777)] * api: CSI secrets for list and delete snapshots are now passed in HTTP headers [[GH-12144](https://github.com/hashicorp/nomad/issues/12144)] * api: `AllocFS.Logs` now explicitly closes frames channel after being canceled [[GH-12248](https://github.com/hashicorp/nomad/issues/12248)] * api: default to using `DefaultPooledTransport` client to support keep-alive by default [[GH-12492](https://github.com/hashicorp/nomad/issues/12492)] @@ -29,20 +324,26 @@ IMPROVEMENTS: * api: sort return values of evaluation and deployment list api endpoints by creation index [[GH-12054](https://github.com/hashicorp/nomad/issues/12054)] * build: make targets now respect GOBIN variable [[GH-12077](https://github.com/hashicorp/nomad/issues/12077)] * build: upgrade and speedup circleci configuration [[GH-11889](https://github.com/hashicorp/nomad/issues/11889)] +* cli: Added -json flag to `nomad job {run,plan,validate}` to support parsing JSON formatted jobs [[GH-12591](https://github.com/hashicorp/nomad/issues/12591)] * cli: Added -os flag to node status to display operating system name [[GH-12388](https://github.com/hashicorp/nomad/issues/12388)] * cli: Added `nomad operator api` command to ease querying Nomad's HTTP API. 
[[GH-10808](https://github.com/hashicorp/nomad/issues/10808)] * cli: CSI secrets argument for `volume snapshot list` has been made consistent with `volume snapshot delete` [[GH-12144](https://github.com/hashicorp/nomad/issues/12144)] * cli: Return a redacted value for mount flags in the `volume status` command, instead of `` [[GH-12150](https://github.com/hashicorp/nomad/issues/12150)] +* cli: `operator debug` command now skips generating pprofs to avoid a panic on Nomad 0.11.2. 0.11.1, and 0.11.0 [[GH-12807](https://github.com/hashicorp/nomad/issues/12807)] * cli: add `nomad config validate` command to check configuration files without an agent [[GH-9198](https://github.com/hashicorp/nomad/issues/9198)] * cli: added `-pprof-interval` to `nomad operator debug` command [[GH-11938](https://github.com/hashicorp/nomad/issues/11938)] * cli: display the Raft version instead of the Serf protocol in the `nomad server members` command [[GH-12317](https://github.com/hashicorp/nomad/issues/12317)] * cli: rename the `nomad server members` `-detailed` flag to `-verbose` so it matches other commands [[GH-12317](https://github.com/hashicorp/nomad/issues/12317)] +* client: Added `NOMAD_SHORT_ALLOC_ID` allocation env var [[GH-12603](https://github.com/hashicorp/nomad/issues/12603)] * client: Allow interpolation of the network.dns block [[GH-12021](https://github.com/hashicorp/nomad/issues/12021)] +* client: Download up to 3 artifacts concurrently [[GH-11531](https://github.com/hashicorp/nomad/issues/11531)] * client: Enable support for cgroups v2 [[GH-12274](https://github.com/hashicorp/nomad/issues/12274)] * client: fingerprint AWS instance life cycle option [[GH-12371](https://github.com/hashicorp/nomad/issues/12371)] * client: set NOMAD_CPU_CORES environment variable when reserving cpu cores [[GH-12496](https://github.com/hashicorp/nomad/issues/12496)] +* connect: automatically set alloc_id in envoy_stats_tags configuration [[GH-12543](https://github.com/hashicorp/nomad/issues/12543)] * connect: bootstrap envoy sidecars using -proxy-for [[GH-12011](https://github.com/hashicorp/nomad/issues/12011)] * consul/connect: write Envoy bootstrapping information to disk for debugging [[GH-11975](https://github.com/hashicorp/nomad/issues/11975)] +* consul: Added implicit Consul constraint for task groups utilising Consul service and check registrations [[GH-12602](https://github.com/hashicorp/nomad/issues/12602)] * consul: add go-sockaddr templating support to nomad consul address [[GH-12084](https://github.com/hashicorp/nomad/issues/12084)] * consul: improve service name validation message to include maximum length requirement [[GH-12012](https://github.com/hashicorp/nomad/issues/12012)] * core: Enable configuring raft boltdb freelist sync behavior [[GH-12107](https://github.com/hashicorp/nomad/issues/12107)] @@ -59,11 +360,13 @@ IMPROVEMENTS: * csi: allow namespace field to be passed in volume spec [[GH-12400](https://github.com/hashicorp/nomad/issues/12400)] * deps: Update hashicorp/raft-boltdb to v2.2.0 [[GH-12107](https://github.com/hashicorp/nomad/issues/12107)] * deps: Update serf library to v0.9.7 [[GH-12130](https://github.com/hashicorp/nomad/issues/12130)] +* deps: Updated hashicorp/consul-template to v0.29.0 [[GH-12747](https://github.com/hashicorp/nomad/issues/12747)] * deps: Updated hashicorp/raft to v1.3.5 [[GH-12079](https://github.com/hashicorp/nomad/issues/12079)] * deps: Upgrade kr/pty to creack/pty v1.1.5 [[GH-11855](https://github.com/hashicorp/nomad/issues/11855)] * deps: use gorilla package 
for gzip http handler [[GH-11843](https://github.com/hashicorp/nomad/issues/11843)] * drainer: defer draining CSI plugin jobs until system jobs are drained [[GH-12324](https://github.com/hashicorp/nomad/issues/12324)] * drivers/raw_exec: Add support for cgroups v2 in raw_exec driver [[GH-12419](https://github.com/hashicorp/nomad/issues/12419)] +* drivers: removed support for restoring tasks created before Nomad 0.9 [[GH-12791](https://github.com/hashicorp/nomad/issues/12791)] * fingerprint: add support for detecting DigitalOcean environment [[GH-12015](https://github.com/hashicorp/nomad/issues/12015)] * metrics: Emit metrics regarding raft boltdb operations [[GH-12107](https://github.com/hashicorp/nomad/issues/12107)] * metrics: emit `nomad.vault.token_last_renewal` and `nomad.vault.token_next_renewal` metrics for Vault token renewal information [[GH-12435](https://github.com/hashicorp/nomad/issues/12435)] @@ -74,10 +377,12 @@ IMPROVEMENTS: * scheduler: recover scheduler goroutines on panic [[GH-12009](https://github.com/hashicorp/nomad/issues/12009)] * server: Transfer Raft leadership in case the Nomad server fails to establish leadership [[GH-12293](https://github.com/hashicorp/nomad/issues/12293)] * server: store and check previous Raft protocol version to prevent downgrades [[GH-12362](https://github.com/hashicorp/nomad/issues/12362)] +* services: Enable setting arbitrary address on Nomad or Consul service registration [[GH-12720](https://github.com/hashicorp/nomad/issues/12720)] * template: Upgraded from consul-template v0.25.2 to v0.28.0, which includes the sprig library of functions and more. [[GH-12312](https://github.com/hashicorp/nomad/issues/12312)] +* ui: added visual indicators for disconnected allocations and client nodes [[GH-12544](https://github.com/hashicorp/nomad/issues/12544)] * ui: break long service tags into multiple lines [[GH-11995](https://github.com/hashicorp/nomad/issues/11995)] +* ui: change sort-order of evaluations to be reverse-chronological [[GH-12847](https://github.com/hashicorp/nomad/issues/12847)] * ui: make buttons with confirmation more descriptive of their actions [[GH-12252](https://github.com/hashicorp/nomad/issues/12252)] -* vault: support Vault entity aliases when deriving tokens [[GH-12449](https://github.com/hashicorp/nomad/issues/12449)] DEPRECATIONS: @@ -87,8 +392,11 @@ BUG FIXES: * api: Apply prefix filter when querying CSI volumes in all namespaces [[GH-12184](https://github.com/hashicorp/nomad/issues/12184)] * cleanup: prevent leaks from time.After [[GH-11983](https://github.com/hashicorp/nomad/issues/11983)] +* client: Fixed a bug that could prevent a preempting alloc from ever starting. [[GH-12779](https://github.com/hashicorp/nomad/issues/12779)] +* client: Fixed a bug where clients that retry blocking queries would not reset the correct blocking duration [[GH-12593](https://github.com/hashicorp/nomad/issues/12593)] * config: Fixed a bug where the `reservable_cores` setting was not respected [[GH-12044](https://github.com/hashicorp/nomad/issues/12044)] * core: Fixed auto-promotion of canaries in jobs with at least one task group without canaries. [[GH-11878](https://github.com/hashicorp/nomad/issues/11878)] +* core: prevent malformed plans from crashing leader [[GH-11944](https://github.com/hashicorp/nomad/issues/11944)] * csi: Fixed a bug where `plugin status` commands could choose the incorrect plugin if a plugin with a name that matched the same prefix existed. 
[[GH-12194](https://github.com/hashicorp/nomad/issues/12194)] * csi: Fixed a bug where `volume snapshot list` did not correctly filter by plugin IDs. The `-plugin` parameter is required. [[GH-12197](https://github.com/hashicorp/nomad/issues/12197)] * csi: Fixed a bug where allocations with volume claims would fail their first placement after a reschedule [[GH-12113](https://github.com/hashicorp/nomad/issues/12113)] @@ -96,6 +404,9 @@ BUG FIXES: * csi: Fixed a bug where creating snapshots required a plugin ID instead of falling back to the volume's plugin ID [[GH-12195](https://github.com/hashicorp/nomad/issues/12195)] * csi: Fixed a bug where fields were missing from the Read Volume API response [[GH-12178](https://github.com/hashicorp/nomad/issues/12178)] * csi: Fixed a bug where garbage collected nodes would block releasing a volume [[GH-12350](https://github.com/hashicorp/nomad/issues/12350)] +* csi: Fixed a bug where per-alloc volumes used the incorrect ID when querying for `alloc status -verbose` [[GH-12573](https://github.com/hashicorp/nomad/issues/12573)] +* csi: Fixed a bug where plugin configuration updates were not considered destructive [[GH-12774](https://github.com/hashicorp/nomad/issues/12774)] +* csi: Fixed a bug where plugins would not restart if they failed any time after a client restart [[GH-12752](https://github.com/hashicorp/nomad/issues/12752)] * csi: Fixed a bug where plugins written in NodeJS could fail to fingerprint [[GH-12359](https://github.com/hashicorp/nomad/issues/12359)] * csi: Fixed a bug where purging a job with a missing plugin would fail [[GH-12114](https://github.com/hashicorp/nomad/issues/12114)] * csi: Fixed a bug where single-use access modes were not enforced during validation [[GH-12337](https://github.com/hashicorp/nomad/issues/12337)] @@ -103,14 +414,19 @@ BUG FIXES: * csi: Fixed a bug where the plugin instance manager would not retry the initial gRPC connection to plugins [[GH-12057](https://github.com/hashicorp/nomad/issues/12057)] * csi: Fixed a bug where the plugin supervisor would not restart the task if it failed to connect to the plugin [[GH-12057](https://github.com/hashicorp/nomad/issues/12057)] * csi: Fixed a bug where volume snapshot timestamps were always zero values [[GH-12352](https://github.com/hashicorp/nomad/issues/12352)] +* csi: Fixed bug where accessing plugins was subject to a data race [[GH-12553](https://github.com/hashicorp/nomad/issues/12553)] * csi: fixed a bug where `volume detach`, `volume deregister`, and `volume status` commands did not accept an exact ID if multiple volumes matched the prefix [[GH-12051](https://github.com/hashicorp/nomad/issues/12051)] * csi: provide `CSI_ENDPOINT` environment variable to plugin tasks [[GH-12050](https://github.com/hashicorp/nomad/issues/12050)] +* jobspec: Fixed a bug where connect sidecar resources were ignored when using HCL1 [[GH-11927](https://github.com/hashicorp/nomad/issues/11927)] * lifecycle: Fixed a bug where successful poststart tasks were marked as unhealthy [[GH-11945](https://github.com/hashicorp/nomad/issues/11945)] * recommendations (Enterprise): Fixed a bug where the recommendations list RPC incorrectly forwarded requests to the authoritative region [[GH-12040](https://github.com/hashicorp/nomad/issues/12040)] * scheduler: fixed a bug where in-place updates on ineligible nodes would be ignored [[GH-12264](https://github.com/hashicorp/nomad/issues/12264)] * server: Write peers.json file with correct permissions 
[[GH-12369](https://github.com/hashicorp/nomad/issues/12369)] * template: Fixed a bug preventing allowing all consul-template functions. [[GH-12312](https://github.com/hashicorp/nomad/issues/12312)] +* template: Fixed a bug where the default `function_denylist` would be appended to a specified list [[GH-12071](https://github.com/hashicorp/nomad/issues/12071)] * ui: Fix the link target for CSI volumes on the task detail page [[GH-11896](https://github.com/hashicorp/nomad/issues/11896)] +* ui: Fixed a bug where volumes were being incorrectly linked when per_alloc=true [[GH-12713](https://github.com/hashicorp/nomad/issues/12713)] +* ui: fix broken link to task-groups in the Recent Allocations table in the Job Detail overview page. [[GH-12765](https://github.com/hashicorp/nomad/issues/12765)] * ui: fix the unit for the task row memory usage metric [[GH-11980](https://github.com/hashicorp/nomad/issues/11980)] ## 1.2.6 (February 9, 2022) diff --git a/CODEOWNERS b/CODEOWNERS index fd8112e6f83..93a12a6106b 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,4 +1,4 @@ # release configuration -/.release/ @hashicorp/release-engineering -/.github/workflows/build.yml @hashicorp/release-engineering +/.release/ @hashicorp/release-engineering @hashicorp/github-nomad-core +/.github/workflows/build.yml @hashicorp/release-engineering @hashicorp/github-nomad-core diff --git a/GNUmakefile b/GNUmakefile index d8cd2585370..8ef9206a641 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -21,7 +21,7 @@ ifndef BIN BIN := $(GOPATH)/bin endif -GO_TAGS ?= +GO_TAGS ?= osusergo ifeq ($(CI),true) GO_TAGS := codegen_generated $(GO_TAGS) @@ -32,24 +32,12 @@ ifndef NOMAD_NO_UI GO_TAGS := ui $(GO_TAGS) endif -ifeq ($(CIRCLECI),true) -GO_TEST_CMD = $(if $(shell command -v gotestsum 2>/dev/null),gotestsum --,go test) -else -GO_TEST_CMD = go test -endif - -ifeq ($(origin GOTEST_PKGS_EXCLUDE), undefined) -GOTEST_PKGS ?= "./..." -else -GOTEST_PKGS=$(shell go list ./... | sed 's/github.com\/hashicorp\/nomad/./' | egrep -v "^($(GOTEST_PKGS_EXCLUDE))(/.*)?$$") -endif - # tag corresponding to latest release we maintain backward compatibility with PROTO_COMPARE_TAG ?= v1.0.3$(if $(findstring ent,$(GO_TAGS)),+ent,) # LAST_RELEASE is the git sha of the latest release corresponding to this branch. main should have the latest # published release, and release branches should point to the latest published release in the X.Y release line. 
-LAST_RELEASE ?= v1.2.6 +LAST_RELEASE ?= v1.3.12 default: help @@ -75,6 +63,11 @@ ifeq (FreeBSD,$(THIS_OS)) ALL_TARGETS = freebsd_amd64 endif +# Allow overriding ALL_TARGETS via $TARGETS +ifdef TARGETS +ALL_TARGETS = $(TARGETS) +endif + SUPPORTED_OSES = Darwin Linux FreeBSD Windows MSYS_NT CGO_ENABLED = 1 @@ -130,22 +123,22 @@ deps: ## Install build and development dependencies go install github.com/hashicorp/go-bindata/go-bindata@bf7910af899725e4938903fb32048c7c0b15f12e go install github.com/elazarl/go-bindata-assetfs/go-bindata-assetfs@234c15e7648ff35458026de92b34c637bae5e6f7 go install github.com/a8m/tree/cmd/tree@fce18e2a750ea4e7f53ee706b1c3d9cbb22de79c - go install gotest.tools/gotestsum@v1.7.0 - go install github.com/hashicorp/hcl/v2/cmd/hclfmt@v2.5.1 + go install gotest.tools/gotestsum@v1.8.2 + go install github.com/hashicorp/hcl/v2/cmd/hclfmt@d0c4fa8b0bbc2e4eeccd1ed2a32c2089ed8c5cf1 go install github.com/golang/protobuf/protoc-gen-go@v1.3.4 go install github.com/hashicorp/go-msgpack/codec/codecgen@v1.1.5 go install github.com/bufbuild/buf/cmd/buf@v0.36.0 go install github.com/hashicorp/go-changelog/cmd/changelog-build@latest - go install golang.org/x/tools/cmd/stringer@v0.1.8 - go install gophers.dev/cmds/hc-install/cmd/hc-install@v1.0.1 + go install golang.org/x/tools/cmd/stringer@v0.1.12 + go install github.com/hashicorp/hc-install/cmd/hc-install@v0.5.0 .PHONY: lint-deps lint-deps: ## Install linter dependencies ## Keep versions in sync with tools/go.mod (see https://github.com/golang/go/issues/30515) @echo "==> Updating linter dependencies..." - go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.45.0 + go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.51.1 go install github.com/client9/misspell/cmd/misspell@v0.3.4 - go install github.com/hashicorp/go-hclog/hclogvet@v0.1.3 + go install github.com/hashicorp/go-hclog/hclogvet@v0.1.6 .PHONY: git-hooks git-dir = $(shell git rev-parse --git-dir) @@ -179,6 +172,9 @@ check: ## Lint the source code @echo "==> Check API package is isolated from rest" @cd ./api && if go list --test -f '{{ join .Deps "\n" }}' . | grep github.com/hashicorp/nomad/ | grep -v -e /nomad/api/ -e nomad/api.test; then echo " /api package depends the ^^ above internal nomad packages. Remove such dependency"; exit 1; fi + @echo "==> Check command package does not import structs" + @cd ./command && if go list -f '{{ join .Imports "\n" }}' . | grep github.com/hashicorp/nomad/nomad/structs; then echo " /command package imports the structs pkg. Remove such import"; exit 1; fi + @echo "==> Checking Go mod.." @GO111MODULE=on $(MAKE) tidy @if (git status --porcelain | grep -Eq "go\.(mod|sum)"); then \ @@ -279,26 +275,12 @@ release: clean $(foreach t,$(ALL_TARGETS),pkg/$(t).zip) ## Build all release pac @echo "==> Results:" @tree --dirsfirst $(PROJECT_ROOT)/pkg -.PHONY: test -test: ## Run the Nomad test suite and/or the Nomad UI test suite - @if [ ! 
$(SKIP_NOMAD_TESTS) ]; then \ - make test-nomad; \ - fi - @if [ $(RUN_WEBSITE_TESTS) ]; then \ - make test-website; \ - fi - @if [ $(RUN_UI_TESTS) ]; then \ - make test-ui; \ - fi - @if [ $(RUN_E2E_TESTS) ]; then \ - make e2e-test; \ - fi - .PHONY: test-nomad -test-nomad: dev ## Run Nomad test suites - @echo "==> Running Nomad test suites:" - $(if $(ENABLE_RACE),GORACE="strip_path_prefix=$(GOPATH)/src") $(GO_TEST_CMD) \ - $(if $(ENABLE_RACE),-race) $(if $(VERBOSE),-v) \ +test-nomad: GOTEST_PKGS=$(shell go run -modfile=tools/go.mod tools/missing/main.go ci/test-core.json $(GOTEST_GROUP)) +test-nomad: # dev ## Run Nomad unit tests + @echo "==> Running Nomad unit tests $(GOTEST_GROUP)" + @echo "==> with packages $(GOTEST_PKGS)" + gotestsum --format=testname --rerun-fails=3 --packages="$(GOTEST_PKGS)" -- \ -cover \ -timeout=20m \ -count=1 \ @@ -306,13 +288,13 @@ test-nomad: dev ## Run Nomad test suites $(GOTEST_PKGS) .PHONY: test-nomad-module -test-nomad-module: dev ## Run Nomad test suites on a sub-module - @echo "==> Running Nomad test suites on sub-module $(GOTEST_MOD)" - @cd $(GOTEST_MOD) && $(if $(ENABLE_RACE),GORACE="strip_path_prefix=$(GOPATH)/src") $(GO_TEST_CMD) \ - $(if $(ENABLE_RACE),-race) $(if $(VERBOSE),-v) \ +test-nomad-module: dev ## Run Nomad unit tests on sub-module + @echo "==> Running Nomad unit tests on sub-module $(GOTEST_MOD)" + cd $(GOTEST_MOD); gotestsum --format=testname --rerun-fails=3 --packages=./... -- \ -cover \ -timeout=20m \ -count=1 \ + -race \ -tags "$(GO_TAGS)" \ ./... @@ -421,4 +403,9 @@ endif .PHONY: missing missing: ## Check for packages not being tested @echo "==> Checking for packages not being tested ..." - @go run -modfile tools/go.mod tools/missing/main.go .github/workflows/test-core.yaml + @go run -modfile tools/go.mod tools/missing/main.go ci/test-core.json + +.PHONY: ec2info +ec2info: ## Generate AWS EC2 CPU specification table + @echo "==> Generating AWS EC2 specifications ..." + @go run -modfile tools/go.mod tools/ec2info/main.go diff --git a/acl/acl.go b/acl/acl.go index 57b64814f3c..9a4438ab2fc 100644 --- a/acl/acl.go +++ b/acl/acl.go @@ -9,6 +9,9 @@ import ( glob "github.com/ryanuber/go-glob" ) +// Redefine this value from structs to avoid circular dependency. +const AllNamespacesSentinel = "*" + // ManagementACL is a singleton used for management tokens var ManagementACL *ACL @@ -215,13 +218,32 @@ func (a *ACL) AllowNsOp(ns string, op string) bool { return a.AllowNamespaceOperation(ns, op) } -// AllowNamespaceOperation checks if a given operation is allowed for a namespace +// AllowNsOpFunc is a helper that returns a function that can be used to check +// namespace permissions. +func (a *ACL) AllowNsOpFunc(ops ...string) func(string) bool { + return func(ns string) bool { + return NamespaceValidator(ops...)(a, ns) + } +} + +// AllowNamespaceOperation checks if a given operation is allowed for a namespace. func (a *ACL) AllowNamespaceOperation(ns string, op string) bool { + // Hot path if ACL is not enabled. + if a == nil { + return true + } + // Hot path management tokens if a.management { return true } + // If using the all namespaces wildcard, allow if any namespace allows the + // operation. 
+ if ns == AllNamespacesSentinel && a.anyNamespaceAllowsOp(op) { + return true + } + // Check for a matching capability set capabilities, ok := a.matchingNamespaceCapabilitySet(ns) if !ok { @@ -234,11 +256,22 @@ func (a *ACL) AllowNamespaceOperation(ns string, op string) bool { // AllowNamespace checks if any operations are allowed for a namespace func (a *ACL) AllowNamespace(ns string) bool { + // Hot path if ACL is not enabled. + if a == nil { + return true + } + // Hot path management tokens if a.management { return true } + // If using the all namespaces wildcard, allow if any namespace allows any + // operation. + if ns == AllNamespacesSentinel && a.anyNamespaceAllowsAnyOp() { + return true + } + // Check for a matching capability set capabilities, ok := a.matchingNamespaceCapabilitySet(ns) if !ok { @@ -307,6 +340,42 @@ func (a *ACL) matchingNamespaceCapabilitySet(ns string) (capabilitySet, bool) { return a.findClosestMatchingGlob(a.wildcardNamespaces, ns) } +// anyNamespaceAllowsOp returns true if any namespace in ACL object allows the +// given operation. +func (a *ACL) anyNamespaceAllowsOp(op string) bool { + return a.anyNamespaceAllows(func(c capabilitySet) bool { + return c.Check(op) + }) +} + +// anyNamespaceAllowsAnyOp returns true if any namespace in ACL object allows +// at least one operation. +func (a *ACL) anyNamespaceAllowsAnyOp() bool { + return a.anyNamespaceAllows(func(c capabilitySet) bool { + return len(c) > 0 && !c.Check(PolicyDeny) + }) +} + +// anyNamespaceAllows returns true if the callback cb returns true for any +// namespace operation of the ACL object. +func (a *ACL) anyNamespaceAllows(cb func(capabilitySet) bool) bool { + allow := false + + checkFn := func(_ []byte, iv interface{}) bool { + v := iv.(capabilitySet) + allow = cb(v) + return allow + } + + a.namespaces.Root().Walk(checkFn) + if allow { + return true + } + + a.wildcardNamespaces.Root().Walk(checkFn) + return allow +} + // matchingHostVolumeCapabilitySet looks for a capabilitySet that matches the host volume name, // if no concrete definitions are found, then we return the closest matching // glob. 
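The wildcard handling added to `AllowNamespaceOperation` and `AllowNamespace` above is easiest to see end to end with the same `Parse`/`NewACL` helpers the package tests use. The sketch below is illustrative only: the policy text, the `dev` namespace, and the capability strings are made-up examples, and it assumes the package is imported as `github.com/hashicorp/nomad/acl`.

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/acl"
)

func main() {
	// Policy granting a single capability in one concrete namespace.
	policy, err := acl.Parse(`namespace "dev" { capabilities = ["list-jobs"] }`)
	if err != nil {
		panic(err)
	}

	aclObj, err := acl.NewACL(false, []*acl.Policy{policy})
	if err != nil {
		panic(err)
	}

	// Concrete namespace: the capability is granted directly.
	fmt.Println(aclObj.AllowNamespaceOperation("dev", "list-jobs")) // true

	// Wildcard namespace ("*"): with the change above, the check passes as
	// long as at least one namespace in the policy allows the operation.
	fmt.Println(aclObj.AllowNamespaceOperation("*", "list-jobs")) // true

	// An operation that no namespace grants stays denied for the wildcard.
	fmt.Println(aclObj.AllowNamespaceOperation("*", "submit-job")) // false
}
```

The intent, per the comments in the diff, is that wildcard requests are admitted when any namespace would allow them, with per-namespace filtering expected to happen later in the request path.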
diff --git a/acl/acl_test.go b/acl/acl_test.go index 2ac22f70264..872d0728c16 100644 --- a/acl/acl_test.go +++ b/acl/acl_test.go @@ -5,6 +5,7 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestCapabilitySet(t *testing.T) { @@ -234,42 +235,89 @@ func TestAllowNamespace(t *testing.T) { ci.Parallel(t) tests := []struct { - Policy string - Allow bool + name string + policy string + allow bool + namespace string }{ { - Policy: `namespace "foo" {}`, - Allow: false, + name: "foo namespace - no capabilities", + policy: `namespace "foo" {}`, + allow: false, + namespace: "foo", }, { - Policy: `namespace "foo" { policy = "deny" }`, - Allow: false, + name: "foo namespace - deny policy", + policy: `namespace "foo" { policy = "deny" }`, + allow: false, + namespace: "foo", }, { - Policy: `namespace "foo" { capabilities = ["deny"] }`, - Allow: false, + name: "foo namespace - deny capability", + policy: `namespace "foo" { capabilities = ["deny"] }`, + allow: false, + namespace: "foo", }, { - Policy: `namespace "foo" { capabilities = ["list-jobs"] }`, - Allow: true, + name: "foo namespace - with capability", + policy: `namespace "foo" { capabilities = ["list-jobs"] }`, + allow: true, + namespace: "foo", }, { - Policy: `namespace "foo" { policy = "read" }`, - Allow: true, + name: "foo namespace - with policy", + policy: `namespace "foo" { policy = "read" }`, + allow: true, + namespace: "foo", + }, + { + name: "wildcard namespace - no capabilities", + policy: `namespace "foo" {}`, + allow: false, + namespace: "*", + }, + { + name: "wildcard namespace - deny policy", + policy: `namespace "foo" { policy = "deny" }`, + allow: false, + namespace: "*", + }, + { + name: "wildcard namespace - deny capability", + policy: `namespace "foo" { capabilities = ["deny"] }`, + allow: false, + namespace: "*", + }, + { + name: "wildcard namespace - with capability", + policy: `namespace "foo" { capabilities = ["list-jobs"] }`, + allow: true, + namespace: "*", + }, + { + name: "wildcard namespace - with policy", + policy: `namespace "foo" { policy = "read" }`, + allow: true, + namespace: "*", + }, + { + name: "wildcard namespace - no namespace rule", + policy: `agent { policy = "read" }`, + allow: false, + namespace: "*", }, } for _, tc := range tests { - t.Run(tc.Policy, func(t *testing.T) { - assert := assert.New(t) - - policy, err := Parse(tc.Policy) - assert.Nil(err) + t.Run(tc.name, func(t *testing.T) { + policy, err := Parse(tc.policy) + require.NoError(t, err) acl, err := NewACL(false, []*Policy{policy}) - assert.Nil(err) + require.NoError(t, err) - assert.Equal(tc.Allow, acl.AllowNamespace("foo")) + got := acl.AllowNamespace(tc.namespace) + require.Equal(t, tc.allow, got) }) } } @@ -278,51 +326,71 @@ func TestWildcardNamespaceMatching(t *testing.T) { ci.Parallel(t) tests := []struct { - Policy string - Allow bool + name string + policy string + allow bool + namespace string }{ - { // Wildcard matches - Policy: `namespace "prod-api-*" { policy = "write" }`, - Allow: true, + { + name: "wildcard matches", + policy: `namespace "prod-api-*" { policy = "write" }`, + allow: true, + namespace: "prod-api-services", }, - { // Non globbed namespaces are not wildcards - Policy: `namespace "prod-api" { policy = "write" }`, - Allow: false, + { + name: "non globbed namespaces are not wildcards", + policy: `namespace "prod-api" { policy = "write" }`, + allow: false, + namespace: "prod-api-services", }, - { // Concrete matches take precedence - Policy: 
`namespace "prod-api-services" { policy = "deny" } + { + name: "concrete matches take precedence", + policy: `namespace "prod-api-services" { policy = "deny" } namespace "prod-api-*" { policy = "write" }`, - Allow: false, + allow: false, + namespace: "prod-api-services", }, { - Policy: `namespace "prod-api-*" { policy = "deny" } + name: "glob match", + policy: `namespace "prod-api-*" { policy = "deny" } namespace "prod-api-services" { policy = "write" }`, - Allow: true, + allow: true, + namespace: "prod-api-services", }, - { // The closest character match wins - Policy: `namespace "*-api-services" { policy = "deny" } + { + name: "closest character match wins - suffix", + policy: `namespace "*-api-services" { policy = "deny" } namespace "prod-api-*" { policy = "write" }`, // 4 vs 8 chars - Allow: false, + allow: false, + namespace: "prod-api-services", }, { - Policy: `namespace "prod-api-*" { policy = "write" } + name: "closest character match wins - prefix", + policy: `namespace "prod-api-*" { policy = "write" } namespace "*-api-services" { policy = "deny" }`, // 4 vs 8 chars - Allow: false, + allow: false, + namespace: "prod-api-services", + }, + { + name: "wildcard namespace with glob match", + policy: `namespace "prod-api-*" { policy = "deny" } + namespace "prod-api-services" { policy = "write" }`, + allow: true, + namespace: "*", }, } for _, tc := range tests { - t.Run(tc.Policy, func(t *testing.T) { - assert := assert.New(t) - - policy, err := Parse(tc.Policy) - assert.NoError(err) - assert.NotNil(policy.Namespaces) + t.Run(tc.name, func(t *testing.T) { + policy, err := Parse(tc.policy) + require.NoError(t, err) + require.NotNil(t, policy.Namespaces) acl, err := NewACL(false, []*Policy{policy}) - assert.Nil(err) + require.NoError(t, err) - assert.Equal(tc.Allow, acl.AllowNamespace("prod-api-services")) + got := acl.AllowNamespace(tc.namespace) + require.Equal(t, tc.allow, got) }) } } diff --git a/acl/policy.go b/acl/policy.go index 95df4a280be..5bc5cd9dca0 100644 --- a/acl/policy.go +++ b/acl/policy.go @@ -8,7 +8,7 @@ import ( ) const ( - // The following levels are the only valid values for the `policy = "read"` stanza. + // The following levels are the only valid values for the `policy = "read"` block. // When policies are merged together, the most privilege is granted, except for deny // which always takes precedence and supersedes. PolicyDeny = "deny" @@ -20,7 +20,7 @@ const ( const ( // The following are the fine-grained capabilities that can be granted within a namespace. - // The Policy stanza is a short hand for granting several of these. When capabilities are + // The Policy block is a short hand for granting several of these. When capabilities are // combined we take the union of all capabilities. If the deny capability is present, it // takes precedence and overwrites all other capabilities. @@ -54,7 +54,7 @@ var ( const ( // The following are the fine-grained capabilities that can be granted for a volume set. - // The Policy stanza is a short hand for granting several of these. When capabilities are + // The Policy block is a short hand for granting several of these. When capabilities are // combined we take the union of all capabilities. If the deny capability is present, it // takes precedence and overwrites all other capabilities. 
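The table-driven cases above capture the namespace matching rules: an exact rule always wins over a glob, and among globs the closest match wins. As a rough illustration of the first rule (the policy text and the `prod-api-billing` namespace are made-up examples, not taken from the repository), a sketch along these lines should behave the way the "concrete matches take precedence" case describes:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/acl"
)

func main() {
	// A concrete rule and a glob rule that would otherwise both match.
	rules := `
namespace "prod-api-services" { policy = "deny" }
namespace "prod-api-*" { policy = "write" }
`
	policy, err := acl.Parse(rules)
	if err != nil {
		panic(err)
	}

	aclObj, err := acl.NewACL(false, []*acl.Policy{policy})
	if err != nil {
		panic(err)
	}

	// The exact namespace match takes precedence over the glob, so denied.
	fmt.Println(aclObj.AllowNamespace("prod-api-services")) // false

	// Namespaces that only match the glob keep the write policy.
	fmt.Println(aclObj.AllowNamespace("prod-api-billing")) // true
}
```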
diff --git a/api/acl.go b/api/acl.go index 0652e409c52..486bbcb5e3a 100644 --- a/api/acl.go +++ b/api/acl.go @@ -1,7 +1,7 @@ package api import ( - "fmt" + "errors" "time" ) @@ -28,7 +28,7 @@ func (a *ACLPolicies) List(q *QueryOptions) ([]*ACLPolicyListStub, *QueryMeta, e // Upsert is used to create or update a policy func (a *ACLPolicies) Upsert(policy *ACLPolicy, q *WriteOptions) (*WriteMeta, error) { if policy == nil || policy.Name == "" { - return nil, fmt.Errorf("missing policy name") + return nil, errors.New("missing policy name") } wm, err := a.client.write("/v1/acl/policy/"+policy.Name, policy, nil, q) if err != nil { @@ -40,9 +40,9 @@ func (a *ACLPolicies) Upsert(policy *ACLPolicy, q *WriteOptions) (*WriteMeta, er // Delete is used to delete a policy func (a *ACLPolicies) Delete(policyName string, q *WriteOptions) (*WriteMeta, error) { if policyName == "" { - return nil, fmt.Errorf("missing policy name") + return nil, errors.New("missing policy name") } - wm, err := a.client.delete("/v1/acl/policy/"+policyName, nil, q) + wm, err := a.client.delete("/v1/acl/policy/"+policyName, nil, nil, q) if err != nil { return nil, err } @@ -52,7 +52,7 @@ func (a *ACLPolicies) Delete(policyName string, q *WriteOptions) (*WriteMeta, er // Info is used to query a specific policy func (a *ACLPolicies) Info(policyName string, q *QueryOptions) (*ACLPolicy, *QueryMeta, error) { if policyName == "" { - return nil, nil, fmt.Errorf("missing policy name") + return nil, nil, errors.New("missing policy name") } var resp ACLPolicy wm, err := a.client.query("/v1/acl/policy/"+policyName, &resp, q) @@ -72,6 +72,7 @@ func (c *Client) ACLTokens() *ACLTokens { return &ACLTokens{client: c} } +// DEPRECATED: will be removed in Nomad 1.5.0 // Bootstrap is used to get the initial bootstrap token func (a *ACLTokens) Bootstrap(q *WriteOptions) (*ACLToken, *WriteMeta, error) { var resp ACLToken @@ -82,6 +83,23 @@ func (a *ACLTokens) Bootstrap(q *WriteOptions) (*ACLToken, *WriteMeta, error) { return &resp, wm, nil } +// BootstrapOpts is used to get the initial bootstrap token or pass in the one that was provided in the API +func (a *ACLTokens) BootstrapOpts(btoken string, q *WriteOptions) (*ACLToken, *WriteMeta, error) { + if q == nil { + q = &WriteOptions{} + } + req := &BootstrapRequest{ + BootstrapSecret: btoken, + } + + var resp ACLToken + wm, err := a.client.write("/v1/acl/bootstrap", req, &resp, q) + if err != nil { + return nil, nil, err + } + return &resp, wm, nil +} + // List is used to dump all of the tokens. 
func (a *ACLTokens) List(q *QueryOptions) ([]*ACLTokenListStub, *QueryMeta, error) { var resp []*ACLTokenListStub @@ -95,7 +113,7 @@ func (a *ACLTokens) List(q *QueryOptions) ([]*ACLTokenListStub, *QueryMeta, erro // Create is used to create a token func (a *ACLTokens) Create(token *ACLToken, q *WriteOptions) (*ACLToken, *WriteMeta, error) { if token.AccessorID != "" { - return nil, nil, fmt.Errorf("cannot specify Accessor ID") + return nil, nil, errors.New("cannot specify Accessor ID") } var resp ACLToken wm, err := a.client.write("/v1/acl/token", token, &resp, q) @@ -108,7 +126,7 @@ func (a *ACLTokens) Create(token *ACLToken, q *WriteOptions) (*ACLToken, *WriteM // Update is used to update an existing token func (a *ACLTokens) Update(token *ACLToken, q *WriteOptions) (*ACLToken, *WriteMeta, error) { if token.AccessorID == "" { - return nil, nil, fmt.Errorf("missing accessor ID") + return nil, nil, errors.New("missing accessor ID") } var resp ACLToken wm, err := a.client.write("/v1/acl/token/"+token.AccessorID, @@ -122,9 +140,9 @@ func (a *ACLTokens) Update(token *ACLToken, q *WriteOptions) (*ACLToken, *WriteM // Delete is used to delete a token func (a *ACLTokens) Delete(accessorID string, q *WriteOptions) (*WriteMeta, error) { if accessorID == "" { - return nil, fmt.Errorf("missing accessor ID") + return nil, errors.New("missing accessor ID") } - wm, err := a.client.delete("/v1/acl/token/"+accessorID, nil, q) + wm, err := a.client.delete("/v1/acl/token/"+accessorID, nil, nil, q) if err != nil { return nil, err } @@ -134,7 +152,7 @@ func (a *ACLTokens) Delete(accessorID string, q *WriteOptions) (*WriteMeta, erro // Info is used to query a token func (a *ACLTokens) Info(accessorID string, q *QueryOptions) (*ACLToken, *QueryMeta, error) { if accessorID == "" { - return nil, nil, fmt.Errorf("missing accessor ID") + return nil, nil, errors.New("missing accessor ID") } var resp ACLToken wm, err := a.client.query("/v1/acl/token/"+accessorID, &resp, q) @@ -162,7 +180,7 @@ func (a *ACLTokens) UpsertOneTimeToken(q *WriteOptions) (*OneTimeToken, *WriteMe return nil, nil, err } if resp == nil { - return nil, nil, fmt.Errorf("no one-time token returned") + return nil, nil, errors.New("no one-time token returned") } return resp.OneTimeToken, wm, nil } @@ -170,7 +188,7 @@ func (a *ACLTokens) UpsertOneTimeToken(q *WriteOptions) (*OneTimeToken, *WriteMe // ExchangeOneTimeToken is used to create a one-time token func (a *ACLTokens) ExchangeOneTimeToken(secret string, q *WriteOptions) (*ACLToken, *WriteMeta, error) { if secret == "" { - return nil, nil, fmt.Errorf("missing secret ID") + return nil, nil, errors.New("missing secret ID") } req := &OneTimeTokenExchangeRequest{OneTimeSecretID: secret} var resp *OneTimeTokenExchangeResponse @@ -179,7 +197,7 @@ func (a *ACLTokens) ExchangeOneTimeToken(secret string, q *WriteOptions) (*ACLTo return nil, nil, err } if resp == nil { - return nil, nil, fmt.Errorf("no ACL token returned") + return nil, nil, errors.New("no ACL token returned") } return resp.Token, wm, nil } @@ -244,3 +262,8 @@ type OneTimeTokenExchangeRequest struct { type OneTimeTokenExchangeResponse struct { Token *ACLToken } + +// BootstrapRequest is used for when operators provide an ACL Bootstrap Token +type BootstrapRequest struct { + BootstrapSecret string +} diff --git a/api/acl_test.go b/api/acl_test.go index 6e32df71c25..b480af5aaef 100644 --- a/api/acl_test.go +++ b/api/acl_test.go @@ -4,26 +4,21 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - 
"github.com/stretchr/testify/assert" + "github.com/shoenig/test/must" ) func TestACLPolicies_ListUpsert(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() ap := c.ACLPolicies() // Listing when nothing exists returns empty result, qm, err := ap.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if qm.LastIndex != 1 { - t.Fatalf("bad index: %d", qm.LastIndex) - } - if n := len(result); n != 0 { - t.Fatalf("expected 0 policies, got: %d", n) - } + must.NoError(t, err) + must.One(t, qm.LastIndex) + must.Len(t, 0, result) // Register a policy policy := &ACLPolicy{ @@ -35,22 +30,20 @@ func TestACLPolicies_ListUpsert(t *testing.T) { `, } wm, err := ap.Upsert(policy, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Check the list again result, qm, err = ap.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) + assertQueryMeta(t, qm) - if len(result) != 1 { - t.Fatalf("expected policy, got: %#v", result) - } + must.Len(t, 1, result) } func TestACLPolicies_Delete(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() ap := c.ACLPolicies() @@ -65,27 +58,25 @@ func TestACLPolicies_Delete(t *testing.T) { `, } wm, err := ap.Upsert(policy, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Delete the policy wm, err = ap.Delete(policy.Name, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Check the list again result, qm, err := ap.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) + assertQueryMeta(t, qm) - if len(result) != 0 { - t.Fatalf("unexpected policy, got: %#v", result) - } + must.Len(t, 0, result) } func TestACLPolicies_Info(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() ap := c.ACLPolicies() @@ -100,33 +91,29 @@ func TestACLPolicies_Info(t *testing.T) { `, } wm, err := ap.Upsert(policy, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the policy out, qm, err := ap.Info(policy.Name, nil) - assert.Nil(t, err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Equal(t, policy.Name, out.Name) + must.Eq(t, policy.Name, out.Name) } func TestACLTokens_List(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() at := c.ACLTokens() // Expect out bootstrap token result, qm, err := at.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if qm.LastIndex == 0 { - t.Fatalf("bad index: %d", qm.LastIndex) - } - if n := len(result); n != 1 { - t.Fatalf("expected 1 token, got: %d", n) - } + + must.NoError(t, err) + must.NonZero(t, qm.LastIndex) + must.Len(t, 1, result) } func TestACLTokens_CreateUpdate(t *testing.T) { @@ -143,19 +130,19 @@ func TestACLTokens_CreateUpdate(t *testing.T) { // Create the token out, wm, err := at.Create(token, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) - assert.NotNil(t, out) + must.NotNil(t, out) // Update the token out.Name = "other" out2, wm, err := at.Update(out, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) - assert.NotNil(t, out2) + must.NotNil(t, out2) // Verify the change took hold - assert.Equal(t, out.Name, out2.Name) + must.Eq(t, out.Name, out2.Name) } func TestACLTokens_Info(t *testing.T) { @@ -172,19 +159,20 @@ func TestACLTokens_Info(t *testing.T) { // Create the token out, wm, err := at.Create(token, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) - 
assert.NotNil(t, out) + must.NotNil(t, out) // Query the token out2, qm, err := at.Info(out.AccessorID, nil) - assert.Nil(t, err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Equal(t, out, out2) + must.Eq(t, out, out2) } func TestACLTokens_Self(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() at := c.ACLTokens() @@ -197,9 +185,9 @@ func TestACLTokens_Self(t *testing.T) { // Create the token out, wm, err := at.Create(token, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) - assert.NotNil(t, out) + must.NotNil(t, out) // Set the clients token to the new token c.SetSecretID(out.SecretID) @@ -207,14 +195,14 @@ func TestACLTokens_Self(t *testing.T) { // Query the token out2, qm, err := at.Self(nil) - if assert.Nil(t, err) { - assertQueryMeta(t, qm) - assert.Equal(t, out, out2) - } + must.NoError(t, err) + assertQueryMeta(t, qm) + must.Eq(t, out, out2) } func TestACLTokens_Delete(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() at := c.ACLTokens() @@ -227,18 +215,19 @@ func TestACLTokens_Delete(t *testing.T) { // Create the token out, wm, err := at.Create(token, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) - assert.NotNil(t, out) + must.NotNil(t, out) // Delete the token wm, err = at.Delete(out.AccessorID, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) } func TestACL_OneTimeToken(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() at := c.ACLTokens() @@ -251,21 +240,52 @@ func TestACL_OneTimeToken(t *testing.T) { // Create the ACL token out, wm, err := at.Create(token, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) - assert.NotNil(t, out) + must.NotNil(t, out) // Get a one-time token c.SetSecretID(out.SecretID) out2, wm, err := at.UpsertOneTimeToken(nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) - assert.NotNil(t, out2) + must.NotNil(t, out2) // Exchange the one-time token out3, wm, err := at.ExchangeOneTimeToken(out2.OneTimeSecretID, nil) - assert.Nil(t, err) + must.NoError(t, err) + assertWriteMeta(t, wm) + must.NotNil(t, out3) + must.Eq(t, out.AccessorID, out3.AccessorID) +} + +func TestACLTokens_BootstrapInvalidToken(t *testing.T) { + testutil.Parallel(t) + + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { + c.ACL.Enabled = true + }) + defer s.Stop() + at := c.ACLTokens() + + bootkn := "badtoken" + // Bootstrap with invalid token + _, _, err := at.BootstrapOpts(bootkn, nil) + must.EqError(t, err, "Unexpected response code: 400 (invalid acl token)") +} + +func TestACLTokens_BootstrapValidToken(t *testing.T) { + testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { + c.ACL.Enabled = true + }) + defer s.Stop() + at := c.ACLTokens() + + bootkn := "2b778dd9-f5f1-6f29-b4b4-9a5fa948757a" + // Bootstrap with Valid token + out, wm, err := at.BootstrapOpts(bootkn, nil) + must.NoError(t, err) assertWriteMeta(t, wm) - assert.NotNil(t, out3) - assert.Equal(t, out3.AccessorID, out.AccessorID) + must.Eq(t, bootkn, out.SecretID) } diff --git a/api/agent.go b/api/agent.go index 2d19b953693..29d18af70a5 100644 --- a/api/agent.go +++ b/api/agent.go @@ -3,7 +3,7 @@ package api import ( "encoding/json" "fmt" - "io/ioutil" + "io" "net/url" "strconv" ) @@ -399,7 +399,7 @@ func (a *Agent) pprofRequest(req string, opts PprofOptions, q *QueryOptions) ([] return nil, err } - resp, err := ioutil.ReadAll(body) + 
resp, err := io.ReadAll(body) if err != nil { return nil, err } diff --git a/api/agent_test.go b/api/agent_test.go index 9598ba88b4a..e8a0ec9bd46 100644 --- a/api/agent_test.go +++ b/api/agent_test.go @@ -1,22 +1,18 @@ package api import ( - "fmt" - "net/http" - "reflect" "sort" "strings" "testing" "time" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/kr/pretty" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestAgent_Self(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() @@ -25,55 +21,46 @@ func TestAgent_Self(t *testing.T) { // Query the endpoint res, err := a.Self() - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) // Check that we got a valid response - if res.Member.Name == "" { - t.Fatalf("bad member name in response: %#v", res) - } + must.NotEq(t, "", res.Member.Name, must.Sprint("missing member name")) // Local cache was populated - if a.nodeName == "" || a.datacenter == "" || a.region == "" { - t.Fatalf("cache should be populated, got: %#v", a) - } + must.NotEq(t, "", a.nodeName, must.Sprint("cache should be populated")) + must.NotEq(t, "", a.datacenter, must.Sprint("cache should be populated")) + must.NotEq(t, "", a.region, must.Sprint("cache should be populated")) } func TestAgent_NodeName(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() a := c.Agent() // Query the agent for the node name - res, err := a.NodeName() - if err != nil { - t.Fatalf("err: %s", err) - } - if res == "" { - t.Fatalf("expected node name, got nothing") - } + nodeName, err := a.NodeName() + must.NoError(t, err) + must.NotEq(t, "", nodeName) } func TestAgent_Datacenter(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() a := c.Agent() // Query the agent for the datacenter dc, err := a.Datacenter() - if err != nil { - t.Fatalf("err: %s", err) - } - if dc != "dc1" { - t.Fatalf("expected dc1, got: %q", dc) - } + must.NoError(t, err) + must.Eq(t, "dc1", dc) } func TestAgent_Join(t *testing.T) { testutil.Parallel(t) + c1, s1 := makeClient(t, nil, nil) defer s1.Stop() a1 := c1.Agent() @@ -85,54 +72,43 @@ func TestAgent_Join(t *testing.T) { // Attempting to join a nonexistent host returns error n, err := a1.Join("nope") - if err == nil { - t.Fatalf("expected error, got nothing") - } - if n != 0 { - t.Fatalf("expected 0 nodes, got: %d", n) - } + must.Error(t, err) + must.Zero(t, 0, must.Sprint("should be zero errors")) // Returns correctly if join succeeds n, err = a1.Join(s2.SerfAddr) - if err != nil { - t.Fatalf("err: %s", err) - } - if n != 1 { - t.Fatalf("expected 1 node, got: %d", n) - } + must.NoError(t, err) + must.One(t, n) } func TestAgent_Members(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() a := c.Agent() // Query nomad for all the known members mem, err := a.Members() - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) // Check that we got the expected result - if n := len(mem.Members); n != 1 { - t.Fatalf("expected 1 member, got: %d", n) - } - if m := mem.Members[0]; m.Name == "" || m.Addr == "" || m.Port == 0 { - t.Fatalf("bad member: %#v", m) - } + must.Len(t, 1, mem.Members) + must.NotEq(t, "", mem.Members[0].Name) + must.NotEq(t, "", mem.Members[0].Addr) + must.NotEq(t, 0, mem.Members[0].Port) } func TestAgent_ForceLeave(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() a := c.Agent() // 
Force-leave on a nonexistent node does not error - if err := a.ForceLeave("nope"); err != nil { - t.Fatalf("err: %s", err) - } + err := a.ForceLeave("nope") + must.NoError(t, err) // TODO: test force-leave on an existing node } @@ -143,6 +119,7 @@ func (a *AgentMember) String() string { func TestAgents_Sort(t *testing.T) { testutil.Parallel(t) + var sortTests = []struct { in []*AgentMember out []*AgentMember @@ -246,22 +223,20 @@ func TestAgents_Sort(t *testing.T) { } for _, tt := range sortTests { sort.Sort(AgentMembersNameSort(tt.in)) - if !reflect.DeepEqual(tt.in, tt.out) { - t.Errorf("\nexpected: %s\nget : %s", tt.in, tt.out) - } + must.Eq(t, tt.in, tt.out) } } func TestAgent_Health(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() a := c.Agent() health, err := a.Health() - assert.Nil(err) - assert.True(health.Server.Ok) + must.NoError(t, err) + must.True(t, health.Server.Ok) } // TestAgent_MonitorWithNode tests the Monitor endpoint @@ -269,39 +244,14 @@ func TestAgent_Health(t *testing.T) { // functionality for a specific client node func TestAgent_MonitorWithNode(t *testing.T) { testutil.Parallel(t) - rpcPort := 0 + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { - rpcPort = c.Ports.RPC - c.Client = &testutil.ClientConfig{ - Enabled: true, - } + c.DevMode = true }) defer s.Stop() - require.NoError(t, c.Agent().SetServers([]string{fmt.Sprintf("127.0.0.1:%d", rpcPort)})) - agent := c.Agent() - - index := uint64(0) - var node *NodeListStub - // grab a node - testutil.WaitForResult(func() (bool, error) { - nodes, qm, err := c.Nodes().List(&QueryOptions{WaitIndex: index}) - if err != nil { - return false, err - } - index = qm.LastIndex - if len(nodes) != 1 { - return false, fmt.Errorf("expected 1 node but found: %s", pretty.Sprint(nodes)) - } - if nodes[0].Status != "ready" { - return false, fmt.Errorf("node not ready: %s", nodes[0].Status) - } - node = nodes[0] - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) + node := oneNodeFromNodeList(t, c.Nodes()) doneCh := make(chan struct{}) q := &QueryOptions{ @@ -316,7 +266,7 @@ func TestAgent_MonitorWithNode(t *testing.T) { // make a request to generate some logs _, err := agent.NodeName() - require.NoError(t, err) + must.NoError(t, err) // Wait for a log message OUTER: @@ -329,7 +279,7 @@ OUTER: case err := <-errCh: t.Errorf("Error: %v", err) case <-time.After(2 * time.Second): - require.Fail(t, "failed to get a DEBUG log message") + t.Fatal("failed to get a DEBUG log message") } } } @@ -339,6 +289,7 @@ OUTER: // monitor functionality func TestAgent_Monitor(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() @@ -356,7 +307,7 @@ func TestAgent_Monitor(t *testing.T) { // make a request to generate some logs _, err := agent.Region() - require.NoError(t, err) + must.NoError(t, err) // Wait for a log message OUTER: @@ -372,7 +323,7 @@ OUTER: case err := <-errCh: t.Fatalf("error: %v", err) case <-time.After(2 * time.Second): - require.Fail(t, "failed to get a DEBUG log message") + must.Unreachable(t, must.Sprint("failed to get DEBUG log message")) } } } @@ -395,8 +346,8 @@ func TestAgentCPUProfile(t *testing.T) { Seconds: 1, } resp, err := agent.CPUProfile(opts, q) - require.NoError(t, err) - require.NotNil(t, resp) + must.NoError(t, err) + must.NotNil(t, resp) } // Invalid server request @@ -406,9 +357,9 @@ func TestAgentCPUProfile(t *testing.T) { ServerID: "unknown.global", } resp, err := agent.CPUProfile(opts, 
q) - require.Error(t, err) - require.Contains(t, err.Error(), "500 (unknown Nomad server unknown.global)") - require.Nil(t, resp) + must.Error(t, err) + must.ErrorContains(t, err, "500 (unknown Nomad server unknown.global)") + must.Nil(t, resp) } } @@ -426,8 +377,8 @@ func TestAgentTrace(t *testing.T) { } resp, err := agent.Trace(PprofOptions{}, q) - require.NoError(t, err) - require.NotNil(t, resp) + must.NoError(t, err) + must.NotNil(t, resp) } func TestAgentProfile(t *testing.T) { @@ -444,16 +395,16 @@ func TestAgentProfile(t *testing.T) { { resp, err := agent.Lookup("heap", PprofOptions{}, q) - require.NoError(t, err) - require.NotNil(t, resp) + must.NoError(t, err) + must.NotNil(t, resp) } // unknown profile { resp, err := agent.Lookup("invalid", PprofOptions{}, q) - require.Error(t, err) - require.Contains(t, err.Error(), "Unexpected response code: 404") - require.Nil(t, resp) + must.Error(t, err) + must.ErrorContains(t, err, "Unexpected response code: 404") + must.Nil(t, resp) } } @@ -465,12 +416,12 @@ func TestAgent_SchedulerWorkerConfig(t *testing.T) { a := c.Agent() config, err := a.GetSchedulerWorkerConfig(nil) - require.NoError(t, err) - require.NotNil(t, config) + must.NoError(t, err) + must.NotNil(t, config) newConfig := SchedulerWorkerPoolArgs{NumSchedulers: 0, EnabledSchedulers: []string{"_core", "system"}} resp, err := a.SetSchedulerWorkerConfig(newConfig, nil) - require.NoError(t, err) - assert.NotEqual(t, config, resp) + must.NoError(t, err) + must.NotEq(t, config, resp) } func TestAgent_SchedulerWorkerConfig_BadRequest(t *testing.T) { @@ -481,25 +432,26 @@ func TestAgent_SchedulerWorkerConfig_BadRequest(t *testing.T) { a := c.Agent() config, err := a.GetSchedulerWorkerConfig(nil) - require.NoError(t, err) - require.NotNil(t, config) + must.NoError(t, err) + must.NotNil(t, config) newConfig := SchedulerWorkerPoolArgs{NumSchedulers: -1, EnabledSchedulers: []string{"_core", "system"}} _, err = a.SetSchedulerWorkerConfig(newConfig, nil) - require.Error(t, err) - require.Contains(t, err.Error(), fmt.Sprintf("%v (%s)", http.StatusBadRequest, "Invalid request")) + must.Error(t, err) + must.ErrorContains(t, err, "400 (Invalid request)") } func TestAgent_SchedulerWorkersInfo(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() a := c.Agent() info, err := a.GetSchedulerWorkersInfo(nil) - require.NoError(t, err) - require.NotNil(t, info) + must.NoError(t, err) + must.NotNil(t, info) defaultSchedulers := []string{"batch", "system", "sysbatch", "service", "_core"} for _, worker := range info.Schedulers { - require.ElementsMatch(t, defaultSchedulers, worker.EnabledSchedulers) + must.SliceContainsAll(t, defaultSchedulers, worker.EnabledSchedulers) } } diff --git a/api/allocations.go b/api/allocations.go index 67fc6ca2989..e6351744fc0 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -2,16 +2,17 @@ package api import ( "context" - "fmt" + "errors" "io" "sort" + "strings" "time" ) var ( // NodeDownErr marks an operation as not able to complete since the node is // down. - NodeDownErr = fmt.Errorf("node down") + NodeDownErr = errors.New("node down") ) const ( @@ -28,6 +29,10 @@ const ( AllocClientStatusLost = "lost" ) +const ( + AllocRestartReasonWithinPolicy = "Restart within policy" +) + // Allocations is used to query the alloc-related endpoints. 
type Allocations struct { client *Client @@ -101,8 +106,7 @@ func (a *Allocations) Exec(ctx context.Context, func (a *Allocations) Stats(alloc *Allocation, q *QueryOptions) (*AllocResourceUsage, error) { var resp AllocResourceUsage - path := fmt.Sprintf("/v1/client/allocation/%s/stats", alloc.ID) - _, err := a.client.query(path, &resp, q) + _, err := a.client.query("/v1/client/allocation/"+alloc.ID+"/stats", &resp, q) return &resp, err } @@ -112,6 +116,9 @@ func (a *Allocations) GC(alloc *Allocation, q *QueryOptions) error { return err } +// Restart restarts the tasks that are currently running or a specific task if +// taskName is provided. An error is returned if the task to be restarted is +// not running. func (a *Allocations) Restart(alloc *Allocation, taskName string, q *QueryOptions) error { req := AllocationRestartRequest{ TaskName: taskName, @@ -122,6 +129,18 @@ func (a *Allocations) Restart(alloc *Allocation, taskName string, q *QueryOption return err } +// RestartAllTasks restarts all tasks in the allocation, regardless of +// lifecycle type or state. Tasks will restart following their lifecycle order. +func (a *Allocations) RestartAllTasks(alloc *Allocation, q *QueryOptions) error { + req := AllocationRestartRequest{ + AllTasks: true, + } + + var resp struct{} + _, err := a.client.putQuery("/v1/client/allocation/"+alloc.ID+"/restart", &req, &resp, q) + return err +} + func (a *Allocations) Stop(alloc *Allocation, q *QueryOptions) (*AllocStopResponse, error) { var resp AllocStopResponse _, err := a.client.putQuery("/v1/allocation/"+alloc.ID+"/stop", nil, &resp, q) @@ -403,6 +422,7 @@ func (a Allocation) RescheduleInfo(t time.Time) (int, int) { type AllocationRestartRequest struct { TaskName string + AllTasks bool } type AllocSignalRequest struct { @@ -489,3 +509,12 @@ type ExecStreamingOutput struct { Exited bool `json:"exited,omitempty"` Result *ExecStreamingExitResult `json:"result,omitempty"` } + +func AllocSuffix(name string) string { + idx := strings.LastIndex(name, "[") + if idx == -1 { + return "" + } + suffix := name[idx:] + return suffix +} diff --git a/api/allocations_test.go b/api/allocations_test.go index 4f2993e82f2..c78ac0a3f99 100644 --- a/api/allocations_test.go +++ b/api/allocations_test.go @@ -4,41 +4,39 @@ import ( "context" "fmt" "os" - "reflect" "sort" "testing" "time" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestAllocations_List(t *testing.T) { + testutil.RequireRoot(t) testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) defer s.Stop() a := c.Allocations() + // wait for node + _ = oneNodeFromNodeList(t, c.Nodes()) + // Querying when no allocs exist returns nothing allocs, qm, err := a.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if qm.LastIndex != 0 { - t.Fatalf("bad index: %d", qm.LastIndex) - } - if n := len(allocs); n != 0 { - t.Fatalf("expected 0 allocs, got: %d", n) - } + must.NoError(t, err) + must.Zero(t, qm.LastIndex) + must.Len(t, 0, allocs) // Create a job and attempt to register it job := testJob() resp, wm, err := c.Jobs().Register(job, nil) - require.NoError(t, err) - require.NotNil(t, resp) - require.NotEmpty(t, resp.EvalID) + must.NoError(t, err) + must.NotNil(t, resp) + must.UUIDv4(t, resp.EvalID) assertWriteMeta(t, wm) // List the allocations again @@ -46,34 +44,29 @@ func TestAllocations_List(t *testing.T) { WaitIndex: wm.LastIndex, } allocs, qm, err = a.List(qo) - 
require.NoError(t, err) - require.NotZero(t, qm.LastIndex) + must.NoError(t, err) + must.NonZero(t, qm.LastIndex) // Check that we got the allocation back - require.Len(t, allocs, 1) - require.Equal(t, resp.EvalID, allocs[0].EvalID) + must.Len(t, 1, allocs) + must.Eq(t, resp.EvalID, allocs[0].EvalID) // Resources should be unset by default - require.Nil(t, allocs[0].AllocatedResources) + must.Nil(t, allocs[0].AllocatedResources) } func TestAllocations_PrefixList(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() a := c.Allocations() // Querying when no allocs exist returns nothing allocs, qm, err := a.PrefixList("") - if err != nil { - t.Fatalf("err: %s", err) - } - if qm.LastIndex != 0 { - t.Fatalf("bad index: %d", qm.LastIndex) - } - if n := len(allocs); n != 0 { - t.Fatalf("expected 0 allocs, got: %d", n) - } + must.NoError(t, err) + must.Zero(t, qm.LastIndex) + must.Len(t, 0, allocs) // TODO: do something that causes an allocation to actually happen // so we can query for them. @@ -106,38 +99,49 @@ func TestAllocations_PrefixList(t *testing.T) { } func TestAllocations_List_Resources(t *testing.T) { + testutil.RequireRoot(t) testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) defer s.Stop() a := c.Allocations() + // wait for node + _ = oneNodeFromNodeList(t, c.Nodes()) + // Create a job and register it job := testJob() resp, wm, err := c.Jobs().Register(job, nil) - require.NoError(t, err) - require.NotNil(t, resp) - require.NotEmpty(t, resp.EvalID) + must.NoError(t, err) + must.NotNil(t, resp) + must.UUIDv4(t, resp.EvalID) assertWriteMeta(t, wm) - // List the allocations qo := &QueryOptions{ Params: map[string]string{"resources": "true"}, WaitIndex: wm.LastIndex, } - allocs, qm, err := a.List(qo) - require.NoError(t, err) - require.NotZero(t, qm.LastIndex) + var allocationStubs []*AllocationListStub + var qm *QueryMeta + allocationStubs, qm, err = a.List(qo) + must.NoError(t, err) // Check that we got the allocation back with resources - require.Len(t, allocs, 1) - require.Equal(t, resp.EvalID, allocs[0].EvalID) - require.NotNil(t, allocs[0].AllocatedResources) + must.Positive(t, qm.LastIndex) + must.Len(t, 1, allocationStubs) + alloc := allocationStubs[0] + must.Eq(t, resp.EvalID, alloc.EvalID, + must.Sprintf("registration: %#v", resp), + must.Sprintf("allocation: %#v", alloc), + ) + must.NotNil(t, alloc.AllocatedResources) } func TestAllocations_CreateIndexSort(t *testing.T) { testutil.Parallel(t) + allocs := []*AllocationListStub{ {CreateIndex: 2}, {CreateIndex: 1}, @@ -150,22 +154,21 @@ func TestAllocations_CreateIndexSort(t *testing.T) { {CreateIndex: 2}, {CreateIndex: 1}, } - if !reflect.DeepEqual(allocs, expect) { - t.Fatalf("\n\n%#v\n\n%#v", allocs, expect) - } + must.Eq(t, allocs, expect) } func TestAllocations_RescheduleInfo(t *testing.T) { testutil.Parallel(t) + // Create a job, task group and alloc job := &Job{ - Name: stringToPtr("foo"), - Namespace: stringToPtr(DefaultNamespace), - ID: stringToPtr("bar"), - ParentID: stringToPtr("lol"), + Name: pointerOf("foo"), + Namespace: pointerOf(DefaultNamespace), + ID: pointerOf("bar"), + ParentID: pointerOf("lol"), TaskGroups: []*TaskGroup{ { - Name: stringToPtr("bar"), + Name: pointerOf("bar"), Tasks: []*Task{ { Name: "task1", @@ -205,8 +208,8 @@ func TestAllocations_RescheduleInfo(t *testing.T) { { desc: "no reschedule events", reschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(3), - Interval: timeToPtr(15 * time.Minute), + Attempts: 
pointerOf(3), + Interval: pointerOf(15 * time.Minute), }, expAttempted: 0, expTotal: 3, @@ -214,8 +217,8 @@ func TestAllocations_RescheduleInfo(t *testing.T) { { desc: "all reschedule events within interval", reschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(3), - Interval: timeToPtr(15 * time.Minute), + Attempts: pointerOf(3), + Interval: pointerOf(15 * time.Minute), }, time: time.Now(), rescheduleTracker: &RescheduleTracker{ @@ -231,8 +234,8 @@ func TestAllocations_RescheduleInfo(t *testing.T) { { desc: "some reschedule events outside interval", reschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(3), - Interval: timeToPtr(15 * time.Minute), + Attempts: pointerOf(3), + Interval: pointerOf(15 * time.Minute), }, time: time.Now(), rescheduleTracker: &RescheduleTracker{ @@ -258,12 +261,11 @@ func TestAllocations_RescheduleInfo(t *testing.T) { for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { - require := require.New(t) alloc.RescheduleTracker = tc.rescheduleTracker job.TaskGroups[0].ReschedulePolicy = tc.reschedulePolicy attempted, total := alloc.RescheduleInfo(tc.time) - require.Equal(tc.expAttempted, attempted) - require.Equal(tc.expTotal, total) + must.Eq(t, tc.expAttempted, attempted) + must.Eq(t, tc.expTotal, total) }) } @@ -271,18 +273,20 @@ func TestAllocations_RescheduleInfo(t *testing.T) { // TestAllocations_ExecErrors ensures errors are properly formatted func TestAllocations_ExecErrors(t *testing.T) { + testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() a := c.Allocations() job := &Job{ - Name: stringToPtr("foo"), - Namespace: stringToPtr(DefaultNamespace), - ID: stringToPtr("bar"), - ParentID: stringToPtr("lol"), + Name: pointerOf("foo"), + Namespace: pointerOf(DefaultNamespace), + ID: pointerOf("bar"), + ParentID: pointerOf("lol"), TaskGroups: []*TaskGroup{ { - Name: stringToPtr("bar"), + Name: pointerOf("bar"), Tasks: []*Task{ { Name: "task1", @@ -312,8 +316,8 @@ func TestAllocations_ExecErrors(t *testing.T) { // ensure the error is what we expect exitCode, err := a.Exec(context.Background(), alloc, "bar", false, []string{"command"}, os.Stdin, os.Stdout, os.Stderr, sizeCh, nil) - require.Equal(t, exitCode, -2) - require.Equal(t, err.Error(), fmt.Sprintf("Unknown allocation \"%s\"", allocID)) + must.Eq(t, -2, exitCode) + must.EqError(t, err, fmt.Sprintf("Unknown allocation \"%s\"", allocID)) } func TestAllocation_ServerTerminalStatus(t *testing.T) { @@ -343,7 +347,7 @@ func TestAllocation_ServerTerminalStatus(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - require.Equal(t, tc.expectedOutput, tc.inputAllocation.ServerTerminalStatus(), tc.name) + must.Eq(t, tc.expectedOutput, tc.inputAllocation.ServerTerminalStatus()) }) } } @@ -385,18 +389,20 @@ func TestAllocation_ClientTerminalStatus(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - require.Equal(t, tc.expectedOutput, tc.inputAllocation.ClientTerminalStatus(), tc.name) + must.Eq(t, tc.expectedOutput, tc.inputAllocation.ClientTerminalStatus()) }) } } func TestAllocations_ShouldMigrate(t *testing.T) { testutil.Parallel(t) - require.True(t, DesiredTransition{Migrate: boolToPtr(true)}.ShouldMigrate()) - require.False(t, DesiredTransition{}.ShouldMigrate()) - require.False(t, DesiredTransition{Migrate: boolToPtr(false)}.ShouldMigrate()) + + must.True(t, DesiredTransition{Migrate: pointerOf(true)}.ShouldMigrate()) + must.False(t, DesiredTransition{}.ShouldMigrate()) + must.False(t, DesiredTransition{Migrate: 
pointerOf(false)}.ShouldMigrate()) } func TestAllocations_Services(t *testing.T) { + t.Skip("needs to be implemented") // TODO(jrasell) add tests once registration process is in place. } diff --git a/api/api.go b/api/api.go index e32707ebfdb..8d35fb01604 100644 --- a/api/api.go +++ b/api/api.go @@ -18,8 +18,8 @@ import ( "time" "github.com/gorilla/websocket" - cleanhttp "github.com/hashicorp/go-cleanhttp" - rootcerts "github.com/hashicorp/go-rootcerts" + "github.com/hashicorp/go-cleanhttp" + "github.com/hashicorp/go-rootcerts" ) var ( @@ -33,6 +33,11 @@ const ( // AllNamespacesNamespace is a sentinel Namespace value to indicate that api should search for // jobs and allocations in all the namespaces the requester can access. AllNamespacesNamespace = "*" + + // PermissionDeniedErrorContent is the string content of an error returned + // by the API which indicates the caller does not have permission to + // perform the action. + PermissionDeniedErrorContent = "Permission denied" ) // QueryOptions are used to parametrize a query @@ -340,9 +345,9 @@ func DefaultConfig() *Config { // otherwise, returns the same client func cloneWithTimeout(httpClient *http.Client, t time.Duration) (*http.Client, error) { if httpClient == nil { - return nil, fmt.Errorf("nil HTTP client") + return nil, errors.New("nil HTTP client") } else if httpClient.Transport == nil { - return nil, fmt.Errorf("nil HTTP client transport") + return nil, errors.New("nil HTTP client transport") } if t.Nanoseconds() < 0 { @@ -393,7 +398,7 @@ func ConfigureTLS(httpClient *http.Client, tlsConfig *TLSConfig) error { return nil } if httpClient == nil { - return fmt.Errorf("config HTTP Client must be set") + return errors.New("config HTTP Client must be set") } var clientCert tls.Certificate @@ -407,7 +412,7 @@ func ConfigureTLS(httpClient *http.Client, tlsConfig *TLSConfig) error { } foundClientCert = true } else { - return fmt.Errorf("Both client cert and client key must be provided") + return errors.New("Both client cert and client key must be provided") } } else if len(tlsConfig.ClientCertPEM) != 0 || len(tlsConfig.ClientKeyPEM) != 0 { if len(tlsConfig.ClientCertPEM) != 0 && len(tlsConfig.ClientKeyPEM) != 0 { @@ -418,7 +423,7 @@ func ConfigureTLS(httpClient *http.Client, tlsConfig *TLSConfig) error { } foundClientCert = true } else { - return fmt.Errorf("Both client cert and client key must be provided") + return errors.New("Both client cert and client key must be provided") } } @@ -844,7 +849,7 @@ func (c *Client) websocket(endpoint string, q *QueryOptions) (*websocket.Conn, * transport, ok := c.httpClient.Transport.(*http.Transport) if !ok { - return nil, nil, fmt.Errorf("unsupported transport") + return nil, nil, errors.New("unsupported transport") } dialer := websocket.Dialer{ ReadBufferSize: 4096, @@ -982,14 +987,15 @@ func (c *Client) write(endpoint string, in, out interface{}, q *WriteOptions) (* return wm, nil } -// delete is used to do a DELETE request against an endpoint -// and serialize/deserialized using the standard Nomad conventions. -func (c *Client) delete(endpoint string, out interface{}, q *WriteOptions) (*WriteMeta, error) { +// delete is used to do a DELETE request against an endpoint and +// serialize/deserialized using the standard Nomad conventions. 
+func (c *Client) delete(endpoint string, in, out interface{}, q *WriteOptions) (*WriteMeta, error) { r, err := c.newRequest("DELETE", endpoint) if err != nil { return nil, err } r.setWriteOptions(q) + r.obj = in rtt, resp, err := requireOK(c.doRequest(r)) if err != nil { return nil, err @@ -1090,9 +1096,10 @@ func requireOK(d time.Duration, resp *http.Response, e error) (time.Duration, *h } if resp.StatusCode != 200 { var buf bytes.Buffer - io.Copy(&buf, resp.Body) - resp.Body.Close() - return d, nil, fmt.Errorf("Unexpected response code: %d (%s)", resp.StatusCode, buf.Bytes()) + _, _ = io.Copy(&buf, resp.Body) + _ = resp.Body.Close() + body := strings.TrimSpace(buf.String()) + return d, nil, fmt.Errorf("Unexpected response code: %d (%s)", resp.StatusCode, body) } return d, resp, nil } diff --git a/api/api_test.go b/api/api_test.go index 7b5d35346e5..bde4315a83d 100644 --- a/api/api_test.go +++ b/api/api_test.go @@ -11,25 +11,16 @@ import ( "net/http" "net/http/httptest" "net/url" - "os" "strings" "testing" "time" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) type configCallback func(c *Config) -// seen is used to track which tests we have already marked as parallel -var seen map[*testing.T]struct{} - -func init() { - seen = make(map[*testing.T]struct{}) -} - func makeACLClient(t *testing.T, cb1 configCallback, cb2 testutil.ServerConfigCallback) (*Client, *testutil.TestServer, *ACLToken) { client, server := makeClient(t, cb1, func(c *testutil.TestServerConfig) { @@ -71,6 +62,7 @@ func makeClient(t *testing.T, cb1 configCallback, func TestRequestTime(t *testing.T) { testutil.Parallel(t) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { time.Sleep(100 * time.Millisecond) d, err := json.Marshal(struct{ Done bool }{true}) @@ -78,7 +70,7 @@ func TestRequestTime(t *testing.T) { http.Error(w, err.Error(), http.StatusInternalServerError) return } - w.Write(d) + _, _ = w.Write(d) })) defer srv.Close() @@ -108,7 +100,7 @@ func TestRequestTime(t *testing.T) { t.Errorf("bad request time: %d", wm.RequestTime) } - wm, err = client.delete("/", &out, nil) + wm, err = client.delete("/", nil, &out, nil) if err != nil { t.Fatalf("delete err: %v", err) } @@ -118,32 +110,24 @@ func TestRequestTime(t *testing.T) { } func TestDefaultConfig_env(t *testing.T) { - testutil.Parallel(t) - url := "http://1.2.3.4:5678" + // no parallel + + testURL := "http://1.2.3.4:5678" auth := []string{"nomaduser", "12345"} region := "test" namespace := "dev" token := "foobar" - os.Setenv("NOMAD_ADDR", url) - defer os.Setenv("NOMAD_ADDR", "") - - os.Setenv("NOMAD_REGION", region) - defer os.Setenv("NOMAD_REGION", "") - - os.Setenv("NOMAD_NAMESPACE", namespace) - defer os.Setenv("NOMAD_NAMESPACE", "") - - os.Setenv("NOMAD_HTTP_AUTH", strings.Join(auth, ":")) - defer os.Setenv("NOMAD_HTTP_AUTH", "") - - os.Setenv("NOMAD_TOKEN", token) - defer os.Setenv("NOMAD_TOKEN", "") + t.Setenv("NOMAD_ADDR", testURL) + t.Setenv("NOMAD_REGION", region) + t.Setenv("NOMAD_NAMESPACE", namespace) + t.Setenv("NOMAD_HTTP_AUTH", strings.Join(auth, ":")) + t.Setenv("NOMAD_TOKEN", token) config := DefaultConfig() - if config.Address != url { - t.Errorf("expected %q to be %q", config.Address, url) + if config.Address != testURL { + t.Errorf("expected %q to be %q", config.Address, testURL) } if config.Region != region { @@ -186,11 +170,11 @@ func TestSetQueryOptions(t *testing.T) { try := func(key, 
exp string) { result := r.params.Get(key) - require.Equal(t, exp, result) + must.Eq(t, exp, result) } // Check auth token is set - require.Equal(t, "foobar", r.token) + must.Eq(t, "foobar", r.token) // Check query parameters are set try("region", "foo") @@ -385,12 +369,12 @@ func TestQueryString(t *testing.T) { } func TestClient_NodeClient(t *testing.T) { - http := "testdomain:4646" + addr := "testdomain:4646" tlsNode := func(string, *QueryOptions) (*Node, *QueryMeta, error) { return &Node{ ID: generateUUID(), Status: "ready", - HTTPAddr: http, + HTTPAddr: addr, TLSEnabled: true, }, nil, nil } @@ -398,7 +382,7 @@ func TestClient_NodeClient(t *testing.T) { return &Node{ ID: generateUUID(), Status: "ready", - HTTPAddr: http, + HTTPAddr: addr, TLSEnabled: false, }, nil, nil } @@ -409,15 +393,15 @@ func TestClient_NodeClient(t *testing.T) { } clientNoRegion, err := NewClient(DefaultConfig()) - assert.Nil(t, err) + must.NoError(t, err) regionConfig := DefaultConfig() regionConfig.Region = "bar" clientRegion, err := NewClient(regionConfig) - assert.Nil(t, err) + must.NoError(t, err) - expectedTLSAddr := fmt.Sprintf("https://%s", http) - expectedNoTLSAddr := fmt.Sprintf("http://%s", http) + expectedTLSAddr := fmt.Sprintf("https://%s", addr) + expectedNoTLSAddr := fmt.Sprintf("http://%s", addr) cases := []struct { Node nodeLookup @@ -496,13 +480,12 @@ func TestClient_NodeClient(t *testing.T) { for _, c := range cases { name := fmt.Sprintf("%s__%s__%s", c.ExpectedAddr, c.ExpectedRegion, c.ExpectedTLSServerName) t.Run(name, func(t *testing.T) { - assert := assert.New(t) - nodeClient, err := c.Client.getNodeClientImpl("testID", -1, c.QueryOptions, c.Node) - assert.Nil(err) - assert.Equal(c.ExpectedRegion, nodeClient.config.Region) - assert.Equal(c.ExpectedAddr, nodeClient.config.Address) - assert.NotNil(nodeClient.config.TLSConfig) - assert.Equal(c.ExpectedTLSServerName, nodeClient.config.TLSConfig.TLSServerName) + nodeClient, getErr := c.Client.getNodeClientImpl("testID", -1, c.QueryOptions, c.Node) + must.NoError(t, getErr) + must.Eq(t, c.ExpectedRegion, nodeClient.config.Region) + must.Eq(t, c.ExpectedAddr, nodeClient.config.Address) + must.NotNil(t, nodeClient.config.TLSConfig) + must.Eq(t, c.ExpectedTLSServerName, nodeClient.config.TLSConfig.TLSServerName) }) } } @@ -511,48 +494,44 @@ func TestCloneHttpClient(t *testing.T) { client := defaultHttpClient() originalTransport := client.Transport.(*http.Transport) originalTransport.Proxy = func(*http.Request) (*url.URL, error) { - return nil, fmt.Errorf("stub function") + return nil, errors.New("stub function") } t.Run("closing with negative timeout", func(t *testing.T) { clone, err := cloneWithTimeout(client, -1) - require.True(t, originalTransport == client.Transport, "original transport changed") - require.NoError(t, err) - require.Equal(t, client, clone) - require.True(t, client == clone) + must.True(t, originalTransport == client.Transport, must.Sprint("original transport changed")) + must.NoError(t, err) + must.True(t, client == clone) }) t.Run("closing with positive timeout", func(t *testing.T) { clone, err := cloneWithTimeout(client, 1*time.Second) - require.True(t, originalTransport == client.Transport, "original transport changed") - require.NoError(t, err) - require.NotEqual(t, client, clone) - require.True(t, client != clone) - require.True(t, client.Transport != clone.Transport) + must.True(t, originalTransport == client.Transport, must.Sprint("original transport changed")) + must.NoError(t, err) + must.True(t, client != clone) + 
must.True(t, client.Transport != clone.Transport) // test that proxy function is the same in clone clonedProxy := clone.Transport.(*http.Transport).Proxy - require.NotNil(t, clonedProxy) + must.NotNil(t, clonedProxy) _, err = clonedProxy(nil) - require.Error(t, err) - require.Equal(t, "stub function", err.Error()) + must.Error(t, err) + must.EqError(t, err, "stub function") // if we reset transport, the strutcs are equal clone.Transport = originalTransport - require.Equal(t, client, clone) + must.Eq(t, client, clone) }) } func TestClient_HeaderRaceCondition(t *testing.T) { - require := require.New(t) - conf := DefaultConfig() conf.Headers = map[string][]string{ "test-header": {"a"}, } client, err := NewClient(conf) - require.NoError(err) + must.NoError(t, err) c := make(chan int) @@ -564,9 +543,9 @@ func TestClient_HeaderRaceCondition(t *testing.T) { req, _ := client.newRequest("GET", "/any/path/will/do") r, _ := req.toHTTP() - require.Len(r.Header, 2, "local request should have two headers") - require.Equal(2, <-c, "goroutine request should have two headers") - require.Len(conf.Headers, 1, "config headers should not mutate") + must.MapLen(t, 2, r.Header, must.Sprint("local request should have two headers")) + must.Eq(t, 2, <-c, must.Sprint("goroutine request should have two headers")) + must.MapLen(t, 1, conf.Headers, must.Sprint("config headers should not mutate")) } func TestClient_autoUnzip(t *testing.T) { @@ -574,7 +553,7 @@ func TestClient_autoUnzip(t *testing.T) { try := func(resp *http.Response, exp error) { err := client.autoUnzip(resp) - require.Equal(t, exp, err) + must.Eq(t, exp, err) } // response object is nil @@ -604,9 +583,9 @@ func TestClient_autoUnzip(t *testing.T) { var b bytes.Buffer w := gzip.NewWriter(&b) _, err := w.Write([]byte("hello world")) - require.NoError(t, err) + must.NoError(t, err) err = w.Close() - require.NoError(t, err) + must.NoError(t, err) // content-encoding is gzip and body is gzip data try(&http.Response{ diff --git a/api/compose_test.go b/api/compose_test.go index c70b244cdc5..b92cabf4e69 100644 --- a/api/compose_test.go +++ b/api/compose_test.go @@ -1,10 +1,10 @@ package api import ( - "reflect" "testing" "github.com/hashicorp/nomad/api/internal/testutil" + "github.com/shoenig/test/must" ) func TestCompose(t *testing.T) { @@ -15,13 +15,13 @@ func TestCompose(t *testing.T) { SetMeta("foo", "bar"). Constrain(NewConstraint("kernel.name", "=", "linux")). 
Require(&Resources{ - CPU: intToPtr(1250), - MemoryMB: intToPtr(1024), - DiskMB: intToPtr(2048), + CPU: pointerOf(1250), + MemoryMB: pointerOf(1024), + DiskMB: pointerOf(2048), Networks: []*NetworkResource{ { CIDR: "0.0.0.0/0", - MBits: intToPtr(100), + MBits: pointerOf(100), ReservedPorts: []Port{{"", 80, 0, ""}, {"", 443, 0, ""}}, }, }, @@ -47,11 +47,11 @@ func TestCompose(t *testing.T) { // Check that the composed result looks correct expect := &Job{ - Region: stringToPtr("global"), - ID: stringToPtr("job1"), - Name: stringToPtr("myjob"), - Type: stringToPtr(JobTypeService), - Priority: intToPtr(2), + Region: pointerOf("global"), + ID: pointerOf("job1"), + Name: pointerOf("myjob"), + Type: pointerOf(JobTypeService), + Priority: pointerOf(2), Datacenters: []string{ "dc1", }, @@ -67,8 +67,8 @@ func TestCompose(t *testing.T) { }, TaskGroups: []*TaskGroup{ { - Name: stringToPtr("grp1"), - Count: intToPtr(2), + Name: pointerOf("grp1"), + Count: pointerOf(2), Constraints: []*Constraint{ { LTarget: "kernel.name", @@ -81,13 +81,13 @@ func TestCompose(t *testing.T) { LTarget: "${node.class}", RTarget: "large", Operand: "=", - Weight: int8ToPtr(50), + Weight: pointerOf(int8(50)), }, }, Spreads: []*Spread{ { Attribute: "${node.datacenter}", - Weight: int8ToPtr(30), + Weight: pointerOf(int8(30)), SpreadTarget: []*SpreadTarget{ { Value: "dc1", @@ -105,13 +105,13 @@ func TestCompose(t *testing.T) { Name: "task1", Driver: "exec", Resources: &Resources{ - CPU: intToPtr(1250), - MemoryMB: intToPtr(1024), - DiskMB: intToPtr(2048), + CPU: pointerOf(1250), + MemoryMB: pointerOf(1024), + DiskMB: pointerOf(2048), Networks: []*NetworkResource{ { CIDR: "0.0.0.0/0", - MBits: intToPtr(100), + MBits: pointerOf(100), ReservedPorts: []Port{ {"", 80, 0, ""}, {"", 443, 0, ""}, @@ -140,7 +140,5 @@ func TestCompose(t *testing.T) { }, }, } - if !reflect.DeepEqual(job, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, job) - } + must.Eq(t, expect, job) } diff --git a/api/constraint_test.go b/api/constraint_test.go index 498da0f9fcc..48887b91000 100644 --- a/api/constraint_test.go +++ b/api/constraint_test.go @@ -1,21 +1,20 @@ package api import ( - "reflect" "testing" "github.com/hashicorp/nomad/api/internal/testutil" + "github.com/shoenig/test/must" ) func TestCompose_Constraints(t *testing.T) { testutil.Parallel(t) + c := NewConstraint("kernel.name", "=", "darwin") expect := &Constraint{ LTarget: "kernel.name", RTarget: "darwin", Operand: "=", } - if !reflect.DeepEqual(c, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, c) - } + must.Eq(t, expect, c) } diff --git a/api/consul.go b/api/consul.go index 9a11187c08c..2c1c9d10aa8 100644 --- a/api/consul.go +++ b/api/consul.go @@ -1,6 +1,8 @@ package api -import "time" +import ( + "time" +) // Consul represents configuration related to consul. type Consul struct { @@ -36,7 +38,7 @@ func (c *Consul) MergeNamespace(namespace *string) { } } -// ConsulConnect represents a Consul Connect jobspec stanza. +// ConsulConnect represents a Consul Connect jobspec block. type ConsulConnect struct { Native bool `hcl:"native,optional"` Gateway *ConsulGateway `hcl:"gateway,block"` @@ -55,7 +57,7 @@ func (cc *ConsulConnect) Canonicalize() { } // ConsulSidecarService represents a Consul Connect SidecarService jobspec -// stanza. +// block. 
type ConsulSidecarService struct { Tags []string `hcl:"tags,optional"` Port string `hcl:"port,optional"` @@ -75,7 +77,6 @@ func (css *ConsulSidecarService) Canonicalize() { css.Proxy.Canonicalize() } - // SidecarTask represents a subset of Task fields that can be set to override // the fields of the Task generated for the sidecar type SidecarTask struct { @@ -122,19 +123,20 @@ func (st *SidecarTask) Canonicalize() { } if st.KillTimeout == nil { - st.KillTimeout = timeToPtr(5 * time.Second) + st.KillTimeout = pointerOf(5 * time.Second) } if st.ShutdownDelay == nil { - st.ShutdownDelay = timeToPtr(0) + st.ShutdownDelay = pointerOf(time.Duration(0)) } } -// ConsulProxy represents a Consul Connect sidecar proxy jobspec stanza. +// ConsulProxy represents a Consul Connect sidecar proxy jobspec block. type ConsulProxy struct { LocalServiceAddress string `mapstructure:"local_service_address" hcl:"local_service_address,optional"` LocalServicePort int `mapstructure:"local_service_port" hcl:"local_service_port,optional"` - ExposeConfig *ConsulExposeConfig `mapstructure:"expose" hcl:"expose,block"` + Expose *ConsulExposeConfig `mapstructure:"expose" hcl:"expose,block"` + ExposeConfig *ConsulExposeConfig // Deprecated: only to maintain backwards compatibility. Use Expose instead. Upstreams []*ConsulUpstream `hcl:"upstreams,block"` Config map[string]interface{} `hcl:"config,block"` } @@ -144,7 +146,7 @@ func (cp *ConsulProxy) Canonicalize() { return } - cp.ExposeConfig.Canonicalize() + cp.Expose.Canonicalize() if len(cp.Upstreams) == 0 { cp.Upstreams = nil @@ -194,13 +196,14 @@ func (c *ConsulMeshGateway) Copy() *ConsulMeshGateway { } } -// ConsulUpstream represents a Consul Connect upstream jobspec stanza. +// ConsulUpstream represents a Consul Connect upstream jobspec block. 
type ConsulUpstream struct { - DestinationName string `mapstructure:"destination_name" hcl:"destination_name,optional"` - LocalBindPort int `mapstructure:"local_bind_port" hcl:"local_bind_port,optional"` - Datacenter string `mapstructure:"datacenter" hcl:"datacenter,optional"` - LocalBindAddress string `mapstructure:"local_bind_address" hcl:"local_bind_address,optional"` - MeshGateway *ConsulMeshGateway `mapstructure:"mesh_gateway" hcl:"mesh_gateway,block"` + DestinationName string `mapstructure:"destination_name" hcl:"destination_name,optional"` + DestinationNamespace string `mapstructure:"destination_namespace" hcl:"destination_namespace,optional"` + LocalBindPort int `mapstructure:"local_bind_port" hcl:"local_bind_port,optional"` + Datacenter string `mapstructure:"datacenter" hcl:"datacenter,optional"` + LocalBindAddress string `mapstructure:"local_bind_address" hcl:"local_bind_address,optional"` + MeshGateway *ConsulMeshGateway `mapstructure:"mesh_gateway" hcl:"mesh_gateway,block"` } func (cu *ConsulUpstream) Copy() *ConsulUpstream { @@ -208,11 +211,12 @@ func (cu *ConsulUpstream) Copy() *ConsulUpstream { return nil } return &ConsulUpstream{ - DestinationName: cu.DestinationName, - LocalBindPort: cu.LocalBindPort, - Datacenter: cu.Datacenter, - LocalBindAddress: cu.LocalBindAddress, - MeshGateway: cu.MeshGateway.Copy(), + DestinationName: cu.DestinationName, + DestinationNamespace: cu.DestinationNamespace, + LocalBindPort: cu.LocalBindPort, + Datacenter: cu.Datacenter, + LocalBindAddress: cu.LocalBindAddress, + MeshGateway: cu.MeshGateway.Copy(), } } @@ -224,7 +228,8 @@ func (cu *ConsulUpstream) Canonicalize() { } type ConsulExposeConfig struct { - Path []*ConsulExposePath `mapstructure:"path" hcl:"path,block"` + Paths []*ConsulExposePath `mapstructure:"path" hcl:"path,block"` + Path []*ConsulExposePath // Deprecated: only to maintain backwards compatibility. Use Paths instead. } func (cec *ConsulExposeConfig) Canonicalize() { @@ -232,6 +237,10 @@ func (cec *ConsulExposeConfig) Canonicalize() { return } + if len(cec.Paths) == 0 { + cec.Paths = nil + } + if len(cec.Path) == 0 { cec.Path = nil } @@ -312,7 +321,7 @@ func (p *ConsulGatewayProxy) Canonicalize() { if p.ConnectTimeout == nil { // same as the default from consul - p.ConnectTimeout = timeToPtr(defaultGatewayConnectTimeout) + p.ConnectTimeout = pointerOf(defaultGatewayConnectTimeout) } if len(p.EnvoyGatewayBindAddresses) == 0 { @@ -346,7 +355,7 @@ func (p *ConsulGatewayProxy) Copy() *ConsulGatewayProxy { } return &ConsulGatewayProxy{ - ConnectTimeout: timeToPtr(*p.ConnectTimeout), + ConnectTimeout: pointerOf(*p.ConnectTimeout), EnvoyGatewayBindTaggedAddresses: p.EnvoyGatewayBindTaggedAddresses, EnvoyGatewayBindAddresses: binds, EnvoyGatewayNoDefaultBind: p.EnvoyGatewayNoDefaultBind, @@ -357,7 +366,10 @@ func (p *ConsulGatewayProxy) Copy() *ConsulGatewayProxy { // ConsulGatewayTLSConfig is used to configure TLS for a gateway. 
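// As a rough usage sketch (illustrative only; the cipher suite name is an
// assumption, while the "TLSv1_2"/"TLSv1_3" strings mirror the values used in
// the tests below), the new TLS fields on the struct that follows can be
// populated and then safely duplicated with Copy:
//
//	tlsCfg := &ConsulGatewayTLSConfig{
//		Enabled:       true,
//		TLSMinVersion: "TLSv1_2",
//		TLSMaxVersion: "TLSv1_3",
//		CipherSuites:  []string{"TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"},
//	}
//	clone := tlsCfg.Copy() // Copy allocates a fresh CipherSuites slice, so the two values do not alias
//	_ = clone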
type ConsulGatewayTLSConfig struct { - Enabled bool `hcl:"enabled,optional"` + Enabled bool `hcl:"enabled,optional"` + TLSMinVersion string `hcl:"tls_min_version,optional" mapstructure:"tls_min_version"` + TLSMaxVersion string `hcl:"tls_max_version,optional" mapstructure:"tls_max_version"` + CipherSuites []string `hcl:"cipher_suites,optional" mapstructure:"cipher_suites"` } func (tc *ConsulGatewayTLSConfig) Canonicalize() { @@ -368,9 +380,18 @@ func (tc *ConsulGatewayTLSConfig) Copy() *ConsulGatewayTLSConfig { return nil } - return &ConsulGatewayTLSConfig{ - Enabled: tc.Enabled, + result := &ConsulGatewayTLSConfig{ + Enabled: tc.Enabled, + TLSMinVersion: tc.TLSMinVersion, + TLSMaxVersion: tc.TLSMaxVersion, + } + if len(tc.CipherSuites) != 0 { + cipherSuites := make([]string, len(tc.CipherSuites)) + copy(cipherSuites, tc.CipherSuites) + result.CipherSuites = cipherSuites } + + return result } // ConsulIngressService is used to configure a service fronted by the ingress gateway. diff --git a/api/consul_test.go b/api/consul_test.go index 0c32c4c8168..539bef2510e 100644 --- a/api/consul_test.go +++ b/api/consul_test.go @@ -5,31 +5,33 @@ import ( "time" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestConsul_Canonicalize(t *testing.T) { testutil.Parallel(t) + t.Run("missing ns", func(t *testing.T) { c := new(Consul) c.Canonicalize() - require.Empty(t, c.Namespace) + must.Eq(t, "", c.Namespace) }) t.Run("complete", func(t *testing.T) { c := &Consul{Namespace: "foo"} c.Canonicalize() - require.Equal(t, "foo", c.Namespace) + must.Eq(t, "foo", c.Namespace) }) } func TestConsul_Copy(t *testing.T) { testutil.Parallel(t) + t.Run("complete", func(t *testing.T) { result := (&Consul{ Namespace: "foo", }).Copy() - require.Equal(t, &Consul{ + must.Eq(t, &Consul{ Namespace: "foo", }, result) }) @@ -37,28 +39,29 @@ func TestConsul_Copy(t *testing.T) { func TestConsul_MergeNamespace(t *testing.T) { testutil.Parallel(t) + t.Run("already set", func(t *testing.T) { a := &Consul{Namespace: "foo"} - ns := stringToPtr("bar") + ns := pointerOf("bar") a.MergeNamespace(ns) - require.Equal(t, "foo", a.Namespace) - require.Equal(t, "bar", *ns) + must.Eq(t, "foo", a.Namespace) + must.Eq(t, "bar", *ns) }) t.Run("inherit", func(t *testing.T) { a := &Consul{Namespace: ""} - ns := stringToPtr("bar") + ns := pointerOf("bar") a.MergeNamespace(ns) - require.Equal(t, "bar", a.Namespace) - require.Equal(t, "bar", *ns) + must.Eq(t, "bar", a.Namespace) + must.Eq(t, "bar", *ns) }) t.Run("parent is nil", func(t *testing.T) { a := &Consul{Namespace: "foo"} ns := (*string)(nil) a.MergeNamespace(ns) - require.Equal(t, "foo", a.Namespace) - require.Nil(t, ns) + must.Eq(t, "foo", a.Namespace) + must.Nil(t, ns) }) } @@ -68,15 +71,15 @@ func TestConsulConnect_Canonicalize(t *testing.T) { t.Run("nil connect", func(t *testing.T) { cc := (*ConsulConnect)(nil) cc.Canonicalize() - require.Nil(t, cc) + must.Nil(t, cc) }) t.Run("empty connect", func(t *testing.T) { cc := new(ConsulConnect) cc.Canonicalize() - require.Empty(t, cc.Native) - require.Nil(t, cc.SidecarService) - require.Nil(t, cc.SidecarTask) + must.False(t, cc.Native) + must.Nil(t, cc.SidecarService) + must.Nil(t, cc.SidecarTask) }) } @@ -86,14 +89,14 @@ func TestConsulSidecarService_Canonicalize(t *testing.T) { t.Run("nil sidecar_service", func(t *testing.T) { css := (*ConsulSidecarService)(nil) css.Canonicalize() - require.Nil(t, css) + must.Nil(t, css) }) t.Run("empty sidecar_service", func(t 
*testing.T) { css := new(ConsulSidecarService) css.Canonicalize() - require.Empty(t, css.Tags) - require.Nil(t, css.Proxy) + must.SliceEmpty(t, css.Tags) + must.Nil(t, css.Proxy) }) t.Run("non-empty sidecar_service", func(t *testing.T) { @@ -106,7 +109,7 @@ func TestConsulSidecarService_Canonicalize(t *testing.T) { }, } css.Canonicalize() - require.Equal(t, &ConsulSidecarService{ + must.Eq(t, &ConsulSidecarService{ Tags: nil, Port: "port", Proxy: &ConsulProxy{ @@ -122,33 +125,33 @@ func TestConsulProxy_Canonicalize(t *testing.T) { t.Run("nil proxy", func(t *testing.T) { cp := (*ConsulProxy)(nil) cp.Canonicalize() - require.Nil(t, cp) + must.Nil(t, cp) }) t.Run("empty proxy", func(t *testing.T) { cp := new(ConsulProxy) cp.Canonicalize() - require.Empty(t, cp.LocalServiceAddress) - require.Zero(t, cp.LocalServicePort) - require.Nil(t, cp.ExposeConfig) - require.Nil(t, cp.Upstreams) - require.Empty(t, cp.Config) + must.Eq(t, "", cp.LocalServiceAddress) + must.Zero(t, cp.LocalServicePort) + must.Nil(t, cp.Expose) + must.Nil(t, cp.Upstreams) + must.MapEmpty(t, cp.Config) }) t.Run("non empty proxy", func(t *testing.T) { cp := &ConsulProxy{ LocalServiceAddress: "127.0.0.1", LocalServicePort: 80, - ExposeConfig: new(ConsulExposeConfig), + Expose: new(ConsulExposeConfig), Upstreams: make([]*ConsulUpstream, 0), Config: make(map[string]interface{}), } cp.Canonicalize() - require.Equal(t, "127.0.0.1", cp.LocalServiceAddress) - require.Equal(t, 80, cp.LocalServicePort) - require.Equal(t, &ConsulExposeConfig{}, cp.ExposeConfig) - require.Nil(t, cp.Upstreams) - require.Nil(t, cp.Config) + must.Eq(t, "127.0.0.1", cp.LocalServiceAddress) + must.Eq(t, 80, cp.LocalServicePort) + must.Eq(t, &ConsulExposeConfig{}, cp.Expose) + must.Nil(t, cp.Upstreams) + must.Nil(t, cp.Config) }) } @@ -158,19 +161,20 @@ func TestConsulUpstream_Copy(t *testing.T) { t.Run("nil upstream", func(t *testing.T) { cu := (*ConsulUpstream)(nil) result := cu.Copy() - require.Nil(t, result) + must.Nil(t, result) }) t.Run("complete upstream", func(t *testing.T) { cu := &ConsulUpstream{ - DestinationName: "dest1", - Datacenter: "dc2", - LocalBindPort: 2000, - LocalBindAddress: "10.0.0.1", - MeshGateway: &ConsulMeshGateway{Mode: "remote"}, + DestinationName: "dest1", + DestinationNamespace: "ns2", + Datacenter: "dc2", + LocalBindPort: 2000, + LocalBindAddress: "10.0.0.1", + MeshGateway: &ConsulMeshGateway{Mode: "remote"}, } result := cu.Copy() - require.Equal(t, cu, result) + must.Eq(t, cu, result) }) } @@ -180,24 +184,26 @@ func TestConsulUpstream_Canonicalize(t *testing.T) { t.Run("nil upstream", func(t *testing.T) { cu := (*ConsulUpstream)(nil) cu.Canonicalize() - require.Nil(t, cu) + must.Nil(t, cu) }) t.Run("complete", func(t *testing.T) { cu := &ConsulUpstream{ - DestinationName: "dest1", - Datacenter: "dc2", - LocalBindPort: 2000, - LocalBindAddress: "10.0.0.1", - MeshGateway: &ConsulMeshGateway{Mode: ""}, + DestinationName: "dest1", + DestinationNamespace: "ns2", + Datacenter: "dc2", + LocalBindPort: 2000, + LocalBindAddress: "10.0.0.1", + MeshGateway: &ConsulMeshGateway{Mode: ""}, } cu.Canonicalize() - require.Equal(t, &ConsulUpstream{ - DestinationName: "dest1", - Datacenter: "dc2", - LocalBindPort: 2000, - LocalBindAddress: "10.0.0.1", - MeshGateway: &ConsulMeshGateway{Mode: ""}, + must.Eq(t, &ConsulUpstream{ + DestinationName: "dest1", + DestinationNamespace: "ns2", + Datacenter: "dc2", + LocalBindPort: 2000, + LocalBindAddress: "10.0.0.1", + MeshGateway: &ConsulMeshGateway{Mode: ""}, }, cu) }) } @@ -208,29 +214,29 @@ func 
TestSidecarTask_Canonicalize(t *testing.T) { t.Run("nil sidecar_task", func(t *testing.T) { st := (*SidecarTask)(nil) st.Canonicalize() - require.Nil(t, st) + must.Nil(t, st) }) t.Run("empty sidecar_task", func(t *testing.T) { st := new(SidecarTask) st.Canonicalize() - require.Nil(t, st.Config) - require.Nil(t, st.Env) - require.Equal(t, DefaultResources(), st.Resources) - require.Equal(t, DefaultLogConfig(), st.LogConfig) - require.Nil(t, st.Meta) - require.Equal(t, 5*time.Second, *st.KillTimeout) - require.Equal(t, 0*time.Second, *st.ShutdownDelay) + must.Nil(t, st.Config) + must.Nil(t, st.Env) + must.Eq(t, DefaultResources(), st.Resources) + must.Eq(t, DefaultLogConfig(), st.LogConfig) + must.Nil(t, st.Meta) + must.Eq(t, 5*time.Second, *st.KillTimeout) + must.Eq(t, 0*time.Second, *st.ShutdownDelay) }) t.Run("non empty sidecar_task resources", func(t *testing.T) { exp := DefaultResources() - exp.MemoryMB = intToPtr(333) + exp.MemoryMB = pointerOf(333) st := &SidecarTask{ - Resources: &Resources{MemoryMB: intToPtr(333)}, + Resources: &Resources{MemoryMB: pointerOf(333)}, } st.Canonicalize() - require.Equal(t, exp, st.Resources) + must.Eq(t, exp, st.Resources) }) } @@ -240,7 +246,7 @@ func TestConsulGateway_Canonicalize(t *testing.T) { t.Run("nil", func(t *testing.T) { cg := (*ConsulGateway)(nil) cg.Canonicalize() - require.Nil(t, cg) + must.Nil(t, cg) }) t.Run("set defaults", func(t *testing.T) { @@ -260,13 +266,13 @@ func TestConsulGateway_Canonicalize(t *testing.T) { }, } cg.Canonicalize() - require.Equal(t, timeToPtr(5*time.Second), cg.Proxy.ConnectTimeout) - require.True(t, cg.Proxy.EnvoyGatewayBindTaggedAddresses) - require.Nil(t, cg.Proxy.EnvoyGatewayBindAddresses) - require.True(t, cg.Proxy.EnvoyGatewayNoDefaultBind) - require.Empty(t, cg.Proxy.EnvoyDNSDiscoveryType) - require.Nil(t, cg.Proxy.Config) - require.Nil(t, cg.Ingress.Listeners) + must.Eq(t, pointerOf(5*time.Second), cg.Proxy.ConnectTimeout) + must.True(t, cg.Proxy.EnvoyGatewayBindTaggedAddresses) + must.Nil(t, cg.Proxy.EnvoyGatewayBindAddresses) + must.True(t, cg.Proxy.EnvoyGatewayNoDefaultBind) + must.Eq(t, "", cg.Proxy.EnvoyDNSDiscoveryType) + must.Nil(t, cg.Proxy.Config) + must.Nil(t, cg.Ingress.Listeners) }) } @@ -275,12 +281,12 @@ func TestConsulGateway_Copy(t *testing.T) { t.Run("nil", func(t *testing.T) { result := (*ConsulGateway)(nil).Copy() - require.Nil(t, result) + must.Nil(t, result) }) gateway := &ConsulGateway{ Proxy: &ConsulGatewayProxy{ - ConnectTimeout: timeToPtr(3 * time.Second), + ConnectTimeout: pointerOf(3 * time.Second), EnvoyGatewayBindTaggedAddresses: true, EnvoyGatewayBindAddresses: map[string]*ConsulGatewayBindAddress{ "listener1": {Address: "10.0.0.1", Port: 2000}, @@ -317,7 +323,7 @@ func TestConsulGateway_Copy(t *testing.T) { t.Run("complete", func(t *testing.T) { result := gateway.Copy() - require.Equal(t, gateway, result) + must.Eq(t, gateway, result) }) } @@ -327,7 +333,7 @@ func TestConsulIngressConfigEntry_Canonicalize(t *testing.T) { t.Run("nil", func(t *testing.T) { c := (*ConsulIngressConfigEntry)(nil) c.Canonicalize() - require.Nil(t, c) + must.Nil(t, c) }) t.Run("empty fields", func(t *testing.T) { @@ -336,8 +342,8 @@ func TestConsulIngressConfigEntry_Canonicalize(t *testing.T) { Listeners: []*ConsulIngressListener{}, } c.Canonicalize() - require.Nil(t, c.TLS) - require.Nil(t, c.Listeners) + must.Nil(t, c.TLS) + must.Nil(t, c.Listeners) }) t.Run("complete", func(t *testing.T) { @@ -353,7 +359,7 @@ func TestConsulIngressConfigEntry_Canonicalize(t *testing.T) { }}, } c.Canonicalize() 
- require.Equal(t, &ConsulIngressConfigEntry{ + must.Eq(t, &ConsulIngressConfigEntry{ TLS: &ConsulGatewayTLSConfig{Enabled: true}, Listeners: []*ConsulIngressListener{{ Port: 9090, @@ -372,7 +378,7 @@ func TestConsulIngressConfigEntry_Copy(t *testing.T) { t.Run("nil", func(t *testing.T) { result := (*ConsulIngressConfigEntry)(nil).Copy() - require.Nil(t, result) + must.Nil(t, result) }) entry := &ConsulIngressConfigEntry{ @@ -394,7 +400,7 @@ func TestConsulIngressConfigEntry_Copy(t *testing.T) { t.Run("complete", func(t *testing.T) { result := entry.Copy() - require.Equal(t, entry, result) + must.Eq(t, entry, result) }) } @@ -404,7 +410,7 @@ func TestConsulTerminatingConfigEntry_Canonicalize(t *testing.T) { t.Run("nil", func(t *testing.T) { c := (*ConsulTerminatingConfigEntry)(nil) c.Canonicalize() - require.Nil(t, c) + must.Nil(t, c) }) t.Run("empty services", func(t *testing.T) { @@ -412,7 +418,7 @@ func TestConsulTerminatingConfigEntry_Canonicalize(t *testing.T) { Services: []*ConsulLinkedService{}, } c.Canonicalize() - require.Nil(t, c.Services) + must.Nil(t, c.Services) }) } @@ -421,7 +427,7 @@ func TestConsulTerminatingConfigEntry_Copy(t *testing.T) { t.Run("nil", func(t *testing.T) { result := (*ConsulIngressConfigEntry)(nil).Copy() - require.Nil(t, result) + must.Nil(t, result) }) entry := &ConsulTerminatingConfigEntry{ @@ -438,7 +444,7 @@ func TestConsulTerminatingConfigEntry_Copy(t *testing.T) { t.Run("complete", func(t *testing.T) { result := entry.Copy() - require.Equal(t, entry, result) + must.Eq(t, entry, result) }) } @@ -448,13 +454,13 @@ func TestConsulMeshConfigEntry_Canonicalize(t *testing.T) { t.Run("nil", func(t *testing.T) { ce := (*ConsulMeshConfigEntry)(nil) ce.Canonicalize() - require.Nil(t, ce) + must.Nil(t, ce) }) t.Run("instantiated", func(t *testing.T) { ce := new(ConsulMeshConfigEntry) ce.Canonicalize() - require.NotNil(t, ce) + must.NotNil(t, ce) }) } @@ -464,13 +470,13 @@ func TestConsulMeshConfigEntry_Copy(t *testing.T) { t.Run("nil", func(t *testing.T) { ce := (*ConsulMeshConfigEntry)(nil) ce2 := ce.Copy() - require.Nil(t, ce2) + must.Nil(t, ce2) }) t.Run("instantiated", func(t *testing.T) { ce := new(ConsulMeshConfigEntry) ce2 := ce.Copy() - require.NotNil(t, ce2) + must.NotNil(t, ce2) }) } @@ -480,19 +486,19 @@ func TestConsulMeshGateway_Canonicalize(t *testing.T) { t.Run("nil", func(t *testing.T) { c := (*ConsulMeshGateway)(nil) c.Canonicalize() - require.Nil(t, c) + must.Nil(t, c) }) t.Run("unset mode", func(t *testing.T) { c := &ConsulMeshGateway{Mode: ""} c.Canonicalize() - require.Equal(t, "", c.Mode) + must.Eq(t, "", c.Mode) }) t.Run("set mode", func(t *testing.T) { c := &ConsulMeshGateway{Mode: "remote"} c.Canonicalize() - require.Equal(t, "remote", c.Mode) + must.Eq(t, "remote", c.Mode) }) } @@ -502,7 +508,7 @@ func TestConsulMeshGateway_Copy(t *testing.T) { t.Run("nil", func(t *testing.T) { c := (*ConsulMeshGateway)(nil) result := c.Copy() - require.Nil(t, result) + must.Nil(t, result) }) t.Run("instantiated", func(t *testing.T) { @@ -510,6 +516,35 @@ func TestConsulMeshGateway_Copy(t *testing.T) { Mode: "local", } result := c.Copy() - require.Equal(t, c, result) + must.Eq(t, c, result) + }) +} + +func TestConsulGatewayTLSConfig_Copy(t *testing.T) { + testutil.Parallel(t) + + t.Run("nil", func(t *testing.T) { + c := (*ConsulGatewayTLSConfig)(nil) + result := c.Copy() + must.Nil(t, result) + }) + + t.Run("enabled", func(t *testing.T) { + c := &ConsulGatewayTLSConfig{ + Enabled: true, + } + result := c.Copy() + must.Eq(t, c, result) + }) + + 
t.Run("customized", func(t *testing.T) { + c := &ConsulGatewayTLSConfig{ + Enabled: true, + TLSMinVersion: "TLSv1_2", + TLSMaxVersion: "TLSv1_3", + CipherSuites: []string{"foo", "bar"}, + } + result := c.Copy() + must.Eq(t, c, result) }) } diff --git a/api/csi.go b/api/csi.go index b42202b67c1..dc31db8f2d4 100644 --- a/api/csi.go +++ b/api/csi.go @@ -82,7 +82,7 @@ func (v *CSIVolumes) Register(vol *CSIVolume, w *WriteOptions) (*WriteMeta, erro // Deregister deregisters a single CSIVolume from Nomad. The volume will not be deleted from the external storage provider. func (v *CSIVolumes) Deregister(id string, force bool, w *WriteOptions) error { - _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v?force=%t", url.PathEscape(id), force), nil, w) + _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v?force=%t", url.PathEscape(id), force), nil, nil, w) return err } @@ -104,7 +104,7 @@ func (v *CSIVolumes) Create(vol *CSIVolume, w *WriteOptions) ([]*CSIVolume, *Wri // passed as an argument here is for the storage provider's ID, so a volume // that's already been deregistered can be deleted. func (v *CSIVolumes) Delete(externalVolID string, w *WriteOptions) error { - _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/delete", url.PathEscape(externalVolID)), nil, w) + _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/delete", url.PathEscape(externalVolID)), nil, nil, w) return err } @@ -117,7 +117,7 @@ func (v *CSIVolumes) DeleteOpts(req *CSIVolumeDeleteRequest, w *WriteOptions) er w = &WriteOptions{} } w.SetHeadersFromCSISecrets(req.Secrets) - _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/delete", url.PathEscape(req.ExternalVolumeID)), nil, w) + _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/delete", url.PathEscape(req.ExternalVolumeID)), nil, nil, w) return err } @@ -125,7 +125,7 @@ func (v *CSIVolumes) DeleteOpts(req *CSIVolumeDeleteRequest, w *WriteOptions) er // node. This is used in the case that the node is temporarily lost and the // allocations are unable to drop their claims automatically. func (v *CSIVolumes) Detach(volID, nodeID string, w *WriteOptions) error { - _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/detach?node=%v", url.PathEscape(volID), nodeID), nil, w) + _, err := v.client.delete(fmt.Sprintf("/v1/volume/csi/%v/detach?node=%v", url.PathEscape(volID), nodeID), nil, nil, w) return err } @@ -152,7 +152,7 @@ func (v *CSIVolumes) DeleteSnapshot(snap *CSISnapshot, w *WriteOptions) error { w = &WriteOptions{} } w.SetHeadersFromCSISecrets(snap.Secrets) - _, err := v.client.delete("/v1/volumes/snapshot?"+qp.Encode(), nil, w) + _, err := v.client.delete("/v1/volumes/snapshot?"+qp.Encode(), nil, nil, w) return err } @@ -229,6 +229,11 @@ const ( CSIVolumeAccessModeMultiNodeMultiWriter CSIVolumeAccessMode = "multi-node-multi-writer" ) +const ( + CSIVolumeTypeHost = "host" + CSIVolumeTypeCSI = "csi" +) + // CSIMountOptions contain optional additional configuration that can be used // when specifying that a Volume should be used with VolumeAccessTypeMount. type CSIMountOptions struct { @@ -244,6 +249,18 @@ type CSIMountOptions struct { ExtraKeysHCL []string `hcl1:",unusedKeys" json:"-"` // report unexpected keys } +func (o *CSIMountOptions) Merge(p *CSIMountOptions) { + if p == nil { + return + } + if p.FSType != "" { + o.FSType = p.FSType + } + if p.MountFlags != nil { + o.MountFlags = p.MountFlags + } +} + // CSISecrets contain optional additional credentials that may be needed by // the storage provider. 
These values will be redacted when reported in the // API or in Nomad's logs. @@ -367,6 +384,8 @@ type CSIVolumeListStub struct { Topologies []*CSITopology AccessMode CSIVolumeAccessMode AttachmentMode CSIVolumeAttachmentMode + CurrentReaders int + CurrentWriters int Schedulable bool PluginID string Provider string diff --git a/api/csi_test.go b/api/csi_test.go index 6c75de4c071..8b9c41929e5 100644 --- a/api/csi_test.go +++ b/api/csi_test.go @@ -4,7 +4,7 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) // TestCSIVolumes_CRUD fails because of a combination of removing the job to plugin creation @@ -14,89 +14,94 @@ import ( // 2. Build and deploy a dummy CSI plugin via a job, and have it really fingerprint func TestCSIVolumes_CRUD(t *testing.T) { testutil.Parallel(t) + c, s, root := makeACLClient(t, nil, nil) defer s.Stop() v := c.CSIVolumes() // Successful empty result vols, qm, err := v.List(nil) - require.NoError(t, err) - require.NotEqual(t, 0, qm.LastIndex) - require.Equal(t, 0, len(vols)) + must.NoError(t, err) + // must.Positive(t, qm.LastIndex) TODO(tgross), this was always broken? + _ = qm + must.SliceEmpty(t, vols) + _ = root // FIXME we're bailing out here until one of the fixes is available - return - - // Authorized QueryOpts. Use the root token to just bypass ACL details - opts := &QueryOptions{ - Region: "global", - Namespace: "default", - AuthToken: root.SecretID, - } - - wpts := &WriteOptions{ - Region: "global", - Namespace: "default", - AuthToken: root.SecretID, - } - - // Create node plugins - nodes, _, err := c.Nodes().List(nil) - require.NoError(t, err) - require.Equal(t, 1, len(nodes)) - - nodeStub := nodes[0] - node, _, err := c.Nodes().Info(nodeStub.ID, nil) - require.NoError(t, err) - node.CSINodePlugins = map[string]*CSIInfo{ - "foo": { - PluginID: "foo", - Healthy: true, - RequiresControllerPlugin: false, - RequiresTopologies: false, - NodeInfo: &CSINodeInfo{ - ID: nodeStub.ID, - MaxVolumes: 200, + /* + + // Authorized QueryOpts. 
Use the root token to just bypass ACL details + opts := &QueryOptions{ + Region: "global", + Namespace: "default", + AuthToken: root.SecretID, + } + + wpts := &WriteOptions{ + Region: "global", + Namespace: "default", + AuthToken: root.SecretID, + } + + // Create node plugins + nodes, _, err := c.Nodes().List(nil) + require.NoError(t, err) + require.Equal(t, 1, len(nodes)) + + nodeStub := nodes[0] + node, _, err := c.Nodes().Info(nodeStub.ID, nil) + require.NoError(t, err) + node.CSINodePlugins = map[string]*CSIInfo{ + "foo": { + PluginID: "foo", + Healthy: true, + RequiresControllerPlugin: false, + RequiresTopologies: false, + NodeInfo: &CSINodeInfo{ + ID: nodeStub.ID, + MaxVolumes: 200, + }, }, - }, - } - - // Register a volume - // This id is here as a string to avoid importing helper, which causes the lint - // rule that checks that the api package is isolated to fail - id := "DEADBEEF-31B5-8F78-7986-DD404FDA0CD1" - _, err = v.Register(&CSIVolume{ - ID: id, - Namespace: "default", - PluginID: "foo", - AccessMode: CSIVolumeAccessModeMultiNodeSingleWriter, - AttachmentMode: CSIVolumeAttachmentModeFilesystem, - Topologies: []*CSITopology{{Segments: map[string]string{"foo": "bar"}}}, - }, wpts) - require.NoError(t, err) - - // Successful result with volumes - vols, qm, err = v.List(opts) - require.NoError(t, err) - require.NotEqual(t, 0, qm.LastIndex) - require.Equal(t, 1, len(vols)) - - // Successful info query - vol, qm, err := v.Info(id, opts) - require.NoError(t, err) - require.Equal(t, "bar", vol.Topologies[0].Segments["foo"]) - - // Deregister the volume - err = v.Deregister(id, true, wpts) - require.NoError(t, err) + } - // Successful empty result - vols, qm, err = v.List(nil) - require.NoError(t, err) - require.NotEqual(t, 0, qm.LastIndex) - require.Equal(t, 0, len(vols)) - - // Failed info query - vol, qm, err = v.Info(id, opts) - require.Error(t, err, "missing") + // Register a volume + // This id is here as a string to avoid importing helper, which causes the lint + // rule that checks that the api package is isolated to fail + id := "DEADBEEF-31B5-8F78-7986-DD404FDA0CD1" + _, err = v.Register(&CSIVolume{ + ID: id, + Namespace: "default", + PluginID: "foo", + AccessMode: CSIVolumeAccessModeMultiNodeSingleWriter, + AttachmentMode: CSIVolumeAttachmentModeFilesystem, + Topologies: []*CSITopology{{Segments: map[string]string{"foo": "bar"}}}, + }, wpts) + require.NoError(t, err) + + // Successful result with volumes + vols, qm, err = v.List(opts) + require.NoError(t, err) + require.NotEqual(t, 0, qm.LastIndex) + require.Equal(t, 1, len(vols)) + + // Successful info query + vol, qm, err := v.Info(id, opts) + require.NoError(t, err) + require.Equal(t, "bar", vol.Topologies[0].Segments["foo"]) + + // Deregister the volume + err = v.Deregister(id, true, wpts) + require.NoError(t, err) + + // Successful empty result + vols, qm, err = v.List(nil) + require.NoError(t, err) + require.NotEqual(t, 0, qm.LastIndex) + require.Equal(t, 0, len(vols)) + + // Failed info query + vol, qm, err = v.Info(id, opts) + require.Error(t, err, "missing") + + */ } diff --git a/api/deployments.go b/api/deployments.go index 4a4844246fa..1c8011a6fbb 100644 --- a/api/deployments.go +++ b/api/deployments.go @@ -136,6 +136,17 @@ func (d *Deployments) SetAllocHealth(deploymentID string, healthy, unhealthy []s return &resp, wm, nil } +const ( + DeploymentStatusRunning = "running" + DeploymentStatusPaused = "paused" + DeploymentStatusFailed = "failed" + DeploymentStatusSuccessful = "successful" + 
DeploymentStatusCancelled = "cancelled" + DeploymentStatusPending = "pending" + DeploymentStatusBlocked = "blocked" + DeploymentStatusUnblocking = "unblocking" +) + // Deployment is used to serialize an deployment. type Deployment struct { // ID is a generated UUID for the deployment diff --git a/api/evaluations.go b/api/evaluations.go index 62d699ef320..bcb0eb2b960 100644 --- a/api/evaluations.go +++ b/api/evaluations.go @@ -40,6 +40,18 @@ func (e *Evaluations) Info(evalID string, q *QueryOptions) (*Evaluation, *QueryM return &resp, qm, nil } +// Delete is used to batch delete evaluations using their IDs. +func (e *Evaluations) Delete(evalIDs []string, w *WriteOptions) (*WriteMeta, error) { + req := EvalDeleteRequest{ + EvalIDs: evalIDs, + } + wm, err := e.client.delete("/v1/evaluations", &req, nil, w) + if err != nil { + return nil, err + } + return wm, nil +} + // Allocations is used to retrieve a set of allocations given // an evaluation ID. func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*AllocationListStub, *QueryMeta, error) { @@ -52,6 +64,14 @@ func (e *Evaluations) Allocations(evalID string, q *QueryOptions) ([]*Allocation return resp, qm, nil } +const ( + EvalStatusBlocked = "blocked" + EvalStatusPending = "pending" + EvalStatusComplete = "complete" + EvalStatusFailed = "failed" + EvalStatusCancelled = "canceled" +) + // Evaluation is used to serialize an evaluation. type Evaluation struct { ID string @@ -108,6 +128,11 @@ type EvaluationStub struct { ModifyTime int64 } +type EvalDeleteRequest struct { + EvalIDs []string + WriteRequest +} + // EvalIndexSort is a wrapper to sort evaluations by CreateIndex. // We reverse the test so that we get the highest index first. type EvalIndexSort []*Evaluation diff --git a/api/evaluations_test.go b/api/evaluations_test.go index 0939f87e17e..692d4544418 100644 --- a/api/evaluations_test.go +++ b/api/evaluations_test.go @@ -1,152 +1,187 @@ package api import ( + "fmt" "sort" "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" ) func TestEvaluations_List(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() e := c.Evaluations() // Listing when nothing exists returns empty result, qm, err := e.List(nil) - require.NoError(t, err) - require.Equal(t, uint64(0), qm.LastIndex, "bad index") - require.Equal(t, 0, len(result), "expected 0 evaluations") + must.NoError(t, err) + must.Eq(t, 0, qm.LastIndex) + must.SliceEmpty(t, result) // Register a job. This will create an evaluation. 
jobs := c.Jobs() job := testJob() resp, wm, err := jobs.Register(job, nil) - require.NoError(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Check the evaluations again result, qm, err = e.List(nil) - require.NoError(t, err) + must.NoError(t, err) assertQueryMeta(t, qm) // if the eval fails fast there can be more than 1 // but they are in order of most recent first, so look at the last one - require.Greater(t, len(result), 0, "expected eval (%s), got none", resp.EvalID) + must.Positive(t, len(result)) idx := len(result) - 1 - require.Equal(t, resp.EvalID, result[idx].ID, "expected eval (%s), got: %#v", resp.EvalID, result[idx]) + must.Eq(t, resp.EvalID, result[idx].ID) // wait until the 2nd eval shows up before we try paging - results := []*Evaluation{} - testutil.WaitForResult(func() (bool, error) { + var results []*Evaluation + + f := func() error { results, _, err = e.List(nil) - if len(results) < 2 || err != nil { - return false, err + if err != nil { + return fmt.Errorf("failed to list evaluations: %w", err) } - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + if len(results) < 2 { + return fmt.Errorf("fewer than 2 results, got: %d", len(results)) + } + return nil + } + must.Wait(t, wait.InitialSuccess(wait.ErrorFunc(f))) // query first page result, qm, err = e.List(&QueryOptions{ PerPage: int32(1), }) - require.NoError(t, err) - require.Equal(t, 1, len(result), "expected no evals after last one but got %d: %#v", len(result), result) + must.NoError(t, err) + must.Len(t, 1, result) // query second page result, qm, err = e.List(&QueryOptions{ PerPage: int32(1), NextToken: qm.NextToken, }) - require.NoError(t, err) - require.Equal(t, 1, len(result), "expected no evals after last one but got %d: %#v", len(result), result) + must.NoError(t, err) + must.Len(t, 1, result) // Query evaluations using a filter. results, _, err = e.List(&QueryOptions{ Filter: `TriggeredBy == "job-register"`, }) - require.Equal(t, 1, len(result), "expected 1 eval, got %d", len(result)) + must.Len(t, 1, result) } func TestEvaluations_PrefixList(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() e := c.Evaluations() // Listing when nothing exists returns empty result, qm, err := e.PrefixList("abcdef") - require.NoError(t, err) - require.Equal(t, uint64(0), qm.LastIndex, "bad index") - require.Equal(t, 0, len(result), "expected 0 evaluations") + must.NoError(t, err) + must.Eq(t, 0, qm.LastIndex) + must.SliceEmpty(t, result) // Register a job. This will create an evaluation. jobs := c.Jobs() job := testJob() resp, wm, err := jobs.Register(job, nil) - require.NoError(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Check the evaluations again result, qm, err = e.PrefixList(resp.EvalID[:4]) - require.NoError(t, err) + must.NoError(t, err) assertQueryMeta(t, qm) // Check if we have the right list - require.Equal(t, 1, len(result)) - require.Equal(t, resp.EvalID, result[0].ID) + must.Len(t, 1, result) + must.Eq(t, resp.EvalID, result[0].ID) } func TestEvaluations_Info(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() e := c.Evaluations() // Querying a nonexistent evaluation returns error _, _, err := e.Info("8E231CF4-CA48-43FF-B694-5801E69E22FA", nil) - require.Error(t, err) + must.Error(t, err) // Register a job. Creates a new evaluation. 
jobs := c.Jobs() job := testJob() resp, wm, err := jobs.Register(job, nil) - require.NoError(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Try looking up by the new eval ID result, qm, err := e.Info(resp.EvalID, nil) - require.NoError(t, err) + must.NoError(t, err) assertQueryMeta(t, qm) // Check that we got the right result - require.NotNil(t, result) - require.Equal(t, resp.EvalID, result.ID) + must.NotNil(t, result) + must.Eq(t, resp.EvalID, result.ID) // Register the job again to get a related eval resp, wm, err = jobs.Register(job, nil) evals, _, err := e.List(nil) - require.NoError(t, err) + must.NoError(t, err) // Find an eval that should have related evals for _, eval := range evals { if eval.NextEval != "" || eval.PreviousEval != "" || eval.BlockedEval != "" { - result, qm, err := e.Info(eval.ID, &QueryOptions{ + result, qm, err = e.Info(eval.ID, &QueryOptions{ Params: map[string]string{ "related": "true", }, }) - require.NoError(t, err) + must.NoError(t, err) assertQueryMeta(t, qm) - require.NotNil(t, result.RelatedEvals) + must.NotNil(t, result.RelatedEvals) } } } +func TestEvaluations_Delete(t *testing.T) { + testutil.Parallel(t) + + testClient, testServer := makeClient(t, nil, nil) + defer testServer.Stop() + + // Attempting to delete an evaluation when the eval broker is not paused + // should return an error. + wm, err := testClient.Evaluations().Delete([]string{"8E231CF4-CA48-43FF-B694-5801E69E22FA"}, nil) + must.Nil(t, wm) + must.ErrorContains(t, err, "eval broker is enabled") + + // Pause the eval broker, and try to delete an evaluation that does not + // exist. + schedulerConfig, _, err := testClient.Operator().SchedulerGetConfiguration(nil) + must.NoError(t, err) + must.NotNil(t, schedulerConfig) + + schedulerConfig.SchedulerConfig.PauseEvalBroker = true + schedulerConfigUpdated, _, err := testClient.Operator().SchedulerCASConfiguration(schedulerConfig.SchedulerConfig, nil) + must.NoError(t, err) + must.True(t, schedulerConfigUpdated.Updated) + + wm, err = testClient.Evaluations().Delete([]string{"8E231CF4-CA48-43FF-B694-5801E69E22FA"}, nil) + must.ErrorContains(t, err, "eval not found") +} + func TestEvaluations_Allocations(t *testing.T) { testutil.Parallel(t) c, s := makeClient(t, nil, nil) @@ -155,9 +190,9 @@ func TestEvaluations_Allocations(t *testing.T) { // Returns empty if no allocations allocs, qm, err := e.Allocations("8E231CF4-CA48-43FF-B694-5801E69E22FA", nil) - require.NoError(t, err) - require.Equal(t, uint64(0), qm.LastIndex, "bad index") - require.Equal(t, 0, len(allocs), "expected 0 evaluations") + must.NoError(t, err) + must.Eq(t, 0, qm.LastIndex) + must.SliceEmpty(t, allocs) } func TestEvaluations_Sort(t *testing.T) { @@ -174,5 +209,5 @@ func TestEvaluations_Sort(t *testing.T) { {CreateIndex: 2}, {CreateIndex: 1}, } - require.Equal(t, expect, evals) + must.Eq(t, expect, evals) } diff --git a/api/event_stream_test.go b/api/event_stream_test.go index d0f55f91f47..4afacb3fe3d 100644 --- a/api/event_stream_test.go +++ b/api/event_stream_test.go @@ -8,7 +8,7 @@ import ( "github.com/hashicorp/nomad/api/internal/testutil" "github.com/mitchellh/mapstructure" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestEvent_Stream(t *testing.T) { @@ -21,8 +21,8 @@ func TestEvent_Stream(t *testing.T) { jobs := c.Jobs() job := testJob() resp2, _, err := jobs.Register(job, nil) - require.Nil(t, err) - require.NotNil(t, resp2) + must.NoError(t, err) + must.NotNil(t, resp2) // build event stream request events := c.EventStream() @@ 
-35,17 +35,17 @@ func TestEvent_Stream(t *testing.T) { defer cancel() streamCh, err := events.Stream(ctx, topics, 0, q) - require.NoError(t, err) + must.NoError(t, err) select { case event := <-streamCh: if event.Err != nil { - require.Fail(t, err.Error()) + must.Unreachable(t, must.Sprintf("unexpected %v", event.Err)) } - require.Equal(t, len(event.Events), 1) - require.Equal(t, "Evaluation", string(event.Events[0].Topic)) + must.Len(t, 1, event.Events) + must.Eq(t, "Evaluation", string(event.Events[0].Topic)) case <-time.After(5 * time.Second): - require.Fail(t, "failed waiting for event stream event") + must.Unreachable(t, must.Sprint("failed waiting for event stream event")) } } @@ -59,8 +59,8 @@ func TestEvent_Stream_Err_InvalidQueryParam(t *testing.T) { jobs := c.Jobs() job := testJob() resp2, _, err := jobs.Register(job, nil) - require.Nil(t, err) - require.NotNil(t, resp2) + must.NoError(t, err) + must.NotNil(t, resp2) // build event stream request events := c.EventStream() @@ -73,9 +73,7 @@ func TestEvent_Stream_Err_InvalidQueryParam(t *testing.T) { defer cancel() _, err = events.Stream(ctx, topics, 0, q) - require.Error(t, err) - require.Contains(t, err.Error(), "400") - require.Contains(t, err.Error(), "Invalid key value pair") + must.ErrorContains(t, err, "Invalid key value pair") } func TestEvent_Stream_CloseCtx(t *testing.T) { @@ -88,8 +86,8 @@ func TestEvent_Stream_CloseCtx(t *testing.T) { jobs := c.Jobs() job := testJob() resp2, _, err := jobs.Register(job, nil) - require.Nil(t, err) - require.NotNil(t, resp2) + must.NoError(t, err) + must.NotNil(t, resp2) // build event stream request events := c.EventStream() @@ -101,17 +99,17 @@ func TestEvent_Stream_CloseCtx(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) streamCh, err := events.Stream(ctx, topics, 0, q) - require.NoError(t, err) + must.NoError(t, err) // cancel the request cancel() select { case event, ok := <-streamCh: - require.False(t, ok) - require.Nil(t, event) + must.False(t, ok) + must.Nil(t, event) case <-time.After(5 * time.Second): - require.Fail(t, "failed waiting for event stream event") + must.Unreachable(t, must.Sprint("failed waiting for event stream event")) } } @@ -127,8 +125,8 @@ func TestEventStream_PayloadValue(t *testing.T) { jobs := c.Jobs() job := testJob() resp2, _, err := jobs.Register(job, nil) - require.Nil(t, err) - require.NotNil(t, resp2) + must.NoError(t, err) + must.NotNil(t, resp2) // build event stream request events := c.EventStream() @@ -141,18 +139,18 @@ func TestEventStream_PayloadValue(t *testing.T) { defer cancel() streamCh, err := events.Stream(ctx, topics, 0, q) - require.NoError(t, err) + must.NoError(t, err) select { case event := <-streamCh: if event.Err != nil { - require.NoError(t, err) + must.NoError(t, err) } for _, e := range event.Events { // verify that we get a node n, err := e.Node() - require.NoError(t, err) - require.NotEmpty(t, n.ID) + must.NoError(t, err) + must.UUIDv4(t, n.ID) // perform a raw decoding and look for: // - "ID" to make sure that raw decoding is working correctly @@ -162,15 +160,15 @@ func TestEventStream_PayloadValue(t *testing.T) { Result: &raw, } dec, err := mapstructure.NewDecoder(cfg) - require.NoError(t, err) - require.NoError(t, dec.Decode(e.Payload)) - require.Contains(t, raw, "Node") + must.NoError(t, err) + must.NoError(t, dec.Decode(e.Payload)) + must.MapContainsKeys(t, raw, []string{"Node"}) rawNode := raw["Node"] - require.Equal(t, n.ID, rawNode["ID"]) - require.Empty(t, rawNode["SecretID"]) + must.Eq(t, n.ID, 
rawNode["ID"].(string)) + must.Eq(t, "", rawNode["SecretID"]) } case <-time.After(5 * time.Second): - require.Fail(t, "failed waiting for event stream event") + must.Unreachable(t, must.Sprint("failed waiting for event stream event")) } } @@ -189,13 +187,12 @@ func TestEventStream_PayloadValueHelpers(t *testing.T) { input: []byte(`{"Topic": "Deployment", "Payload": {"Deployment":{"ID":"some-id","JobID":"some-job-id", "TaskGroups": {"tg1": {"RequireProgressBy": "2020-11-05T11:52:54.370774000-05:00"}}}}}`), expectFn: func(t *testing.T, event Event) { eventTime, err := time.Parse(time.RFC3339, "2020-11-05T11:52:54.370774000-05:00") - require.NoError(t, err) - require.Equal(t, TopicDeployment, event.Topic) + must.NoError(t, err) + must.Eq(t, TopicDeployment, event.Topic) d, err := event.Deployment() - require.NoError(t, err) - require.NoError(t, err) - require.Equal(t, &Deployment{ + must.NoError(t, err) + must.Eq(t, &Deployment{ ID: "some-id", JobID: "some-job-id", TaskGroups: map[string]*DeploymentState{ @@ -210,11 +207,10 @@ func TestEventStream_PayloadValueHelpers(t *testing.T) { desc: "evaluation", input: []byte(`{"Topic": "Evaluation", "Payload": {"Evaluation":{"ID":"some-id","Namespace":"some-namespace-id"}}}`), expectFn: func(t *testing.T, event Event) { - require.Equal(t, TopicEvaluation, event.Topic) + must.Eq(t, TopicEvaluation, event.Topic) eval, err := event.Evaluation() - require.NoError(t, err) - - require.Equal(t, &Evaluation{ + must.NoError(t, err) + must.Eq(t, &Evaluation{ ID: "some-id", Namespace: "some-namespace-id", }, eval) @@ -224,10 +220,10 @@ func TestEventStream_PayloadValueHelpers(t *testing.T) { desc: "allocation", input: []byte(`{"Topic": "Allocation", "Payload": {"Allocation":{"ID":"some-id","Namespace":"some-namespace-id"}}}`), expectFn: func(t *testing.T, event Event) { - require.Equal(t, TopicAllocation, event.Topic) + must.Eq(t, TopicAllocation, event.Topic) a, err := event.Allocation() - require.NoError(t, err) - require.Equal(t, &Allocation{ + must.NoError(t, err) + must.Eq(t, &Allocation{ ID: "some-id", Namespace: "some-namespace-id", }, a) @@ -236,12 +232,12 @@ func TestEventStream_PayloadValueHelpers(t *testing.T) { { input: []byte(`{"Topic": "Job", "Payload": {"Job":{"ID":"some-id","Namespace":"some-namespace-id"}}}`), expectFn: func(t *testing.T, event Event) { - require.Equal(t, TopicJob, event.Topic) + must.Eq(t, TopicJob, event.Topic) j, err := event.Job() - require.NoError(t, err) - require.Equal(t, &Job{ - ID: stringToPtr("some-id"), - Namespace: stringToPtr("some-namespace-id"), + must.NoError(t, err) + must.Eq(t, &Job{ + ID: pointerOf("some-id"), + Namespace: pointerOf("some-namespace-id"), }, j) }, }, @@ -249,10 +245,10 @@ func TestEventStream_PayloadValueHelpers(t *testing.T) { desc: "node", input: []byte(`{"Topic": "Node", "Payload": {"Node":{"ID":"some-id","Datacenter":"some-dc-id"}}}`), expectFn: func(t *testing.T, event Event) { - require.Equal(t, TopicNode, event.Topic) + must.Eq(t, TopicNode, event.Topic) n, err := event.Node() - require.NoError(t, err) - require.Equal(t, &Node{ + must.NoError(t, err) + must.Eq(t, &Node{ ID: "some-id", Datacenter: "some-dc-id", }, n) @@ -262,12 +258,12 @@ func TestEventStream_PayloadValueHelpers(t *testing.T) { desc: "service", input: []byte(`{"Topic": "Service", "Payload": {"Service":{"ID":"some-service-id","Namespace":"some-service-namespace-id","Datacenter":"us-east-1a"}}}`), expectFn: func(t *testing.T, event Event) { - require.Equal(t, TopicService, event.Topic) + must.Eq(t, TopicService, 
event.Topic) a, err := event.Service() - require.NoError(t, err) - require.Equal(t, "us-east-1a", a.Datacenter) - require.Equal(t, "some-service-id", a.ID) - require.Equal(t, "some-service-namespace-id", a.Namespace) + must.NoError(t, err) + must.Eq(t, "us-east-1a", a.Datacenter) + must.Eq(t, "some-service-id", a.ID) + must.Eq(t, "some-service-namespace-id", a.Namespace) }, }, } @@ -276,7 +272,7 @@ func TestEventStream_PayloadValueHelpers(t *testing.T) { t.Run(tc.desc, func(t *testing.T) { var out Event err := json.Unmarshal(tc.input, &out) - require.NoError(t, err) + must.NoError(t, err) tc.expectFn(t, out) }) } diff --git a/api/fs_test.go b/api/fs_test.go index 449239f9654..5879d41ab34 100644 --- a/api/fs_test.go +++ b/api/fs_test.go @@ -2,73 +2,46 @@ package api import ( "bytes" + "errors" "fmt" "io" - "reflect" "strings" "testing" "time" - units "github.com/docker/go-units" + "github.com/docker/go-units" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/kr/pretty" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" ) func TestFS_Logs(t *testing.T) { + testutil.RequireRoot(t) testutil.Parallel(t) - require := require.New(t) - rpcPort := 0 c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { - rpcPort = c.Ports.RPC - c.Client = &testutil.ClientConfig{ - Enabled: true, - } + c.DevMode = true }) defer s.Stop() - //TODO There should be a way to connect the client to the servers in - //makeClient above - require.NoError(c.Agent().SetServers([]string{fmt.Sprintf("127.0.0.1:%d", rpcPort)})) - - index := uint64(0) - testutil.WaitForResult(func() (bool, error) { - nodes, qm, err := c.Nodes().List(&QueryOptions{WaitIndex: index}) - if err != nil { - return false, err - } - index = qm.LastIndex - if len(nodes) != 1 { - return false, fmt.Errorf("expected 1 node but found: %s", pretty.Sprint(nodes)) - } - if nodes[0].Status != "ready" { - return false, fmt.Errorf("node not ready: %s", nodes[0].Status) - } - if _, ok := nodes[0].Drivers["mock_driver"]; !ok { - return false, fmt.Errorf("mock_driver not ready") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) + node := oneNodeFromNodeList(t, c.Nodes()) + index := node.ModifyIndex var input strings.Builder input.Grow(units.MB) lines := 80 * units.KB for i := 0; i < lines; i++ { - fmt.Fprintf(&input, "%d\n", i) + _, _ = fmt.Fprintf(&input, "%d\n", i) } job := &Job{ - ID: stringToPtr("TestFS_Logs"), - Region: stringToPtr("global"), + ID: pointerOf("TestFS_Logs"), + Region: pointerOf("global"), Datacenters: []string{"dc1"}, - Type: stringToPtr("batch"), + Type: pointerOf("batch"), TaskGroups: []*TaskGroup{ { - Name: stringToPtr("TestFS_LogsGroup"), + Name: pointerOf("TestFS_LogsGroup"), Tasks: []*Task{ { Name: "logger", @@ -84,47 +57,52 @@ func TestFS_Logs(t *testing.T) { jobs := c.Jobs() jobResp, _, err := jobs.Register(job, nil) - require.NoError(err) + must.NoError(t, err) index = jobResp.EvalCreateIndex - evals := c.Evaluations() - testutil.WaitForResult(func() (bool, error) { - evalResp, qm, err := evals.Info(jobResp.EvalID, &QueryOptions{WaitIndex: index}) + evaluations := c.Evaluations() + + f := func() error { + resp, qm, err := evaluations.Info(jobResp.EvalID, &QueryOptions{WaitIndex: index}) if err != nil { - return false, err - } - if evalResp.BlockedEval != "" { - t.Fatalf("Eval blocked: %s", pretty.Sprint(evalResp)) + return fmt.Errorf("failed to get evaluation info: %w", err) } + must.Eq(t, "", 
resp.BlockedEval) index = qm.LastIndex - if evalResp.Status != "complete" { - return false, fmt.Errorf("eval status: %v", evalResp.Status) + if resp.Status != "complete" { + return fmt.Errorf("evaluation status is not complete, got: %s", resp.Status) } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) + return nil + } + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(f), + wait.Timeout(10*time.Second), + wait.Gap(1*time.Second), + )) allocID := "" - testutil.WaitForResult(func() (bool, error) { + g := func() error { allocs, _, err := jobs.Allocations(*job.ID, true, &QueryOptions{WaitIndex: index}) if err != nil { - return false, err + return fmt.Errorf("failed to get allocations: %w", err) } - if len(allocs) != 1 { - return false, fmt.Errorf("unexpected number of allocs: %d", len(allocs)) + if n := len(allocs); n != 1 { + return fmt.Errorf("expected 1 allocation, got: %d", n) } if allocs[0].ClientStatus != "complete" { - return false, fmt.Errorf("alloc not complete: %s", allocs[0].ClientStatus) + return fmt.Errorf("allocation not complete: %s", allocs[0].ClientStatus) } allocID = allocs[0].ID - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) + return nil + } + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(g), + wait.Timeout(10*time.Second), + wait.Gap(1*time.Second), + )) alloc, _, err := c.Allocations().Info(allocID, nil) - require.NoError(err) + must.NoError(t, err) for i := 0; i < 3; i++ { stopCh := make(chan struct{}) @@ -143,25 +121,26 @@ func TestFS_Logs(t *testing.T) { result.Write(f.Data) case err := <-errors: // Don't Fatal here as the other assertions may - // contain helpeful information. + // contain helpful information. t.Errorf("Error: %v", err) } } // Check length - assert.Equal(t, input.Len(), result.Len(), "file size mismatch") + must.Eq(t, input.Len(), result.Len()) // Check complete ordering for i := 0; i < lines; i++ { - line, err := result.ReadBytes('\n') - require.NoErrorf(err, "unexpected error on line %d: %v", i, err) - require.Equal(fmt.Sprintf("%d\n", i), string(line)) + line, readErr := result.ReadBytes('\n') + must.NoError(t, readErr, must.Sprintf("unexpected error on line %d: %v", i, readErr)) + must.Eq(t, fmt.Sprintf("%d\n", i), string(line)) } } } func TestFS_FrameReader(t *testing.T) { testutil.Parallel(t) + // Create a channel of the frames and a cancel channel framesCh := make(chan *StreamFrame, 3) errCh := make(chan error) @@ -196,12 +175,8 @@ func TestFS_FrameReader(t *testing.T) { p := make([]byte, 12) n, err := r.Read(p[:5]) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - if off := r.Offset(); off != n { - t.Fatalf("unexpected read bytes: got %v; wanted %v", n, off) - } + must.NoError(t, err) + must.Eq(t, n, r.Offset()) off := n for { @@ -210,24 +185,16 @@ func TestFS_FrameReader(t *testing.T) { if err == io.EOF { break } - t.Fatalf("Read failed: %v", err) + must.NoError(t, err) } off += n } - if !reflect.DeepEqual(p, expected) { - t.Fatalf("read %q, wanted %q", string(p), string(expected)) - } - - if err := r.Close(); err != nil { - t.Fatalf("Close() failed: %v", err) - } - if _, ok := <-cancelCh; ok { - t.Fatalf("Close() didn't close cancel channel") - } - if len(expected) != r.Offset() { - t.Fatalf("offset %d, wanted %d", r.Offset(), len(expected)) - } + must.Eq(t, expected, p) + must.NoError(t, r.Close()) + _, ok := <-cancelCh + must.False(t, ok) + must.Eq(t, len(expected), r.Offset()) } func TestFS_FrameReader_Unblock(t *testing.T) { @@ -244,13 +211,8 @@ func 
TestFS_FrameReader_Unblock(t *testing.T) { p := make([]byte, 12) n, err := r.Read(p) - if err != nil { - t.Fatalf("Read failed: %v", err) - } - - if n != 0 { - t.Fatalf("should have unblocked") - } + must.NoError(t, err) + must.Zero(t, n) // Unset the unblock r.SetUnblockTime(0) @@ -263,7 +225,7 @@ func TestFS_FrameReader_Unblock(t *testing.T) { select { case <-resultCh: - t.Fatalf("shouldn't have unblocked") + must.Unreachable(t, must.Sprint("must not have unblocked")) case <-time.After(300 * time.Millisecond): } } @@ -279,14 +241,12 @@ func TestFS_FrameReader_Error(t *testing.T) { r.SetUnblockTime(10 * time.Millisecond) // Send an error - expected := fmt.Errorf("test error") + expected := errors.New("test error") errCh <- expected // Read a little p := make([]byte, 12) _, err := r.Read(p) - if err == nil || !strings.Contains(err.Error(), expected.Error()) { - t.Fatalf("bad error: %v", err) - } + must.ErrorIs(t, err, expected) } diff --git a/api/go.mod b/api/go.mod index 9619f2ae2fa..5bd82793cd1 100644 --- a/api/go.mod +++ b/api/go.mod @@ -1,6 +1,6 @@ module github.com/hashicorp/nomad/api -go 1.17 +go 1.19 require ( github.com/docker/go-units v0.3.3 @@ -8,18 +8,13 @@ require ( github.com/hashicorp/cronexpr v1.1.1 github.com/hashicorp/go-cleanhttp v0.5.2 github.com/hashicorp/go-rootcerts v1.0.2 - github.com/kr/pretty v0.3.0 github.com/mitchellh/go-testing-interface v1.14.1 - github.com/mitchellh/mapstructure v1.4.3 - github.com/stretchr/testify v1.7.1 + github.com/mitchellh/mapstructure v1.5.0 + github.com/shoenig/test v0.5.2 ) require ( - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/kr/text v0.2.0 // indirect + github.com/google/go-cmp v0.5.8 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/rogpeppe/go-internal v1.6.1 // indirect - gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect - gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect + github.com/stretchr/testify v1.8.1 // indirect ) diff --git a/api/go.sum b/api/go.sum index a2ab1719ebf..b3e7904e555 100644 --- a/api/go.sum +++ b/api/go.sum @@ -1,9 +1,10 @@ -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/go-units v0.3.3 h1:Xk8S3Xj5sLGlG5g67hJmYMmUgXv5N4PhkjJHHqrwnTk= github.com/docker/go-units v0.3.3/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/hashicorp/cronexpr v1.1.1 h1:NJZDd87hGXjoZBdvyCF9mX4DCq5Wy7+A/w+A7q0wn6c= @@ -12,30 +13,24 @@ github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9n github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= -github.com/kr/pretty v0.1.0/go.mod 
h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0= -github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.14.1 h1:jrgshOhYAUVNMAJiKbEu7EqAwgJJ2JqpQmpLJOu07cU= github.com/mitchellh/go-testing-interface v1.14.1/go.mod h1:gfgS7OtZj6MA4U1UrDRp04twqAjfvlZyCfX3sDjEym8= -github.com/mitchellh/mapstructure v1.4.3 h1:OVowDSCllw/YjdLkam3/sm7wEtOy59d8ndGgCcyj8cs= -github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k= -github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/shoenig/test v0.5.2 h1:ELZ7qZ/6CPrT71PXrSe2TFzLs4/cGCqqU5lZ5RhZ+B8= +github.com/shoenig/test v0.5.2/go.mod h1:xYtyGBC5Q3kzCNyJg/SjgNpfAa2kvmgA0i5+lQso8x0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/api/internal/testutil/freeport/freeport.go b/api/internal/testutil/freeport/freeport.go deleted file mode 100644 index f21698de723..00000000000 --- 
a/api/internal/testutil/freeport/freeport.go +++ /dev/null @@ -1,139 +0,0 @@ -// Package freeport provides a helper for allocating free ports across multiple -// processes on the same machine. -package freeport - -import ( - "fmt" - "math/rand" - "net" - "sync" - "time" - - "github.com/mitchellh/go-testing-interface" -) - -const ( - // blockSize is the size of the allocated port block. ports are given out - // consecutively from that block with roll-over for the lifetime of the - // application/test run. - blockSize = 100 - - // maxBlocks is the number of available port blocks. - // lowPort + maxBlocks * blockSize must be less than 65535. - maxBlocks = 10 - - // lowPort is the lowest port number that should be used. - lowPort = 8000 - - // attempts is how often we try to allocate a port block - // before giving up. - attempts = 10 -) - -var ( - // firstPort is the first port of the allocated block. - firstPort int - - // lockLn is the system-wide mutex for the port block. - lockLn net.Listener - - // mu guards nextPort - mu sync.Mutex - - // once is used to do the initialization on the first call to retrieve free - // ports - once sync.Once - - // port is the last allocated port. - port int -) - -// initialize is used to initialize freeport. -func initialize() { - if lowPort+maxBlocks*blockSize > 65535 { - panic("freeport: block size too big or too many blocks requested") - } - - rand.Seed(time.Now().UnixNano()) - firstPort, lockLn = alloc() -} - -// alloc reserves a port block for exclusive use for the lifetime of the -// application. lockLn serves as a system-wide mutex for the port block and is -// implemented as a TCP listener which is bound to the firstPort and which will -// be automatically released when the application terminates. -func alloc() (int, net.Listener) { - for i := 0; i < attempts; i++ { - block := int(rand.Int31n(int32(maxBlocks))) - firstPort := lowPort + block*blockSize - ln, err := net.ListenTCP("tcp", tcpAddr("127.0.0.1", firstPort)) - if err != nil { - continue - } - // log.Printf("[DEBUG] freeport: allocated port block %d (%d-%d)", block, firstPort, firstPort+blockSize-1) - return firstPort, ln - } - panic("freeport: cannot allocate port block") -} - -func tcpAddr(ip string, port int) *net.TCPAddr { - return &net.TCPAddr{IP: net.ParseIP(ip), Port: port} -} - -// Get wraps the Free function and panics on any failure retrieving ports. -func Get(n int) (ports []int) { - ports, err := Free(n) - if err != nil { - panic(err) - } - - return ports -} - -// GetT is suitable for use when retrieving unused ports in tests. If there is -// an error retrieving free ports, the test will be failed. -func GetT(t testing.T, n int) (ports []int) { - ports, err := Free(n) - if err != nil { - t.Fatalf("Failed retrieving free port: %v", err) - } - - return ports -} - -// Free returns a list of free ports from the allocated port block. It is safe -// to call this method concurrently. Ports have been tested to be available on -// 127.0.0.1 TCP but there is no guarantee that they will remain free in the -// future. 
-func Free(n int) (ports []int, err error) { - mu.Lock() - defer mu.Unlock() - - if n > blockSize-1 { - return nil, fmt.Errorf("freeport: block size too small") - } - - // Reserve a port block - once.Do(initialize) - - for len(ports) < n { - port++ - - // roll-over the port - if port < firstPort+1 || port >= firstPort+blockSize { - port = firstPort + 1 - } - - // if the port is in use then skip it - ln, err := net.ListenTCP("tcp", tcpAddr("127.0.0.1", port)) - if err != nil { - // log.Println("[DEBUG] freeport: port already in use: ", port) - continue - } - ln.Close() - - ports = append(ports, port) - } - // log.Println("[DEBUG] freeport: free ports:", ports) - return ports, nil -} diff --git a/api/internal/testutil/server.go b/api/internal/testutil/server.go index 950ce9c0160..d62f7f5b61e 100644 --- a/api/internal/testutil/server.go +++ b/api/internal/testutil/server.go @@ -16,16 +16,18 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "net/http" "os" "os/exec" "time" - cleanhttp "github.com/hashicorp/go-cleanhttp" + "github.com/hashicorp/go-cleanhttp" "github.com/hashicorp/nomad/api/internal/testutil/discover" - "github.com/hashicorp/nomad/api/internal/testutil/freeport" testing "github.com/mitchellh/go-testing-interface" + "github.com/shoenig/test" + "github.com/shoenig/test/must" + "github.com/shoenig/test/portal" + "github.com/shoenig/test/wait" ) // TestServerConfig is the main server configuration struct. @@ -100,12 +102,13 @@ type Telemetry struct { // passed to NewTestServerConfig to modify the server config. type ServerConfigCallback func(c *TestServerConfig) -// defaultServerConfig returns a new TestServerConfig struct -// with all of the listen ports incremented by one. +// defaultServerConfig returns a new TestServerConfig struct pre-populated with +// usable config for running as server. 
func defaultServerConfig(t testing.T) *TestServerConfig { - ports := freeport.GetT(t, 3) + grabber := portal.New(t) + ports := grabber.Grab(3) - logLevel := "DEBUG" + logLevel := "ERROR" if envLogLevel := os.Getenv("NOMAD_TEST_LOG_LEVEL"); envLogLevel != "" { logLevel = envLogLevel } @@ -155,24 +158,14 @@ func NewTestServer(t testing.T, cb ServerConfigCallback) *TestServer { } // Check that we are actually running nomad - vcmd := exec.Command(path, "-version") - vcmd.Stdout = nil - vcmd.Stderr = nil - if err := vcmd.Run(); err != nil { - t.Skipf("nomad version failed: %v", err) - } + _, err = exec.Command(path, "-version").CombinedOutput() + must.NoError(t, err) - dataDir, err := ioutil.TempDir("", "nomad") - if err != nil { - t.Fatalf("err: %s", err) - } + dataDir, err := os.MkdirTemp("", "nomad") + must.NoError(t, err) - configFile, err := ioutil.TempFile(dataDir, "nomad") - if err != nil { - defer os.RemoveAll(dataDir) - t.Fatalf("err: %s", err) - } - defer configFile.Close() + configFile, err := os.CreateTemp(dataDir, "nomad") + must.NoError(t, err) nomadConfig := defaultServerConfig(t) nomadConfig.DataDir = dataDir @@ -189,14 +182,17 @@ func NewTestServer(t testing.T, cb ServerConfigCallback) *TestServer { } configContent, err := json.Marshal(nomadConfig) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) + + _, err = configFile.Write(configContent) + must.NoError(t, err) + must.NoError(t, configFile.Sync()) + must.NoError(t, configFile.Close()) - if _, err := configFile.Write(configContent); err != nil { - t.Fatalf("err: %s", err) + args := []string{"agent", "-config", configFile.Name()} + if nomadConfig.DevMode { + args = append(args, "-dev") } - configFile.Close() stdout := io.Writer(os.Stdout) if nomadConfig.Stdout != nil { @@ -208,20 +204,14 @@ func NewTestServer(t testing.T, cb ServerConfigCallback) *TestServer { stderr = nomadConfig.Stderr } - args := []string{"agent", "-config", configFile.Name()} - if nomadConfig.DevMode { - args = append(args, "-dev") - } - // Start the server cmd := exec.Command(path, args...) cmd.Stdout = stdout cmd.Stderr = stderr - if err := cmd.Start(); err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, cmd.Start()) client := cleanhttp.DefaultClient() + client.Timeout = 10 * time.Second server := &TestServer{ Config: nomadConfig, @@ -250,21 +240,19 @@ func NewTestServer(t testing.T, cb ServerConfigCallback) *TestServer { // Stop stops the test Nomad server, and removes the Nomad data // directory once we are done. func (s *TestServer) Stop() { - defer os.RemoveAll(s.Config.DataDir) + defer func() { _ = os.RemoveAll(s.Config.DataDir) }() // wait for the process to exit to be sure that the data dir can be // deleted on all platforms. done := make(chan struct{}) go func() { defer close(done) - - s.cmd.Wait() + _ = s.cmd.Wait() }() // kill and wait gracefully - if err := s.cmd.Process.Signal(os.Interrupt); err != nil { - s.t.Errorf("err: %s", err) - } + err := s.cmd.Process.Signal(os.Interrupt) + must.NoError(s.t, err) select { case <-done: @@ -273,9 +261,9 @@ func (s *TestServer) Stop() { s.t.Logf("timed out waiting for process to gracefully terminate") } - if err := s.cmd.Process.Kill(); err != nil { - s.t.Errorf("err: %s", err) - } + err = s.cmd.Process.Kill() + must.NoError(s.t, err, must.Sprint("failed to kill process")) + select { case <-done: case <-time.After(5 * time.Second): @@ -287,44 +275,52 @@ func (s *TestServer) Stop() { // responding. 
This is an indication that the agent has started, // but will likely return before a leader is elected. func (s *TestServer) waitForAPI() { - WaitForResult(func() (bool, error) { - // Using this endpoint as it is does not have restricted access + f := func() error { resp, err := s.HTTPClient.Get(s.url("/v1/metrics")) if err != nil { - return false, err + return fmt.Errorf("failed to get metrics: %w", err) } - defer resp.Body.Close() - if err := s.requireOK(resp); err != nil { - return false, err + defer func() { _ = resp.Body.Close() }() + if err = s.requireOK(resp); err != nil { + return fmt.Errorf("metrics response is not ok: %w", err) } - return true, nil - }, func(err error) { - defer s.Stop() - s.t.Fatalf("err: %s", err) - }) + return nil + } + test.Wait(s.t, + wait.InitialSuccess( + wait.ErrorFunc(f), + wait.Timeout(10*time.Second), + wait.Gap(1*time.Second), + ), + must.Sprint("failed to wait for api"), + ) } // waitForLeader waits for the Nomad server's HTTP API to become // available, and then waits for a known leader and an index of // 1 or more to be observed to confirm leader election is done. func (s *TestServer) waitForLeader() { - WaitForResult(func() (bool, error) { + f := func() error { // Query the API and check the status code // Using this endpoint as it is does not have restricted access resp, err := s.HTTPClient.Get(s.url("/v1/status/leader")) if err != nil { - return false, err + return fmt.Errorf("failed to get leader: %w", err) } - defer resp.Body.Close() - if err := s.requireOK(resp); err != nil { - return false, err + defer func() { _ = resp.Body.Close() }() + if err = s.requireOK(resp); err != nil { + return fmt.Errorf("leader response is not ok: %w", err) } - - return true, nil - }, func(err error) { - defer s.Stop() - s.t.Fatalf("err: %s", err) - }) + return nil + } + test.Wait(s.t, + wait.InitialSuccess( + wait.ErrorFunc(f), + wait.Timeout(10*time.Second), + wait.Gap(1*time.Second), + ), + must.Sprint("failed to wait for leader"), + ) } // waitForClient waits for the Nomad client to be ready. The function returns @@ -333,36 +329,32 @@ func (s *TestServer) waitForClient() { if !s.Config.DevMode { return } - - WaitForResult(func() (bool, error) { + f := func() error { resp, err := s.HTTPClient.Get(s.url("/v1/nodes")) if err != nil { - return false, err + return fmt.Errorf("failed to get nodes: %w", err) } - defer resp.Body.Close() - if err := s.requireOK(resp); err != nil { - return false, err + defer func() { _ = resp.Body.Close() }() + if err = s.requireOK(resp); err != nil { + return fmt.Errorf("nodes response not ok: %w", err) } - var decoded []struct { ID string Status string } - - dec := json.NewDecoder(resp.Body) - if err := dec.Decode(&decoded); err != nil { - return false, err - } - - if len(decoded) != 1 || decoded[0].Status != "ready" { - return false, fmt.Errorf("Node not ready: %v", decoded) + if err = json.NewDecoder(resp.Body).Decode(&decoded); err != nil { + return fmt.Errorf("failed to decode nodes response: %w", err) } - - return true, nil - }, func(err error) { - defer s.Stop() - s.t.Fatalf("err: %s", err) - }) + return nil + } + test.Wait(s.t, + wait.InitialSuccess( + wait.ErrorFunc(f), + wait.Timeout(10*time.Second), + wait.Gap(1*time.Second), + ), + must.Sprint("failed to wait for client (node)"), + ) } // url is a helper function which takes a relative URL and @@ -374,7 +366,7 @@ func (s *TestServer) url(path string) string { // requireOK checks the HTTP response code and ensures it is acceptable. 
func (s *TestServer) requireOK(resp *http.Response) error { if resp.StatusCode != 200 { - return fmt.Errorf("Bad status code: %d", resp.StatusCode) + return fmt.Errorf("bad status code: %d", resp.StatusCode) } return nil } @@ -382,16 +374,14 @@ func (s *TestServer) requireOK(resp *http.Response) error { // put performs a new HTTP PUT request. func (s *TestServer) put(path string, body io.Reader) *http.Response { req, err := http.NewRequest("PUT", s.url(path), body) - if err != nil { - s.t.Fatalf("err: %s", err) - } + must.NoError(s.t, err) + resp, err := s.HTTPClient.Do(req) - if err != nil { - s.t.Fatalf("err: %s", err) - } - if err := s.requireOK(resp); err != nil { - defer resp.Body.Close() - s.t.Fatal(err) + must.NoError(s.t, err) + + if err = s.requireOK(resp); err != nil { + _ = resp.Body.Close() + must.NoError(s.t, err) } return resp } @@ -399,23 +389,20 @@ func (s *TestServer) put(path string, body io.Reader) *http.Response { // get performs a new HTTP GET request. func (s *TestServer) get(path string) *http.Response { resp, err := s.HTTPClient.Get(s.url(path)) - if err != nil { - s.t.Fatalf("err: %s", err) - } - if err := s.requireOK(resp); err != nil { - defer resp.Body.Close() - s.t.Fatal(err) + must.NoError(s.t, err) + + if err = s.requireOK(resp); err != nil { + _ = resp.Body.Close() + must.NoError(s.t, err) } return resp } // encodePayload returns a new io.Reader wrapping the encoded contents // of the payload, suitable for passing directly to a new request. -func (s *TestServer) encodePayload(payload interface{}) io.Reader { +func (s *TestServer) encodePayload(payload any) io.Reader { var encoded bytes.Buffer - enc := json.NewEncoder(&encoded) - if err := enc.Encode(payload); err != nil { - s.t.Fatalf("err: %s", err) - } + err := json.NewEncoder(&encoded).Encode(payload) + must.NoError(s.t, err) return &encoded } diff --git a/api/internal/testutil/slow.go b/api/internal/testutil/slow.go index d4776326f75..ea636346666 100644 --- a/api/internal/testutil/slow.go +++ b/api/internal/testutil/slow.go @@ -3,6 +3,7 @@ package testutil import ( "os" "strconv" + "syscall" "testing" ) @@ -17,14 +18,15 @@ func SkipSlow(t *testing.T, reason string) { } } -// Parallel runs t in parallel, unless CI is set to a true value. +// Parallel runs t in parallel. // -// In CI (CircleCI / GitHub Actions) we get better performance by running tests -// in serial while not restricting GOMAXPROCS. +// The API package has been vetted to be concurrency safe (ish). func Parallel(t *testing.T) { - value := os.Getenv("CI") - isCI, err := strconv.ParseBool(value) - if !isCI || err != nil { - t.Parallel() + t.Parallel() // :) +} + +func RequireRoot(t *testing.T) { + if syscall.Getuid() != 0 { + t.Skip("test requires root") } } diff --git a/api/internal/testutil/wait.go b/api/internal/testutil/wait.go deleted file mode 100644 index dba69d2dbdf..00000000000 --- a/api/internal/testutil/wait.go +++ /dev/null @@ -1,74 +0,0 @@ -package testutil - -import ( - "os" - "time" -) - -type testFn func() (bool, error) -type errorFn func(error) - -func WaitForResult(test testFn, error errorFn) { - WaitForResultRetries(500*TestMultiplier(), test, error) -} - -func WaitForResultRetries(retries int64, test testFn, error errorFn) { - for retries > 0 { - time.Sleep(10 * time.Millisecond) - retries-- - - success, err := test() - if success { - return - } - - if retries == 0 { - error(err) - } - } -} - -// AssertUntil asserts the test function passes throughout the given duration. -// Otherwise error is called on failure. 
-func AssertUntil(until time.Duration, test testFn, error errorFn) { - deadline := time.Now().Add(until) - for time.Now().Before(deadline) { - success, err := test() - if !success { - error(err) - return - } - // Sleep some arbitrary fraction of the deadline - time.Sleep(until / 30) - } -} - -// TestMultiplier returns a multiplier for retries and waits given environment -// the tests are being run under. -func TestMultiplier() int64 { - if IsCI() { - return 4 - } - - return 1 -} - -// Timeout takes the desired timeout and increases it if running in Travis -func Timeout(original time.Duration) time.Duration { - return original * time.Duration(TestMultiplier()) -} - -func IsCI() bool { - _, ok := os.LookupEnv("CI") - return ok -} - -func IsTravis() bool { - _, ok := os.LookupEnv("TRAVIS") - return ok -} - -func IsAppVeyor() bool { - _, ok := os.LookupEnv("APPVEYOR") - return ok -} diff --git a/api/ioutil.go b/api/ioutil.go index 4f585dba061..fe3cce5ac87 100644 --- a/api/ioutil.go +++ b/api/ioutil.go @@ -5,13 +5,13 @@ import ( "crypto/sha256" "crypto/sha512" "encoding/base64" - "fmt" + "errors" "hash" "io" "strings" ) -var errMismatchChecksum = fmt.Errorf("mismatch checksum") +var errMismatchChecksum = errors.New("mismatch checksum") // checksumValidatingReader is a wrapper reader that validates // the checksum of the underlying reader. @@ -38,7 +38,7 @@ type checksumValidatingReader struct { func newChecksumValidatingReader(r io.ReadCloser, digest string) (io.ReadCloser, error) { parts := strings.SplitN(digest, "=", 2) if len(parts) != 2 { - return nil, fmt.Errorf("invalid digest format") + return nil, errors.New("invalid digest format") } algo := parts[0] diff --git a/api/ioutil_test.go b/api/ioutil_test.go index 97e43f694ad..c93e3e31698 100644 --- a/api/ioutil_test.go +++ b/api/ioutil_test.go @@ -5,23 +5,23 @@ import ( "crypto/sha256" "crypto/sha512" "encoding/base64" - "fmt" + "errors" "hash" "io" - "io/ioutil" "math/rand" "testing" "testing/iotest" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestChecksumValidatingReader(t *testing.T) { testutil.Parallel(t) + data := make([]byte, 4096) _, err := rand.Read(data) - require.NoError(t, err) + must.NoError(t, err) cases := []struct { algo string @@ -33,23 +33,23 @@ func TestChecksumValidatingReader(t *testing.T) { for _, c := range cases { t.Run("valid: "+c.algo, func(t *testing.T) { - _, err := c.hash.Write(data) - require.NoError(t, err) + _, err = c.hash.Write(data) + must.NoError(t, err) checksum := c.hash.Sum(nil) digest := c.algo + "=" + base64.StdEncoding.EncodeToString(checksum) r := iotest.HalfReader(bytes.NewReader(data)) - cr, err := newChecksumValidatingReader(ioutil.NopCloser(r), digest) - require.NoError(t, err) + cr, err := newChecksumValidatingReader(io.NopCloser(r), digest) + must.NoError(t, err) - _, err = io.Copy(ioutil.Discard, cr) - require.NoError(t, err) + _, err = io.Copy(io.Discard, cr) + must.NoError(t, err) }) t.Run("invalid: "+c.algo, func(t *testing.T) { - _, err := c.hash.Write(data) - require.NoError(t, err) + _, err = c.hash.Write(data) + must.NoError(t, err) checksum := c.hash.Sum(nil) // mess up checksum @@ -57,33 +57,32 @@ func TestChecksumValidatingReader(t *testing.T) { digest := c.algo + "=" + base64.StdEncoding.EncodeToString(checksum) r := iotest.HalfReader(bytes.NewReader(data)) - cr, err := newChecksumValidatingReader(ioutil.NopCloser(r), digest) - require.NoError(t, err) + cr, err := 
newChecksumValidatingReader(io.NopCloser(r), digest) + must.NoError(t, err) - _, err = io.Copy(ioutil.Discard, cr) - require.Error(t, err) - require.Equal(t, errMismatchChecksum, err) + _, err = io.Copy(io.Discard, cr) + must.ErrorIs(t, err, errMismatchChecksum) }) } } func TestChecksumValidatingReader_PropagatesError(t *testing.T) { testutil.Parallel(t) + pr, pw := io.Pipe() - defer pr.Close() - defer pw.Close() + defer func() { _ = pr.Close() }() + defer func() { _ = pw.Close() }() - expectedErr := fmt.Errorf("some error") + expectedErr := errors.New("some error") go func() { - pw.Write([]byte("some input")) - pw.CloseWithError(expectedErr) + _, _ = pw.Write([]byte("some input")) + _ = pw.CloseWithError(expectedErr) }() cr, err := newChecksumValidatingReader(pr, "sha-256=aaaa") - require.NoError(t, err) + must.NoError(t, err) - _, err = io.Copy(ioutil.Discard, cr) - require.Error(t, err) - require.Equal(t, expectedErr, err) + _, err = io.Copy(io.Discard, cr) + must.ErrorIs(t, err, expectedErr) } diff --git a/api/jobs.go b/api/jobs.go index bbecbf5c534..cd98e243558 100644 --- a/api/jobs.go +++ b/api/jobs.go @@ -1,6 +1,7 @@ package api import ( + "errors" "fmt" "net/url" "sort" @@ -20,6 +21,10 @@ const ( // JobTypeSystem indicates a system process that should run on all clients JobTypeSystem = "system" + // JobTypeSysbatch indicates a short-lived system process that should run + // on all clients. + JobTypeSysbatch = "sysbatch" + // PeriodicSpecCron is used for a cron spec. PeriodicSpecCron = "cron" @@ -40,6 +45,16 @@ const ( RegisterEnforceIndexErrPrefix = "Enforcing job modify index" ) +const ( + // JobPeriodicLaunchSuffix is the string appended to the periodic jobs ID + // when launching derived instances of it. + JobPeriodicLaunchSuffix = "/periodic-" + + // JobDispatchLaunchSuffix is the string appended to the parameterized job's ID + // when dispatching instances of it. + JobDispatchLaunchSuffix = "/dispatch-" +) + // Jobs is used to access the job-specific endpoints. type Jobs struct { client *Client @@ -174,7 +189,7 @@ func (j *Jobs) Scale(jobID, group string, count *int, message string, error bool var count64 *int64 if count != nil { - count64 = int64ToPtr(int64(*count)) + count64 = pointerOf(int64(*count)) } req := &ScalingRequest{ Count: count64, @@ -284,7 +299,7 @@ func (j *Jobs) Evaluations(jobID string, q *QueryOptions) ([]*Evaluation, *Query // eventually GC'ed from the system. Most callers should not specify purge. 
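For the deregister options referenced in `DeregisterOpts` below, a hedged sketch of purging a job while requesting a specific evaluation priority; the client setup, job ID, and priority value are placeholders and not part of this change.

```go
// Sketch only: purge a job and ask for a specific priority on the resulting
// deregister evaluation. The job ID ("example-job") and priority are
// placeholders chosen for illustration.
package main

import (
	"log"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		log.Fatal(err)
	}

	evalID, _, err := client.Jobs().DeregisterOpts("example-job", &api.DeregisterOptions{
		Purge:        true,
		EvalPriority: 90,
	}, nil)
	if err != nil {
		log.Fatal(err)
	}
	log.Println("deregister eval:", evalID)
}
```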
func (j *Jobs) Deregister(jobID string, purge bool, q *WriteOptions) (string, *WriteMeta, error) { var resp JobDeregisterResponse - wm, err := j.client.delete(fmt.Sprintf("/v1/job/%v?purge=%t", url.PathEscape(jobID), purge), &resp, q) + wm, err := j.client.delete(fmt.Sprintf("/v1/job/%v?purge=%t", url.PathEscape(jobID), purge), nil, &resp, q) if err != nil { return "", nil, err } @@ -330,7 +345,7 @@ func (j *Jobs) DeregisterOpts(jobID string, opts *DeregisterOptions, q *WriteOpt opts.Purge, opts.Global, opts.EvalPriority, opts.NoShutdownDelay) } - wm, err := j.client.delete(endpoint, &resp, q) + wm, err := j.client.delete(endpoint, nil, &resp, q) if err != nil { return "", nil, err } @@ -386,7 +401,7 @@ func (j *Jobs) Plan(job *Job, diff bool, q *WriteOptions) (*JobPlanResponse, *Wr func (j *Jobs) PlanOpts(job *Job, opts *PlanOptions, q *WriteOptions) (*JobPlanResponse, *WriteMeta, error) { if job == nil { - return nil, nil, fmt.Errorf("must pass non-nil job") + return nil, nil, errors.New("must pass non-nil job") } // Setup the request @@ -441,8 +456,8 @@ func (j *Jobs) Revert(jobID string, version uint64, enforcePriorVersion *uint64, JobID: jobID, JobVersion: version, EnforcePriorVersion: enforcePriorVersion, - // ConsulToken: consulToken, // TODO(shoenig) enable! - VaultToken: vaultToken, + ConsulToken: consulToken, + VaultToken: vaultToken, } wm, err := j.client.write("/v1/job/"+url.PathEscape(jobID)+"/revert", req, &resp, q) if err != nil { @@ -498,15 +513,15 @@ type UpdateStrategy struct { // jobs with the old policy or for populating field defaults. func DefaultUpdateStrategy() *UpdateStrategy { return &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), } } @@ -518,39 +533,39 @@ func (u *UpdateStrategy) Copy() *UpdateStrategy { copy := new(UpdateStrategy) if u.Stagger != nil { - copy.Stagger = timeToPtr(*u.Stagger) + copy.Stagger = pointerOf(*u.Stagger) } if u.MaxParallel != nil { - copy.MaxParallel = intToPtr(*u.MaxParallel) + copy.MaxParallel = pointerOf(*u.MaxParallel) } if u.HealthCheck != nil { - copy.HealthCheck = stringToPtr(*u.HealthCheck) + copy.HealthCheck = pointerOf(*u.HealthCheck) } if u.MinHealthyTime != nil { - copy.MinHealthyTime = timeToPtr(*u.MinHealthyTime) + copy.MinHealthyTime = pointerOf(*u.MinHealthyTime) } if u.HealthyDeadline != nil { - copy.HealthyDeadline = timeToPtr(*u.HealthyDeadline) + copy.HealthyDeadline = pointerOf(*u.HealthyDeadline) } if u.ProgressDeadline != nil { - copy.ProgressDeadline = timeToPtr(*u.ProgressDeadline) + copy.ProgressDeadline = pointerOf(*u.ProgressDeadline) } if u.AutoRevert != nil { - copy.AutoRevert = boolToPtr(*u.AutoRevert) + copy.AutoRevert = pointerOf(*u.AutoRevert) } if u.Canary != nil { - copy.Canary = intToPtr(*u.Canary) + copy.Canary = pointerOf(*u.Canary) } if u.AutoPromote != nil { - copy.AutoPromote = boolToPtr(*u.AutoPromote) + copy.AutoPromote = pointerOf(*u.AutoPromote) } return copy @@ 
-562,39 +577,39 @@ func (u *UpdateStrategy) Merge(o *UpdateStrategy) { } if o.Stagger != nil { - u.Stagger = timeToPtr(*o.Stagger) + u.Stagger = pointerOf(*o.Stagger) } if o.MaxParallel != nil { - u.MaxParallel = intToPtr(*o.MaxParallel) + u.MaxParallel = pointerOf(*o.MaxParallel) } if o.HealthCheck != nil { - u.HealthCheck = stringToPtr(*o.HealthCheck) + u.HealthCheck = pointerOf(*o.HealthCheck) } if o.MinHealthyTime != nil { - u.MinHealthyTime = timeToPtr(*o.MinHealthyTime) + u.MinHealthyTime = pointerOf(*o.MinHealthyTime) } if o.HealthyDeadline != nil { - u.HealthyDeadline = timeToPtr(*o.HealthyDeadline) + u.HealthyDeadline = pointerOf(*o.HealthyDeadline) } if o.ProgressDeadline != nil { - u.ProgressDeadline = timeToPtr(*o.ProgressDeadline) + u.ProgressDeadline = pointerOf(*o.ProgressDeadline) } if o.AutoRevert != nil { - u.AutoRevert = boolToPtr(*o.AutoRevert) + u.AutoRevert = pointerOf(*o.AutoRevert) } if o.Canary != nil { - u.Canary = intToPtr(*o.Canary) + u.Canary = pointerOf(*o.Canary) } if o.AutoPromote != nil { - u.AutoPromote = boolToPtr(*o.AutoPromote) + u.AutoPromote = pointerOf(*o.AutoPromote) } } @@ -691,15 +706,15 @@ type Multiregion struct { func (m *Multiregion) Canonicalize() { if m.Strategy == nil { m.Strategy = &MultiregionStrategy{ - MaxParallel: intToPtr(0), - OnFailure: stringToPtr(""), + MaxParallel: pointerOf(0), + OnFailure: pointerOf(""), } } else { if m.Strategy.MaxParallel == nil { - m.Strategy.MaxParallel = intToPtr(0) + m.Strategy.MaxParallel = pointerOf(0) } if m.Strategy.OnFailure == nil { - m.Strategy.OnFailure = stringToPtr("") + m.Strategy.OnFailure = pointerOf("") } } if m.Regions == nil { @@ -707,7 +722,7 @@ func (m *Multiregion) Canonicalize() { } for _, region := range m.Regions { if region.Count == nil { - region.Count = intToPtr(1) + region.Count = pointerOf(1) } if region.Datacenters == nil { region.Datacenters = []string{} @@ -725,13 +740,13 @@ func (m *Multiregion) Copy() *Multiregion { copy := new(Multiregion) if m.Strategy != nil { copy.Strategy = new(MultiregionStrategy) - copy.Strategy.MaxParallel = intToPtr(*m.Strategy.MaxParallel) - copy.Strategy.OnFailure = stringToPtr(*m.Strategy.OnFailure) + copy.Strategy.MaxParallel = pointerOf(*m.Strategy.MaxParallel) + copy.Strategy.OnFailure = pointerOf(*m.Strategy.OnFailure) } for _, region := range m.Regions { copyRegion := new(MultiregionRegion) copyRegion.Name = region.Name - copyRegion.Count = intToPtr(*region.Count) + copyRegion.Count = pointerOf(*region.Count) copyRegion.Datacenters = append(copyRegion.Datacenters, region.Datacenters...) for k, v := range region.Meta { copyRegion.Meta[k] = v @@ -764,19 +779,19 @@ type PeriodicConfig struct { func (p *PeriodicConfig) Canonicalize() { if p.Enabled == nil { - p.Enabled = boolToPtr(true) + p.Enabled = pointerOf(true) } if p.Spec == nil { - p.Spec = stringToPtr("") + p.Spec = pointerOf("") } if p.SpecType == nil { - p.SpecType = stringToPtr(PeriodicSpecCron) + p.SpecType = pointerOf(PeriodicSpecCron) } if p.ProhibitOverlap == nil { - p.ProhibitOverlap = boolToPtr(false) + p.ProhibitOverlap = pointerOf(false) } if p.TimeZone == nil || *p.TimeZone == "" { - p.TimeZone = stringToPtr("UTC") + p.TimeZone = pointerOf("UTC") } } @@ -785,7 +800,7 @@ func (p *PeriodicConfig) Canonicalize() { // returned. The `time.Location` of the returned value matches that of the // passed time. 
func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { - if *p.SpecType == PeriodicSpecCron { + if p != nil && *p.SpecType == PeriodicSpecCron { e, err := cronexpr.Parse(*p.Spec) if err != nil { return time.Time{}, fmt.Errorf("failed parsing cron expression %q: %v", *p.Spec, err) @@ -889,70 +904,70 @@ func (j *Job) IsMultiregion() bool { func (j *Job) Canonicalize() { if j.ID == nil { - j.ID = stringToPtr("") + j.ID = pointerOf("") } if j.Name == nil { - j.Name = stringToPtr(*j.ID) + j.Name = pointerOf(*j.ID) } if j.ParentID == nil { - j.ParentID = stringToPtr("") + j.ParentID = pointerOf("") } if j.Namespace == nil { - j.Namespace = stringToPtr(DefaultNamespace) + j.Namespace = pointerOf(DefaultNamespace) } if j.Priority == nil { - j.Priority = intToPtr(50) + j.Priority = pointerOf(50) } if j.Stop == nil { - j.Stop = boolToPtr(false) + j.Stop = pointerOf(false) } if j.Region == nil { - j.Region = stringToPtr(GlobalRegion) + j.Region = pointerOf(GlobalRegion) } if j.Namespace == nil { - j.Namespace = stringToPtr("default") + j.Namespace = pointerOf("default") } if j.Type == nil { - j.Type = stringToPtr("service") + j.Type = pointerOf("service") } if j.AllAtOnce == nil { - j.AllAtOnce = boolToPtr(false) + j.AllAtOnce = pointerOf(false) } if j.ConsulToken == nil { - j.ConsulToken = stringToPtr("") + j.ConsulToken = pointerOf("") } if j.ConsulNamespace == nil { - j.ConsulNamespace = stringToPtr("") + j.ConsulNamespace = pointerOf("") } if j.VaultToken == nil { - j.VaultToken = stringToPtr("") + j.VaultToken = pointerOf("") } if j.VaultNamespace == nil { - j.VaultNamespace = stringToPtr("") + j.VaultNamespace = pointerOf("") } if j.NomadTokenID == nil { - j.NomadTokenID = stringToPtr("") + j.NomadTokenID = pointerOf("") } if j.Status == nil { - j.Status = stringToPtr("") + j.Status = pointerOf("") } if j.StatusDescription == nil { - j.StatusDescription = stringToPtr("") + j.StatusDescription = pointerOf("") } if j.Stable == nil { - j.Stable = boolToPtr(false) + j.Stable = pointerOf(false) } if j.Version == nil { - j.Version = uint64ToPtr(0) + j.Version = pointerOf(uint64(0)) } if j.CreateIndex == nil { - j.CreateIndex = uint64ToPtr(0) + j.CreateIndex = pointerOf(uint64(0)) } if j.ModifyIndex == nil { - j.ModifyIndex = uint64ToPtr(0) + j.ModifyIndex = pointerOf(uint64(0)) } if j.JobModifyIndex == nil { - j.JobModifyIndex = uint64ToPtr(0) + j.JobModifyIndex = pointerOf(uint64(0)) } if j.Periodic != nil { j.Periodic.Canonicalize() @@ -1085,6 +1100,13 @@ func NewSystemJob(id, name, region string, pri int) *Job { return newJob(id, name, region, JobTypeSystem, pri) } +// NewSysbatchJob creates and returns a new sysbatch-style job for short-lived +// processes designed to run on all clients, using the provided name and ID +// along with the relative job priority. +func NewSysbatchJob(id, name, region string, pri int) *Job { + return newJob(id, name, region, JobTypeSysbatch, pri) +} + // newJob is used to create a new Job struct. 
func newJob(id, name, region, typ string, pri int) *Job { return &Job{ diff --git a/api/jobs_test.go b/api/jobs_test.go index 00dff79eee0..b3beda48010 100644 --- a/api/jobs_test.go +++ b/api/jobs_test.go @@ -1,20 +1,18 @@ package api import ( - "reflect" + "fmt" "sort" - "strings" "testing" "time" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/kr/pretty" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" ) func TestJobs_Register(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -22,31 +20,29 @@ func TestJobs_Register(t *testing.T) { // Listing jobs before registering returns nothing resp, _, err := jobs.List(nil) - require.Nil(err) - require.Emptyf(resp, "expected 0 jobs, got: %d", len(resp)) + must.NoError(t, err) + must.SliceEmpty(t, resp) // Create a job and attempt to register it job := testJob() resp2, wm, err := jobs.Register(job, nil) - require.Nil(err) - require.NotNil(resp2) - require.NotEmpty(resp2.EvalID) + must.NoError(t, err) + must.NotNil(t, resp2) + must.UUIDv4(t, resp2.EvalID) assertWriteMeta(t, wm) // Query the jobs back out again resp, qm, err := jobs.List(nil) assertQueryMeta(t, qm) - require.Nil(err) + must.Nil(t, err) // Check that we got the expected response - if len(resp) != 1 || resp[0].ID != *job.ID { - t.Fatalf("bad: %#v", resp[0]) - } + must.Len(t, 1, resp) + must.Eq(t, *job.ID, resp[0].ID) } func TestJobs_Register_PreserveCounts(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -54,30 +50,30 @@ func TestJobs_Register_PreserveCounts(t *testing.T) { // Listing jobs before registering returns nothing resp, _, err := jobs.List(nil) - require.Nil(err) - require.Emptyf(resp, "expected 0 jobs, got: %d", len(resp)) + must.NoError(t, err) + must.SliceEmpty(t, resp) // Create a job task := NewTask("task", "exec"). SetConfig("command", "/bin/sleep"). Require(&Resources{ - CPU: intToPtr(100), - MemoryMB: intToPtr(256), + CPU: pointerOf(100), + MemoryMB: pointerOf(256), }). SetLogConfig(&LogConfig{ - MaxFiles: intToPtr(1), - MaxFileSizeMB: intToPtr(2), + MaxFiles: pointerOf(1), + MaxFileSizeMB: pointerOf(2), }) group1 := NewTaskGroup("group1", 1). AddTask(task). RequireDisk(&EphemeralDisk{ - SizeMB: intToPtr(25), + SizeMB: pointerOf(25), }) group2 := NewTaskGroup("group2", 2). AddTask(task). RequireDisk(&EphemeralDisk{ - SizeMB: intToPtr(25), + SizeMB: pointerOf(25), }) job := NewBatchJob("job", "redis", "global", 1). @@ -87,18 +83,18 @@ func TestJobs_Register_PreserveCounts(t *testing.T) { // Create a job and register it resp2, wm, err := jobs.Register(job, nil) - require.Nil(err) - require.NotNil(resp2) - require.NotEmpty(resp2.EvalID) + must.NoError(t, err) + must.NotNil(t, resp2) + must.UUIDv4(t, resp2.EvalID) assertWriteMeta(t, wm) // Update the job, new groups to test PreserveCounts group1.Count = nil - group2.Count = intToPtr(0) + group2.Count = pointerOf(0) group3 := NewTaskGroup("group3", 3). AddTask(task). 
RequireDisk(&EphemeralDisk{ - SizeMB: intToPtr(25), + SizeMB: pointerOf(25), }) job.AddTaskGroup(group3) @@ -106,19 +102,18 @@ func TestJobs_Register_PreserveCounts(t *testing.T) { _, _, err = jobs.RegisterOpts(job, &RegisterOptions{ PreserveCounts: true, }, nil) - require.NoError(err) + must.NoError(t, err) // Query the job scale status status, _, err := jobs.ScaleStatus(*job.ID, nil) - require.NoError(err) - require.Equal(1, status.TaskGroups["group1"].Desired) // present and nil => preserved - require.Equal(2, status.TaskGroups["group2"].Desired) // present and specified => preserved - require.Equal(3, status.TaskGroups["group3"].Desired) // new => as specific in job spec + must.NoError(t, err) + must.Eq(t, 1, status.TaskGroups["group1"].Desired) // present and nil => preserved + must.Eq(t, 2, status.TaskGroups["group2"].Desired) // present and specified => preserved + must.Eq(t, 3, status.TaskGroups["group3"].Desired) // new => as specific in job spec } func TestJobs_Register_NoPreserveCounts(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -126,30 +121,30 @@ func TestJobs_Register_NoPreserveCounts(t *testing.T) { // Listing jobs before registering returns nothing resp, _, err := jobs.List(nil) - require.Nil(err) - require.Emptyf(resp, "expected 0 jobs, got: %d", len(resp)) + must.NoError(t, err) + must.SliceEmpty(t, resp) // Create a job task := NewTask("task", "exec"). SetConfig("command", "/bin/sleep"). Require(&Resources{ - CPU: intToPtr(100), - MemoryMB: intToPtr(256), + CPU: pointerOf(100), + MemoryMB: pointerOf(256), }). SetLogConfig(&LogConfig{ - MaxFiles: intToPtr(1), - MaxFileSizeMB: intToPtr(2), + MaxFiles: pointerOf(1), + MaxFileSizeMB: pointerOf(2), }) group1 := NewTaskGroup("group1", 1). AddTask(task). RequireDisk(&EphemeralDisk{ - SizeMB: intToPtr(25), + SizeMB: pointerOf(25), }) group2 := NewTaskGroup("group2", 2). AddTask(task). RequireDisk(&EphemeralDisk{ - SizeMB: intToPtr(25), + SizeMB: pointerOf(25), }) job := NewBatchJob("job", "redis", "global", 1). @@ -159,90 +154,89 @@ func TestJobs_Register_NoPreserveCounts(t *testing.T) { // Create a job and register it resp2, wm, err := jobs.Register(job, nil) - require.Nil(err) - require.NotNil(resp2) - require.NotEmpty(resp2.EvalID) + must.NoError(t, err) + must.NotNil(t, resp2) + must.UUIDv4(t, resp2.EvalID) assertWriteMeta(t, wm) // Update the job, new groups to test PreserveCounts - group1.Count = intToPtr(0) + group1.Count = pointerOf(0) group2.Count = nil group3 := NewTaskGroup("group3", 3). AddTask(task). 
RequireDisk(&EphemeralDisk{ - SizeMB: intToPtr(25), + SizeMB: pointerOf(25), }) job.AddTaskGroup(group3) // Update the job, with PreserveCounts = default [false] _, _, err = jobs.Register(job, nil) - require.NoError(err) + must.NoError(t, err) // Query the job scale status status, _, err := jobs.ScaleStatus(*job.ID, nil) - require.NoError(err) - require.Equal("default", status.Namespace) - require.Equal(0, status.TaskGroups["group1"].Desired) // present => as specified - require.Equal(1, status.TaskGroups["group2"].Desired) // nil => default (1) - require.Equal(3, status.TaskGroups["group3"].Desired) // new => as specified + must.NoError(t, err) + must.Eq(t, "default", status.Namespace) + must.Eq(t, 0, status.TaskGroups["group1"].Desired) // present => as specified + must.Eq(t, 1, status.TaskGroups["group2"].Desired) // nil => default (1) + must.Eq(t, 3, status.TaskGroups["group3"].Desired) // new => as specified } func TestJobs_Register_EvalPriority(t *testing.T) { testutil.Parallel(t) - requireAssert := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() // Listing jobs before registering returns nothing listResp, _, err := c.Jobs().List(nil) - requireAssert.Nil(err) - requireAssert.Len(listResp, 0) + must.NoError(t, err) + must.Len(t, 0, listResp) // Create a job and register it with an eval priority. job := testJob() registerResp, wm, err := c.Jobs().RegisterOpts(job, &RegisterOptions{EvalPriority: 99}, nil) - requireAssert.Nil(err) - requireAssert.NotNil(registerResp) - requireAssert.NotEmpty(registerResp.EvalID) + must.NoError(t, err) + must.NotNil(t, registerResp) + must.UUIDv4(t, registerResp.EvalID) assertWriteMeta(t, wm) // Check the created job evaluation has a priority that matches our desired // value. evalInfo, _, err := c.Evaluations().Info(registerResp.EvalID, nil) - requireAssert.NoError(err) - requireAssert.Equal(99, evalInfo.Priority) + must.NoError(t, err) + must.Eq(t, 99, evalInfo.Priority) } func TestJobs_Register_NoEvalPriority(t *testing.T) { testutil.Parallel(t) - requireAssert := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() // Listing jobs before registering returns nothing listResp, _, err := c.Jobs().List(nil) - requireAssert.Nil(err) - requireAssert.Len(listResp, 0) + must.NoError(t, err) + must.Len(t, 0, listResp) // Create a job and register it with an eval priority. job := testJob() registerResp, wm, err := c.Jobs().RegisterOpts(job, nil, nil) - requireAssert.Nil(err) - requireAssert.NotNil(registerResp) - requireAssert.NotEmpty(registerResp.EvalID) + must.NoError(t, err) + must.NotNil(t, registerResp) + must.UUIDv4(t, registerResp.EvalID) assertWriteMeta(t, wm) // Check the created job evaluation has a priority that matches the job // priority. 
evalInfo, _, err := c.Evaluations().Info(registerResp.EvalID, nil) - requireAssert.NoError(err) - requireAssert.Equal(*job.Priority, evalInfo.Priority) + must.NoError(t, err) + must.Eq(t, *job.Priority, evalInfo.Priority) } func TestJobs_Validate(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() @@ -250,27 +244,18 @@ func TestJobs_Validate(t *testing.T) { // Create a job and attempt to register it job := testJob() resp, _, err := jobs.Validate(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - - if len(resp.ValidationErrors) != 0 { - t.Fatalf("bad %v", resp) - } + must.NoError(t, err) + must.SliceEmpty(t, resp.ValidationErrors) job.ID = nil resp1, _, err := jobs.Validate(job, nil) - if err != nil { - t.Fatalf("err: %v", err) - } - - if len(resp1.ValidationErrors) == 0 { - t.Fatalf("bad %v", resp1) - } + must.NoError(t, err) + must.Positive(t, len(resp1.ValidationErrors)) } func TestJobs_Canonicalize(t *testing.T) { testutil.Parallel(t) + testCases := []struct { name string expected *Job @@ -288,79 +273,79 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, expected: &Job{ - ID: stringToPtr(""), - Name: stringToPtr(""), - Region: stringToPtr("global"), - Namespace: stringToPtr(DefaultNamespace), - Type: stringToPtr("service"), - ParentID: stringToPtr(""), - Priority: intToPtr(50), - AllAtOnce: boolToPtr(false), - ConsulToken: stringToPtr(""), - ConsulNamespace: stringToPtr(""), - VaultToken: stringToPtr(""), - VaultNamespace: stringToPtr(""), - NomadTokenID: stringToPtr(""), - Status: stringToPtr(""), - StatusDescription: stringToPtr(""), - Stop: boolToPtr(false), - Stable: boolToPtr(false), - Version: uint64ToPtr(0), - CreateIndex: uint64ToPtr(0), - ModifyIndex: uint64ToPtr(0), - JobModifyIndex: uint64ToPtr(0), + ID: pointerOf(""), + Name: pointerOf(""), + Region: pointerOf("global"), + Namespace: pointerOf(DefaultNamespace), + Type: pointerOf("service"), + ParentID: pointerOf(""), + Priority: pointerOf(50), + AllAtOnce: pointerOf(false), + ConsulToken: pointerOf(""), + ConsulNamespace: pointerOf(""), + VaultToken: pointerOf(""), + VaultNamespace: pointerOf(""), + NomadTokenID: pointerOf(""), + Status: pointerOf(""), + StatusDescription: pointerOf(""), + Stop: pointerOf(false), + Stable: pointerOf(false), + Version: pointerOf(uint64(0)), + CreateIndex: pointerOf(uint64(0)), + ModifyIndex: pointerOf(uint64(0)), + JobModifyIndex: pointerOf(uint64(0)), Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, TaskGroups: []*TaskGroup{ { - Name: stringToPtr(""), - Count: intToPtr(1), + Name: pointerOf(""), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: pointerOf(300), }, RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - 
Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - DelayFunction: stringToPtr("exponential"), - Delay: timeToPtr(30 * time.Second), - MaxDelay: timeToPtr(1 * time.Hour), - Unlimited: boolToPtr(true), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + DelayFunction: pointerOf("exponential"), + Delay: pointerOf(30 * time.Second), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), }, Consul: &Consul{ Namespace: "", }, Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ { - KillTimeout: timeToPtr(5 * time.Second), + KillTimeout: pointerOf(5 * time.Second), LogConfig: DefaultLogConfig(), Resources: DefaultResources(), RestartPolicy: defaultServiceJobRestartPolicy(), @@ -373,7 +358,7 @@ func TestJobs_Canonicalize(t *testing.T) { { name: "batch", input: &Job{ - Type: stringToPtr("batch"), + Type: pointerOf("batch"), TaskGroups: []*TaskGroup{ { Tasks: []*Task{ @@ -383,56 +368,56 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, expected: &Job{ - ID: stringToPtr(""), - Name: stringToPtr(""), - Region: stringToPtr("global"), - Namespace: stringToPtr(DefaultNamespace), - Type: stringToPtr("batch"), - ParentID: stringToPtr(""), - Priority: intToPtr(50), - AllAtOnce: boolToPtr(false), - ConsulToken: stringToPtr(""), - ConsulNamespace: stringToPtr(""), - VaultToken: stringToPtr(""), - VaultNamespace: stringToPtr(""), - NomadTokenID: stringToPtr(""), - Status: stringToPtr(""), - StatusDescription: stringToPtr(""), - Stop: boolToPtr(false), - Stable: boolToPtr(false), - Version: uint64ToPtr(0), - CreateIndex: uint64ToPtr(0), - ModifyIndex: uint64ToPtr(0), - JobModifyIndex: uint64ToPtr(0), + ID: pointerOf(""), + Name: pointerOf(""), + Region: pointerOf("global"), + Namespace: pointerOf(DefaultNamespace), + Type: pointerOf("batch"), + ParentID: pointerOf(""), + Priority: pointerOf(50), + AllAtOnce: pointerOf(false), + ConsulToken: pointerOf(""), + ConsulNamespace: pointerOf(""), + VaultToken: pointerOf(""), + VaultNamespace: pointerOf(""), + NomadTokenID: pointerOf(""), + Status: pointerOf(""), + StatusDescription: pointerOf(""), + Stop: pointerOf(false), + Stable: pointerOf(false), + Version: pointerOf(uint64(0)), + CreateIndex: pointerOf(uint64(0)), + ModifyIndex: pointerOf(uint64(0)), + JobModifyIndex: pointerOf(uint64(0)), TaskGroups: []*TaskGroup{ { - Name: stringToPtr(""), - Count: intToPtr(1), + Name: pointerOf(""), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: 
pointerOf(300), }, RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - Attempts: intToPtr(3), - Interval: timeToPtr(24 * time.Hour), - Mode: stringToPtr("fail"), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(3), + Interval: pointerOf(24 * time.Hour), + Mode: pointerOf("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(1), - Interval: timeToPtr(24 * time.Hour), - DelayFunction: stringToPtr("constant"), - Delay: timeToPtr(5 * time.Second), - MaxDelay: timeToPtr(0), - Unlimited: boolToPtr(false), + Attempts: pointerOf(1), + Interval: pointerOf(24 * time.Hour), + DelayFunction: pointerOf("constant"), + Delay: pointerOf(5 * time.Second), + MaxDelay: pointerOf(time.Duration(0)), + Unlimited: pointerOf(false), }, Consul: &Consul{ Namespace: "", }, Tasks: []*Task{ { - KillTimeout: timeToPtr(5 * time.Second), + KillTimeout: pointerOf(5 * time.Second), LogConfig: DefaultLogConfig(), Resources: DefaultResources(), RestartPolicy: defaultBatchJobRestartPolicy(), @@ -445,13 +430,13 @@ func TestJobs_Canonicalize(t *testing.T) { { name: "partial", input: &Job{ - Name: stringToPtr("foo"), - Namespace: stringToPtr("bar"), - ID: stringToPtr("bar"), - ParentID: stringToPtr("lol"), + Name: pointerOf("foo"), + Namespace: pointerOf("bar"), + ID: pointerOf("bar"), + ParentID: pointerOf("lol"), TaskGroups: []*TaskGroup{ { - Name: stringToPtr("bar"), + Name: pointerOf("bar"), Tasks: []*Task{ { Name: "task1", @@ -461,74 +446,74 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, expected: &Job{ - Namespace: stringToPtr("bar"), - ID: stringToPtr("bar"), - Name: stringToPtr("foo"), - Region: stringToPtr("global"), - Type: stringToPtr("service"), - ParentID: stringToPtr("lol"), - Priority: intToPtr(50), - AllAtOnce: boolToPtr(false), - ConsulToken: stringToPtr(""), - ConsulNamespace: stringToPtr(""), - VaultToken: stringToPtr(""), - VaultNamespace: stringToPtr(""), - NomadTokenID: stringToPtr(""), - Stop: boolToPtr(false), - Stable: boolToPtr(false), - Version: uint64ToPtr(0), - Status: stringToPtr(""), - StatusDescription: stringToPtr(""), - CreateIndex: uint64ToPtr(0), - ModifyIndex: uint64ToPtr(0), - JobModifyIndex: uint64ToPtr(0), + Namespace: pointerOf("bar"), + ID: pointerOf("bar"), + Name: pointerOf("foo"), + Region: pointerOf("global"), + Type: pointerOf("service"), + ParentID: pointerOf("lol"), + Priority: pointerOf(50), + AllAtOnce: pointerOf(false), + ConsulToken: pointerOf(""), + ConsulNamespace: pointerOf(""), + VaultToken: pointerOf(""), + VaultNamespace: pointerOf(""), + NomadTokenID: pointerOf(""), + Stop: pointerOf(false), + Stable: pointerOf(false), + Version: pointerOf(uint64(0)), + Status: pointerOf(""), + StatusDescription: pointerOf(""), + CreateIndex: pointerOf(uint64(0)), + ModifyIndex: pointerOf(uint64(0)), + JobModifyIndex: pointerOf(uint64(0)), Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, 
TaskGroups: []*TaskGroup{ { - Name: stringToPtr("bar"), - Count: intToPtr(1), + Name: pointerOf("bar"), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: pointerOf(300), }, RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - DelayFunction: stringToPtr("exponential"), - Delay: timeToPtr(30 * time.Second), - MaxDelay: timeToPtr(1 * time.Hour), - Unlimited: boolToPtr(true), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + DelayFunction: pointerOf("exponential"), + Delay: pointerOf(30 * time.Second), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), }, Consul: &Consul{ Namespace: "", }, Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ @@ -536,7 +521,7 @@ func TestJobs_Canonicalize(t *testing.T) { Name: "task1", LogConfig: DefaultLogConfig(), Resources: DefaultResources(), - KillTimeout: timeToPtr(5 * time.Second), + KillTimeout: pointerOf(5 * time.Second), RestartPolicy: defaultServiceJobRestartPolicy(), }, }, @@ -547,50 +532,50 @@ func TestJobs_Canonicalize(t *testing.T) { { name: "example_template", input: &Job{ - ID: stringToPtr("example_template"), - Name: stringToPtr("example_template"), + ID: pointerOf("example_template"), + Name: pointerOf("example_template"), Datacenters: []string{"dc1"}, - Type: stringToPtr("service"), + Type: pointerOf("service"), Update: &UpdateStrategy{ - MaxParallel: intToPtr(1), - AutoPromote: boolToPtr(true), + MaxParallel: pointerOf(1), + AutoPromote: pointerOf(true), }, TaskGroups: []*TaskGroup{ { - Name: stringToPtr("cache"), - Count: intToPtr(1), + Name: pointerOf("cache"), + Count: pointerOf(1), RestartPolicy: &RestartPolicy{ - Interval: timeToPtr(5 * time.Minute), - Attempts: intToPtr(10), - Delay: timeToPtr(25 * time.Second), - Mode: stringToPtr("delay"), + Interval: pointerOf(5 * time.Minute), + Attempts: pointerOf(10), + Delay: pointerOf(25 * time.Second), + Mode: pointerOf("delay"), }, Update: &UpdateStrategy{ - AutoRevert: boolToPtr(true), + AutoRevert: pointerOf(true), }, EphemeralDisk: &EphemeralDisk{ - SizeMB: intToPtr(300), + SizeMB: pointerOf(300), }, Tasks: []*Task{ { Name: "redis", Driver: "docker", Config: map[string]interface{}{ - "image": "redis:3.2", + "image": "redis:7", "port_map": []map[string]int{{ "db": 6379, }}, }, RestartPolicy: &RestartPolicy{ // inherit other values from TG - Attempts: intToPtr(20), + Attempts: 
pointerOf(20), }, Resources: &Resources{ - CPU: intToPtr(500), - MemoryMB: intToPtr(256), + CPU: pointerOf(500), + MemoryMB: pointerOf(256), Networks: []*NetworkResource{ { - MBits: intToPtr(10), + MBits: pointerOf(10), DynamicPorts: []Port{ { Label: "db", @@ -617,13 +602,13 @@ func TestJobs_Canonicalize(t *testing.T) { }, Templates: []*Template{ { - EmbeddedTmpl: stringToPtr("---"), - DestPath: stringToPtr("local/file.yml"), + EmbeddedTmpl: pointerOf("---"), + DestPath: pointerOf("local/file.yml"), }, { - EmbeddedTmpl: stringToPtr("FOO=bar\n"), - DestPath: stringToPtr("local/file.env"), - Envvars: boolToPtr(true), + EmbeddedTmpl: pointerOf("FOO=bar\n"), + DestPath: pointerOf("local/file.env"), + Envvars: pointerOf(true), }, }, }, @@ -632,75 +617,75 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, expected: &Job{ - Namespace: stringToPtr(DefaultNamespace), - ID: stringToPtr("example_template"), - Name: stringToPtr("example_template"), - ParentID: stringToPtr(""), - Priority: intToPtr(50), - Region: stringToPtr("global"), - Type: stringToPtr("service"), - AllAtOnce: boolToPtr(false), - ConsulToken: stringToPtr(""), - ConsulNamespace: stringToPtr(""), - VaultToken: stringToPtr(""), - VaultNamespace: stringToPtr(""), - NomadTokenID: stringToPtr(""), - Stop: boolToPtr(false), - Stable: boolToPtr(false), - Version: uint64ToPtr(0), - Status: stringToPtr(""), - StatusDescription: stringToPtr(""), - CreateIndex: uint64ToPtr(0), - ModifyIndex: uint64ToPtr(0), - JobModifyIndex: uint64ToPtr(0), + Namespace: pointerOf(DefaultNamespace), + ID: pointerOf("example_template"), + Name: pointerOf("example_template"), + ParentID: pointerOf(""), + Priority: pointerOf(50), + Region: pointerOf("global"), + Type: pointerOf("service"), + AllAtOnce: pointerOf(false), + ConsulToken: pointerOf(""), + ConsulNamespace: pointerOf(""), + VaultToken: pointerOf(""), + VaultNamespace: pointerOf(""), + NomadTokenID: pointerOf(""), + Stop: pointerOf(false), + Stable: pointerOf(false), + Version: pointerOf(uint64(0)), + Status: pointerOf(""), + StatusDescription: pointerOf(""), + CreateIndex: pointerOf(uint64(0)), + ModifyIndex: pointerOf(uint64(0)), + JobModifyIndex: pointerOf(uint64(0)), Datacenters: []string{"dc1"}, Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(true), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(true), }, TaskGroups: []*TaskGroup{ { - Name: stringToPtr("cache"), - Count: intToPtr(1), + Name: pointerOf("cache"), + Count: pointerOf(1), RestartPolicy: &RestartPolicy{ - Interval: timeToPtr(5 * time.Minute), - Attempts: intToPtr(10), - Delay: timeToPtr(25 * time.Second), - Mode: stringToPtr("delay"), + Interval: pointerOf(5 * time.Minute), + Attempts: pointerOf(10), + Delay: pointerOf(25 * time.Second), + Mode: pointerOf("delay"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - DelayFunction: stringToPtr("exponential"), - Delay: timeToPtr(30 * time.Second), - MaxDelay: timeToPtr(1 * 
time.Hour), - Unlimited: boolToPtr(true), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + DelayFunction: pointerOf("exponential"), + Delay: pointerOf(30 * time.Second), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), }, EphemeralDisk: &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: pointerOf(300), }, Consul: &Consul{ Namespace: "", }, Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(true), - Canary: intToPtr(0), - AutoPromote: boolToPtr(true), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(true), + Canary: pointerOf(0), + AutoPromote: pointerOf(true), }, Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ @@ -708,24 +693,24 @@ func TestJobs_Canonicalize(t *testing.T) { Name: "redis", Driver: "docker", Config: map[string]interface{}{ - "image": "redis:3.2", + "image": "redis:7", "port_map": []map[string]int{{ "db": 6379, }}, }, RestartPolicy: &RestartPolicy{ - Interval: timeToPtr(5 * time.Minute), - Attempts: intToPtr(20), - Delay: timeToPtr(25 * time.Second), - Mode: stringToPtr("delay"), + Interval: pointerOf(5 * time.Minute), + Attempts: pointerOf(20), + Delay: pointerOf(25 * time.Second), + Mode: pointerOf("delay"), }, Resources: &Resources{ - CPU: intToPtr(500), - Cores: intToPtr(0), - MemoryMB: intToPtr(256), + CPU: pointerOf(500), + Cores: pointerOf(0), + MemoryMB: pointerOf(256), Networks: []*NetworkResource{ { - MBits: intToPtr(10), + MBits: pointerOf(10), DynamicPorts: []Port{ { Label: "db", @@ -754,34 +739,34 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, }, - KillTimeout: timeToPtr(5 * time.Second), + KillTimeout: pointerOf(5 * time.Second), LogConfig: DefaultLogConfig(), Templates: []*Template{ { - SourcePath: stringToPtr(""), - DestPath: stringToPtr("local/file.yml"), - EmbeddedTmpl: stringToPtr("---"), - ChangeMode: stringToPtr("restart"), - ChangeSignal: stringToPtr(""), - Splay: timeToPtr(5 * time.Second), - Perms: stringToPtr("0644"), - LeftDelim: stringToPtr("{{"), - RightDelim: stringToPtr("}}"), - Envvars: boolToPtr(false), - VaultGrace: timeToPtr(0), + SourcePath: pointerOf(""), + DestPath: pointerOf("local/file.yml"), + EmbeddedTmpl: pointerOf("---"), + ChangeMode: pointerOf("restart"), + ChangeSignal: pointerOf(""), + Splay: pointerOf(5 * time.Second), + Perms: pointerOf("0644"), + LeftDelim: pointerOf("{{"), + RightDelim: pointerOf("}}"), + Envvars: pointerOf(false), + VaultGrace: pointerOf(time.Duration(0)), }, { - SourcePath: stringToPtr(""), - DestPath: stringToPtr("local/file.env"), - EmbeddedTmpl: stringToPtr("FOO=bar\n"), - ChangeMode: stringToPtr("restart"), - ChangeSignal: stringToPtr(""), - Splay: timeToPtr(5 * time.Second), - Perms: stringToPtr("0644"), - LeftDelim: stringToPtr("{{"), - RightDelim: stringToPtr("}}"), - Envvars: boolToPtr(true), - VaultGrace: timeToPtr(0), + SourcePath: pointerOf(""), + DestPath: pointerOf("local/file.env"), + EmbeddedTmpl: pointerOf("FOO=bar\n"), + ChangeMode: pointerOf("restart"), + ChangeSignal: 
pointerOf(""), + Splay: pointerOf(5 * time.Second), + Perms: pointerOf("0644"), + LeftDelim: pointerOf("{{"), + RightDelim: pointerOf("}}"), + Envvars: pointerOf(true), + VaultGrace: pointerOf(time.Duration(0)), }, }, }, @@ -794,48 +779,48 @@ func TestJobs_Canonicalize(t *testing.T) { { name: "periodic", input: &Job{ - ID: stringToPtr("bar"), + ID: pointerOf("bar"), Periodic: &PeriodicConfig{}, }, expected: &Job{ - Namespace: stringToPtr(DefaultNamespace), - ID: stringToPtr("bar"), - ParentID: stringToPtr(""), - Name: stringToPtr("bar"), - Region: stringToPtr("global"), - Type: stringToPtr("service"), - Priority: intToPtr(50), - AllAtOnce: boolToPtr(false), - ConsulToken: stringToPtr(""), - ConsulNamespace: stringToPtr(""), - VaultToken: stringToPtr(""), - VaultNamespace: stringToPtr(""), - NomadTokenID: stringToPtr(""), - Stop: boolToPtr(false), - Stable: boolToPtr(false), - Version: uint64ToPtr(0), - Status: stringToPtr(""), - StatusDescription: stringToPtr(""), - CreateIndex: uint64ToPtr(0), - ModifyIndex: uint64ToPtr(0), - JobModifyIndex: uint64ToPtr(0), + Namespace: pointerOf(DefaultNamespace), + ID: pointerOf("bar"), + ParentID: pointerOf(""), + Name: pointerOf("bar"), + Region: pointerOf("global"), + Type: pointerOf("service"), + Priority: pointerOf(50), + AllAtOnce: pointerOf(false), + ConsulToken: pointerOf(""), + ConsulNamespace: pointerOf(""), + VaultToken: pointerOf(""), + VaultNamespace: pointerOf(""), + NomadTokenID: pointerOf(""), + Stop: pointerOf(false), + Stable: pointerOf(false), + Version: pointerOf(uint64(0)), + Status: pointerOf(""), + StatusDescription: pointerOf(""), + CreateIndex: pointerOf(uint64(0)), + ModifyIndex: pointerOf(uint64(0)), + JobModifyIndex: pointerOf(uint64(0)), Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, Periodic: &PeriodicConfig{ - Enabled: boolToPtr(true), - Spec: stringToPtr(""), - SpecType: stringToPtr(PeriodicSpecCron), - ProhibitOverlap: boolToPtr(false), - TimeZone: stringToPtr("UTC"), + Enabled: pointerOf(true), + Spec: pointerOf(""), + SpecType: pointerOf(PeriodicSpecCron), + ProhibitOverlap: pointerOf(false), + TimeZone: pointerOf("UTC"), }, }, }, @@ -843,34 +828,34 @@ func TestJobs_Canonicalize(t *testing.T) { { name: "update_merge", input: &Job{ - Name: stringToPtr("foo"), - ID: stringToPtr("bar"), - ParentID: stringToPtr("lol"), + Name: pointerOf("foo"), + ID: pointerOf("bar"), + ParentID: pointerOf("lol"), Update: &UpdateStrategy{ - Stagger: timeToPtr(1 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(6 * time.Minute), - ProgressDeadline: timeToPtr(7 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(1 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: 
pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(6 * time.Minute), + ProgressDeadline: pointerOf(7 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, TaskGroups: []*TaskGroup{ { - Name: stringToPtr("bar"), + Name: pointerOf("bar"), Consul: &Consul{ Namespace: "", }, Update: &UpdateStrategy{ - Stagger: timeToPtr(2 * time.Second), - MaxParallel: intToPtr(2), - HealthCheck: stringToPtr("manual"), - MinHealthyTime: timeToPtr(1 * time.Second), - AutoRevert: boolToPtr(true), - Canary: intToPtr(1), - AutoPromote: boolToPtr(true), + Stagger: pointerOf(2 * time.Second), + MaxParallel: pointerOf(2), + HealthCheck: pointerOf("manual"), + MinHealthyTime: pointerOf(1 * time.Second), + AutoRevert: pointerOf(true), + Canary: pointerOf(1), + AutoPromote: pointerOf(true), }, Tasks: []*Task{ { @@ -879,7 +864,7 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, { - Name: stringToPtr("baz"), + Name: pointerOf("baz"), Tasks: []*Task{ { Name: "task1", @@ -889,74 +874,74 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, expected: &Job{ - Namespace: stringToPtr(DefaultNamespace), - ID: stringToPtr("bar"), - Name: stringToPtr("foo"), - Region: stringToPtr("global"), - Type: stringToPtr("service"), - ParentID: stringToPtr("lol"), - Priority: intToPtr(50), - AllAtOnce: boolToPtr(false), - ConsulToken: stringToPtr(""), - ConsulNamespace: stringToPtr(""), - VaultToken: stringToPtr(""), - VaultNamespace: stringToPtr(""), - NomadTokenID: stringToPtr(""), - Stop: boolToPtr(false), - Stable: boolToPtr(false), - Version: uint64ToPtr(0), - Status: stringToPtr(""), - StatusDescription: stringToPtr(""), - CreateIndex: uint64ToPtr(0), - ModifyIndex: uint64ToPtr(0), - JobModifyIndex: uint64ToPtr(0), + Namespace: pointerOf(DefaultNamespace), + ID: pointerOf("bar"), + Name: pointerOf("foo"), + Region: pointerOf("global"), + Type: pointerOf("service"), + ParentID: pointerOf("lol"), + Priority: pointerOf(50), + AllAtOnce: pointerOf(false), + ConsulToken: pointerOf(""), + ConsulNamespace: pointerOf(""), + VaultToken: pointerOf(""), + VaultNamespace: pointerOf(""), + NomadTokenID: pointerOf(""), + Stop: pointerOf(false), + Stable: pointerOf(false), + Version: pointerOf(uint64(0)), + Status: pointerOf(""), + StatusDescription: pointerOf(""), + CreateIndex: pointerOf(uint64(0)), + ModifyIndex: pointerOf(uint64(0)), + JobModifyIndex: pointerOf(uint64(0)), Update: &UpdateStrategy{ - Stagger: timeToPtr(1 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(6 * time.Minute), - ProgressDeadline: timeToPtr(7 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(1 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(6 * time.Minute), + ProgressDeadline: pointerOf(7 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, TaskGroups: []*TaskGroup{ { - Name: stringToPtr("bar"), - Count: intToPtr(1), + Name: pointerOf("bar"), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: pointerOf(300), }, RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - Attempts: intToPtr(2), - Interval: 
timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - DelayFunction: stringToPtr("exponential"), - Delay: timeToPtr(30 * time.Second), - MaxDelay: timeToPtr(1 * time.Hour), - Unlimited: boolToPtr(true), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + DelayFunction: pointerOf("exponential"), + Delay: pointerOf(30 * time.Second), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), }, Consul: &Consul{ Namespace: "", }, Update: &UpdateStrategy{ - Stagger: timeToPtr(2 * time.Second), - MaxParallel: intToPtr(2), - HealthCheck: stringToPtr("manual"), - MinHealthyTime: timeToPtr(1 * time.Second), - HealthyDeadline: timeToPtr(6 * time.Minute), - ProgressDeadline: timeToPtr(7 * time.Minute), - AutoRevert: boolToPtr(true), - Canary: intToPtr(1), - AutoPromote: boolToPtr(true), + Stagger: pointerOf(2 * time.Second), + MaxParallel: pointerOf(2), + HealthCheck: pointerOf("manual"), + MinHealthyTime: pointerOf(1 * time.Second), + HealthyDeadline: pointerOf(6 * time.Minute), + ProgressDeadline: pointerOf(7 * time.Minute), + AutoRevert: pointerOf(true), + Canary: pointerOf(1), + AutoPromote: pointerOf(true), }, Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ @@ -964,46 +949,46 @@ func TestJobs_Canonicalize(t *testing.T) { Name: "task1", LogConfig: DefaultLogConfig(), Resources: DefaultResources(), - KillTimeout: timeToPtr(5 * time.Second), + KillTimeout: pointerOf(5 * time.Second), RestartPolicy: defaultServiceJobRestartPolicy(), }, }, }, { - Name: stringToPtr("baz"), - Count: intToPtr(1), + Name: pointerOf("baz"), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: pointerOf(300), }, RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - DelayFunction: stringToPtr("exponential"), - Delay: timeToPtr(30 * time.Second), - MaxDelay: timeToPtr(1 * time.Hour), - Unlimited: boolToPtr(true), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + DelayFunction: pointerOf("exponential"), + Delay: pointerOf(30 * time.Second), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), }, Consul: &Consul{ Namespace: "", }, Update: &UpdateStrategy{ - Stagger: timeToPtr(1 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(6 * time.Minute), - ProgressDeadline: timeToPtr(7 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(1 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(6 * time.Minute), + ProgressDeadline: pointerOf(7 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, Migrate: DefaultMigrateStrategy(), Tasks: 
[]*Task{ @@ -1011,7 +996,7 @@ func TestJobs_Canonicalize(t *testing.T) { Name: "task1", LogConfig: DefaultLogConfig(), Resources: DefaultResources(), - KillTimeout: timeToPtr(5 * time.Second), + KillTimeout: pointerOf(5 * time.Second), RestartPolicy: defaultServiceJobRestartPolicy(), }, }, @@ -1023,35 +1008,35 @@ func TestJobs_Canonicalize(t *testing.T) { { name: "restart_merge", input: &Job{ - Name: stringToPtr("foo"), - ID: stringToPtr("bar"), - ParentID: stringToPtr("lol"), + Name: pointerOf("foo"), + ID: pointerOf("bar"), + ParentID: pointerOf("lol"), TaskGroups: []*TaskGroup{ { - Name: stringToPtr("bar"), + Name: pointerOf("bar"), RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, Tasks: []*Task{ { Name: "task1", RestartPolicy: &RestartPolicy{ - Attempts: intToPtr(5), - Delay: timeToPtr(1 * time.Second), + Attempts: pointerOf(5), + Delay: pointerOf(1 * time.Second), }, }, }, }, { - Name: stringToPtr("baz"), + Name: pointerOf("baz"), RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(20 * time.Second), - Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(20 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, Consul: &Consul{ Namespace: "", @@ -1065,74 +1050,74 @@ func TestJobs_Canonicalize(t *testing.T) { }, }, expected: &Job{ - Namespace: stringToPtr(DefaultNamespace), - ID: stringToPtr("bar"), - Name: stringToPtr("foo"), - Region: stringToPtr("global"), - Type: stringToPtr("service"), - ParentID: stringToPtr("lol"), - Priority: intToPtr(50), - AllAtOnce: boolToPtr(false), - ConsulToken: stringToPtr(""), - ConsulNamespace: stringToPtr(""), - VaultToken: stringToPtr(""), - VaultNamespace: stringToPtr(""), - NomadTokenID: stringToPtr(""), - Stop: boolToPtr(false), - Stable: boolToPtr(false), - Version: uint64ToPtr(0), - Status: stringToPtr(""), - StatusDescription: stringToPtr(""), - CreateIndex: uint64ToPtr(0), - ModifyIndex: uint64ToPtr(0), - JobModifyIndex: uint64ToPtr(0), + Namespace: pointerOf(DefaultNamespace), + ID: pointerOf("bar"), + Name: pointerOf("foo"), + Region: pointerOf("global"), + Type: pointerOf("service"), + ParentID: pointerOf("lol"), + Priority: pointerOf(50), + AllAtOnce: pointerOf(false), + ConsulToken: pointerOf(""), + ConsulNamespace: pointerOf(""), + VaultToken: pointerOf(""), + VaultNamespace: pointerOf(""), + NomadTokenID: pointerOf(""), + Stop: pointerOf(false), + Stable: pointerOf(false), + Version: pointerOf(uint64(0)), + Status: pointerOf(""), + StatusDescription: pointerOf(""), + CreateIndex: pointerOf(uint64(0)), + ModifyIndex: pointerOf(uint64(0)), + JobModifyIndex: pointerOf(uint64(0)), Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + 
AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, TaskGroups: []*TaskGroup{ { - Name: stringToPtr("bar"), - Count: intToPtr(1), + Name: pointerOf("bar"), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: pointerOf(300), }, RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - DelayFunction: stringToPtr("exponential"), - Delay: timeToPtr(30 * time.Second), - MaxDelay: timeToPtr(1 * time.Hour), - Unlimited: boolToPtr(true), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + DelayFunction: pointerOf("exponential"), + Delay: pointerOf(30 * time.Second), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), }, Consul: &Consul{ Namespace: "", }, Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ @@ -1140,51 +1125,51 @@ func TestJobs_Canonicalize(t *testing.T) { Name: "task1", LogConfig: DefaultLogConfig(), Resources: DefaultResources(), - KillTimeout: timeToPtr(5 * time.Second), + KillTimeout: pointerOf(5 * time.Second), RestartPolicy: &RestartPolicy{ - Attempts: intToPtr(5), - Delay: timeToPtr(1 * time.Second), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Attempts: pointerOf(5), + Delay: pointerOf(1 * time.Second), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, }, }, }, { - Name: stringToPtr("baz"), - Count: intToPtr(1), + Name: pointerOf("baz"), + Count: pointerOf(1), EphemeralDisk: &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: pointerOf(300), }, RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(20 * time.Second), - Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(20 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, ReschedulePolicy: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - DelayFunction: stringToPtr("exponential"), - Delay: timeToPtr(30 * time.Second), - MaxDelay: timeToPtr(1 * time.Hour), - Unlimited: boolToPtr(true), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + DelayFunction: pointerOf("exponential"), + Delay: pointerOf(30 * time.Second), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), }, 
Consul: &Consul{ Namespace: "", }, Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, Migrate: DefaultMigrateStrategy(), Tasks: []*Task{ @@ -1192,12 +1177,12 @@ func TestJobs_Canonicalize(t *testing.T) { Name: "task1", LogConfig: DefaultLogConfig(), Resources: DefaultResources(), - KillTimeout: timeToPtr(5 * time.Second), + KillTimeout: pointerOf(5 * time.Second), RestartPolicy: &RestartPolicy{ - Delay: timeToPtr(20 * time.Second), - Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr("fail"), + Delay: pointerOf(20 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf("fail"), }, }, }, @@ -1209,14 +1194,14 @@ func TestJobs_Canonicalize(t *testing.T) { { name: "multiregion", input: &Job{ - Name: stringToPtr("foo"), - ID: stringToPtr("bar"), - ParentID: stringToPtr("lol"), + Name: pointerOf("foo"), + ID: pointerOf("bar"), + ParentID: pointerOf("lol"), Multiregion: &Multiregion{ Regions: []*MultiregionRegion{ { Name: "west", - Count: intToPtr(1), + Count: pointerOf(1), }, }, }, @@ -1224,49 +1209,49 @@ func TestJobs_Canonicalize(t *testing.T) { expected: &Job{ Multiregion: &Multiregion{ Strategy: &MultiregionStrategy{ - MaxParallel: intToPtr(0), - OnFailure: stringToPtr(""), + MaxParallel: pointerOf(0), + OnFailure: pointerOf(""), }, Regions: []*MultiregionRegion{ { Name: "west", - Count: intToPtr(1), + Count: pointerOf(1), Datacenters: []string{}, Meta: map[string]string{}, }, }, }, - Namespace: stringToPtr(DefaultNamespace), - ID: stringToPtr("bar"), - Name: stringToPtr("foo"), - Region: stringToPtr("global"), - Type: stringToPtr("service"), - ParentID: stringToPtr("lol"), - Priority: intToPtr(50), - AllAtOnce: boolToPtr(false), - ConsulToken: stringToPtr(""), - ConsulNamespace: stringToPtr(""), - VaultToken: stringToPtr(""), - VaultNamespace: stringToPtr(""), - NomadTokenID: stringToPtr(""), - Stop: boolToPtr(false), - Stable: boolToPtr(false), - Version: uint64ToPtr(0), - Status: stringToPtr(""), - StatusDescription: stringToPtr(""), - CreateIndex: uint64ToPtr(0), - ModifyIndex: uint64ToPtr(0), - JobModifyIndex: uint64ToPtr(0), + Namespace: pointerOf(DefaultNamespace), + ID: pointerOf("bar"), + Name: pointerOf("foo"), + Region: pointerOf("global"), + Type: pointerOf("service"), + ParentID: pointerOf("lol"), + Priority: pointerOf(50), + AllAtOnce: pointerOf(false), + ConsulToken: pointerOf(""), + ConsulNamespace: pointerOf(""), + VaultToken: pointerOf(""), + VaultNamespace: pointerOf(""), + NomadTokenID: pointerOf(""), + Stop: pointerOf(false), + Stable: pointerOf(false), + Version: pointerOf(uint64(0)), + Status: pointerOf(""), + StatusDescription: pointerOf(""), + CreateIndex: pointerOf(uint64(0)), + ModifyIndex: pointerOf(uint64(0)), + JobModifyIndex: pointerOf(uint64(0)), Update: &UpdateStrategy{ - Stagger: timeToPtr(30 * time.Second), - MaxParallel: intToPtr(1), - 
HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - AutoRevert: boolToPtr(false), - Canary: intToPtr(0), - AutoPromote: boolToPtr(false), + Stagger: pointerOf(30 * time.Second), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + AutoRevert: pointerOf(false), + Canary: pointerOf(0), + AutoPromote: pointerOf(false), }, }, }, @@ -1275,61 +1260,58 @@ func TestJobs_Canonicalize(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { tc.input.Canonicalize() - if !reflect.DeepEqual(tc.input, tc.expected) { - t.Fatalf("Name: %v, Diffs:\n%v", tc.name, pretty.Diff(tc.expected, tc.input)) - } + must.Eq(t, tc.expected, tc.input) }) } } func TestJobs_EnforceRegister(t *testing.T) { testutil.Parallel(t) - require := require.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() // Listing jobs before registering returns nothing resp, _, err := jobs.List(nil) - require.Nil(err) - require.Empty(resp) + must.NoError(t, err) + must.SliceEmpty(t, resp) // Create a job and attempt to register it with an incorrect index. job := testJob() resp2, _, err := jobs.EnforceRegister(job, 10, nil) - require.NotNil(err) - require.Contains(err.Error(), RegisterEnforceIndexErrPrefix) + must.ErrorContains(t, err, RegisterEnforceIndexErrPrefix) // Register resp2, wm, err := jobs.EnforceRegister(job, 0, nil) - require.Nil(err) - require.NotNil(resp2) - require.NotZero(resp2.EvalID) + must.NoError(t, err) + must.NotNil(t, resp2) + must.UUIDv4(t, resp2.EvalID) assertWriteMeta(t, wm) // Query the jobs back out again resp, qm, err := jobs.List(nil) - require.Nil(err) - require.Len(resp, 1) - require.Equal(*job.ID, resp[0].ID) + must.NoError(t, err) + must.Len(t, 1, resp) + must.Eq(t, *job.ID, resp[0].ID) assertQueryMeta(t, qm) // Fail at incorrect index curIndex := resp[0].JobModifyIndex resp2, _, err = jobs.EnforceRegister(job, 123456, nil) - require.NotNil(err) - require.Contains(err.Error(), RegisterEnforceIndexErrPrefix) + must.ErrorContains(t, err, RegisterEnforceIndexErrPrefix) // Works at correct index resp3, wm, err := jobs.EnforceRegister(job, curIndex, nil) - require.Nil(err) - require.NotNil(resp3) - require.NotZero(resp3.EvalID) + must.NoError(t, err) + must.NotNil(t, resp3) + must.UUIDv4(t, resp3.EvalID) assertWriteMeta(t, wm) } func TestJobs_Revert(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() @@ -1337,49 +1319,32 @@ func TestJobs_Revert(t *testing.T) { // Register twice job := testJob() resp, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if resp == nil || resp.EvalID == "" { - t.Fatalf("missing eval id") - } + must.NoError(t, err) + must.UUIDv4(t, resp.EvalID) assertWriteMeta(t, wm) job.Meta = map[string]string{"foo": "new"} resp, wm, err = jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if resp == nil || resp.EvalID == "" { - t.Fatalf("missing eval id") - } + must.NoError(t, err) + must.UUIDv4(t, resp.EvalID) assertWriteMeta(t, wm) // Fail revert at incorrect enforce - _, _, err = jobs.Revert(*job.ID, 0, uint64ToPtr(10), nil, "", "") - if err == nil || !strings.Contains(err.Error(), "enforcing version") { - t.Fatalf("expected enforcement error: %v", err) - } + _, 
_, err = jobs.Revert(*job.ID, 0, pointerOf(uint64(10)), nil, "", "") + must.ErrorContains(t, err, "enforcing version") // Works at correct index - revertResp, wm, err := jobs.Revert(*job.ID, 0, uint64ToPtr(1), nil, "", "") - if err != nil { - t.Fatalf("err: %s", err) - } - if revertResp.EvalID == "" { - t.Fatalf("missing eval id") - } - if revertResp.EvalCreateIndex == 0 { - t.Fatalf("bad eval create index") - } - if revertResp.JobModifyIndex == 0 { - t.Fatalf("bad job modify index") - } + revertResp, wm, err := jobs.Revert(*job.ID, 0, pointerOf(uint64(1)), nil, "", "") + must.NoError(t, err) + must.UUIDv4(t, revertResp.EvalID) + must.Positive(t, revertResp.EvalCreateIndex) + must.Positive(t, revertResp.JobModifyIndex) assertWriteMeta(t, wm) } func TestJobs_Info(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() @@ -1388,35 +1353,26 @@ func TestJobs_Info(t *testing.T) { // returns an error id := "job-id/with\\troublesome:characters\n?&字" _, _, err := jobs.Info(id, nil) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Fatalf("expected not found error, got: %#v", err) - } + must.ErrorContains(t, err, "not found") // Register the job job := testJob() job.ID = &id _, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Query the job again and ensure it exists result, qm, err := jobs.Info(id, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertQueryMeta(t, qm) // Check that the result is what we expect - if result == nil || *result.ID != *job.ID { - t.Fatalf("expect: %#v, got: %#v", job, result) - } + must.Eq(t, *result.ID, *job.ID) } func TestJobs_ScaleInvalidAction(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -1436,146 +1392,116 @@ func TestJobs_ScaleInvalidAction(t *testing.T) { } for _, test := range tests { _, _, err := jobs.Scale(test.jobID, test.group, &test.value, "reason", false, nil, nil) - require.Errorf(err, "expected jobs.Scale(%s, %s) to fail", test.jobID, test.group) - require.Containsf(err.Error(), test.want, "jobs.Scale(%s, %s) error doesn't contain %s, got: %s", test.jobID, test.group, test.want, err) + must.ErrorContains(t, err, test.want) } // Register test job job := testJob() - job.ID = stringToPtr("TestJobs_Scale") + job.ID = pointerOf("TestJobs_Scale") _, wm, err := jobs.Register(job, nil) - require.NoError(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Perform a scaling action with bad group name, verify error - _, _, err = jobs.Scale(*job.ID, "incorrect-group-name", intToPtr(2), + _, _, err = jobs.Scale(*job.ID, "incorrect-group-name", pointerOf(2), "because", false, nil, nil) - require.Error(err) - require.Contains(err.Error(), "does not exist") + must.ErrorContains(t, err, "does not exist") } func TestJobs_Versions(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() // Trying to retrieve a job by ID before it exists returns an error _, _, _, err := jobs.Versions("job1", false, nil) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Fatalf("expected not found error, got: %#v", err) - } + must.ErrorContains(t, err, "not found") // Register the job job := testJob() _, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Query the job again and ensure it exists result, 
_, qm, err := jobs.Versions("job1", false, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertQueryMeta(t, qm) // Check that the result is what we expect - if len(result) == 0 || *result[0].ID != *job.ID { - t.Fatalf("expect: %#v, got: %#v", job, result) - } + must.Eq(t, *job.ID, *result[0].ID) } func TestJobs_PrefixList(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() // Listing when nothing exists returns empty results, _, err := jobs.PrefixList("dummy") - if err != nil { - t.Fatalf("err: %s", err) - } - if n := len(results); n != 0 { - t.Fatalf("expected 0 jobs, got: %d", n) - } + must.NoError(t, err) + must.SliceEmpty(t, results) // Register the job job := testJob() _, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Query the job again and ensure it exists // Listing when nothing exists returns empty results, _, err = jobs.PrefixList((*job.ID)[:1]) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) // Check if we have the right list - if len(results) != 1 || results[0].ID != *job.ID { - t.Fatalf("bad: %#v", results) - } + must.Len(t, 1, results) + must.Eq(t, *job.ID, results[0].ID) } func TestJobs_List(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() // Listing when nothing exists returns empty results, _, err := jobs.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if n := len(results); n != 0 { - t.Fatalf("expected 0 jobs, got: %d", n) - } + must.NoError(t, err) + must.SliceEmpty(t, results) // Register the job job := testJob() _, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Query the job again and ensure it exists // Listing when nothing exists returns empty results, _, err = jobs.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) // Check if we have the right list - if len(results) != 1 || results[0].ID != *job.ID { - t.Fatalf("bad: %#v", results) - } + must.Len(t, 1, results) + must.Eq(t, *job.ID, results[0].ID) } func TestJobs_Allocations(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() // Looking up by a nonexistent job returns nothing allocs, qm, err := jobs.Allocations("job1", true, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if qm.LastIndex != 0 { - t.Fatalf("bad index: %d", qm.LastIndex) - } - if n := len(allocs); n != 0 { - t.Fatalf("expected 0 allocs, got: %d", n) - } + must.NoError(t, err) + must.Zero(t, qm.LastIndex) + must.SliceEmpty(t, allocs) // TODO: do something here to create some allocations for // an existing job, lookup again. @@ -1583,48 +1509,39 @@ func TestJobs_Allocations(t *testing.T) { func TestJobs_Evaluations(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() // Looking up by a nonexistent job ID returns nothing evals, qm, err := jobs.Evaluations("job1", nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if qm.LastIndex != 0 { - t.Fatalf("bad index: %d", qm.LastIndex) - } - if n := len(evals); n != 0 { - t.Fatalf("expected 0 evals, got: %d", n) - } + must.NoError(t, err) + must.Zero(t, qm.LastIndex) + must.SliceEmpty(t, evals) // Insert a job. This also creates an evaluation so we should // be able to query that out after. 
job := testJob() resp, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Look up the evaluations again. evals, qm, err = jobs.Evaluations("job1", nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertQueryMeta(t, qm) // Check that we got the evals back, evals are in order most recent to least recent // so the last eval is the original registered eval idx := len(evals) - 1 - if n := len(evals); n == 0 || evals[idx].ID != resp.EvalID { - t.Fatalf("expected >= 1 eval (%s), got: %#v", resp.EvalID, evals[idx]) - } + must.Positive(t, len(evals)) + must.Eq(t, resp.EvalID, evals[idx].ID) } func TestJobs_Deregister(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() @@ -1632,151 +1549,128 @@ func TestJobs_Deregister(t *testing.T) { // Register a new job job := testJob() _, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) - // Attempting delete on non-existing job returns an error - if _, _, err = jobs.Deregister("nope", false, nil); err != nil { - t.Fatalf("unexpected error deregistering job: %v", err) - } + // Attempting delete on non-existing job does not return an error + _, _, err = jobs.Deregister("nope", false, nil) + must.NoError(t, err) // Do a soft deregister of an existing job evalID, wm3, err := jobs.Deregister("job1", false, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm3) - if evalID == "" { - t.Fatalf("missing eval ID") - } + must.UUIDv4(t, evalID) // Check that the job is still queryable out, qm1, err := jobs.Info("job1", nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertQueryMeta(t, qm1) - if out == nil { - t.Fatalf("missing job") - } + must.NotNil(t, out) // Do a purge deregister of an existing job evalID, wm4, err := jobs.Deregister("job1", true, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) + assertWriteMeta(t, wm4) - if evalID == "" { - t.Fatalf("missing eval ID") - } + must.UUIDv4(t, evalID) // Check that the job is really gone result, qm, err := jobs.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) + assertQueryMeta(t, qm) - if n := len(result); n != 0 { - t.Fatalf("expected 0 jobs, got: %d", n) - } + must.SliceEmpty(t, result) } func TestJobs_Deregister_EvalPriority(t *testing.T) { testutil.Parallel(t) - requireAssert := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() // Listing jobs before registering returns nothing listResp, _, err := c.Jobs().List(nil) - requireAssert.Nil(err) - requireAssert.Len(listResp, 0) + must.NoError(t, err) + must.SliceEmpty(t, listResp) // Create a job and register it. job := testJob() registerResp, wm, err := c.Jobs().Register(job, nil) - requireAssert.Nil(err) - requireAssert.NotNil(registerResp) - requireAssert.NotEmpty(registerResp.EvalID) + must.NoError(t, err) + must.NotNil(t, registerResp) + must.UUIDv4(t, registerResp.EvalID) assertWriteMeta(t, wm) // Deregister the job with an eval priority. evalID, _, err := c.Jobs().DeregisterOpts(*job.ID, &DeregisterOptions{EvalPriority: 97}, nil) - requireAssert.NoError(err) - requireAssert.NotEmpty(t, evalID) + must.NoError(t, err) + must.UUIDv4(t, evalID) // Lookup the eval and check the priority on it. 
evalInfo, _, err := c.Evaluations().Info(evalID, nil) - requireAssert.NoError(err) - requireAssert.Equal(97, evalInfo.Priority) + must.NoError(t, err) + must.Eq(t, 97, evalInfo.Priority) } func TestJobs_Deregister_NoEvalPriority(t *testing.T) { testutil.Parallel(t) - requireAssert := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() // Listing jobs before registering returns nothing listResp, _, err := c.Jobs().List(nil) - requireAssert.Nil(err) - requireAssert.Len(listResp, 0) + must.NoError(t, err) + must.SliceEmpty(t, listResp) // Create a job and register it. job := testJob() registerResp, wm, err := c.Jobs().Register(job, nil) - requireAssert.Nil(err) - requireAssert.NotNil(registerResp) - requireAssert.NotEmpty(registerResp.EvalID) + must.NoError(t, err) + must.NotNil(t, registerResp) + must.UUIDv4(t, registerResp.EvalID) assertWriteMeta(t, wm) // Deregister the job with an eval priority. evalID, _, err := c.Jobs().DeregisterOpts(*job.ID, &DeregisterOptions{}, nil) - requireAssert.NoError(err) - requireAssert.NotEmpty(t, evalID) + must.NoError(t, err) + must.UUIDv4(t, evalID) // Lookup the eval and check the priority on it. evalInfo, _, err := c.Evaluations().Info(evalID, nil) - requireAssert.NoError(err) - requireAssert.Equal(*job.Priority, evalInfo.Priority) + must.NoError(t, err) + must.Eq(t, *job.Priority, evalInfo.Priority) } func TestJobs_ForceEvaluate(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() // Force-eval on a non-existent job fails _, _, err := jobs.ForceEvaluate("job1", nil) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Fatalf("expected not found error, got: %#v", err) - } + must.ErrorContains(t, err, "not found") // Create a new job _, wm, err := jobs.Register(testJob(), nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Try force-eval again evalID, wm, err := jobs.ForceEvaluate("job1", nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Retrieve the evals and see if we get a matching one evals, qm, err := jobs.Evaluations("job1", nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertQueryMeta(t, qm) + + // todo(shoenig) fix must.SliceContainsFunc and use that + // https://github.com/shoenig/test/issues/88 for _, eval := range evals { if eval.ID == evalID { return @@ -1787,59 +1681,60 @@ func TestJobs_ForceEvaluate(t *testing.T) { func TestJobs_PeriodicForce(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() + jobs := c.Jobs() // Force-eval on a nonexistent job fails _, _, err := jobs.PeriodicForce("job1", nil) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Fatalf("expected not found error, got: %#v", err) - } + must.ErrorContains(t, err, "not found") // Create a new job job := testPeriodicJob() _, _, err = jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) - testutil.WaitForResult(func() (bool, error) { + f := func() error { out, _, err := jobs.Info(*job.ID, nil) - if err != nil || out == nil || *out.ID != *job.ID { - return false, err + if err != nil { + return fmt.Errorf("failed to get jobs info: %w", err) } - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + if out == nil { + return fmt.Errorf("jobs info response is nil") + } + if *out.ID != *job.ID { + return fmt.Errorf("expected job ids to match, out: %s, job: %s", 
*out.ID, *job.ID) + } + return nil + } + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(f), + wait.Timeout(10*time.Second), + wait.Gap(1*time.Second), + )) // Try force again evalID, wm, err := jobs.PeriodicForce(*job.ID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) + assertWriteMeta(t, wm) - if evalID == "" { - t.Fatalf("empty evalID") - } + must.NotEq(t, "", evalID) // Retrieve the eval - evals := c.Evaluations() - eval, qm, err := evals.Info(evalID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + evaluations := c.Evaluations() + eval, qm, err := evaluations.Info(evalID, nil) + must.NoError(t, err) + assertQueryMeta(t, qm) - if eval.ID == evalID { - return - } - t.Fatalf("evaluation %q missing", evalID) + must.Eq(t, eval.ID, evalID) } func TestJobs_Plan(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() @@ -1847,71 +1742,38 @@ func TestJobs_Plan(t *testing.T) { // Create a job and attempt to register it job := testJob() resp, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if resp == nil || resp.EvalID == "" { - t.Fatalf("missing eval id") - } + must.NoError(t, err) + must.UUIDv4(t, resp.EvalID) assertWriteMeta(t, wm) // Check that passing a nil job fails - if _, _, err := jobs.Plan(nil, true, nil); err == nil { - t.Fatalf("expect an error when job isn't provided") - } + _, _, err = jobs.Plan(nil, true, nil) + must.Error(t, err) // Make a plan request planResp, wm, err := jobs.Plan(job, true, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if planResp == nil { - t.Fatalf("nil response") - } - - if planResp.JobModifyIndex == 0 { - t.Fatalf("bad JobModifyIndex value: %#v", planResp) - } - if planResp.Diff == nil { - t.Fatalf("got nil diff: %#v", planResp) - } - if planResp.Annotations == nil { - t.Fatalf("got nil annotations: %#v", planResp) - } - // Can make this assertion because there are no clients. - if len(planResp.CreatedEvals) == 0 { - t.Fatalf("got no CreatedEvals: %#v", planResp) - } + must.NoError(t, err) + must.NotNil(t, planResp) + must.Positive(t, planResp.JobModifyIndex) + must.NotNil(t, planResp.Diff) + must.NotNil(t, planResp.Annotations) + must.SliceNotEmpty(t, planResp.CreatedEvals) assertWriteMeta(t, wm) // Make a plan request w/o the diff planResp, wm, err = jobs.Plan(job, false, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) + must.NotNil(t, planResp) assertWriteMeta(t, wm) - - if planResp == nil { - t.Fatalf("nil response") - } - - if planResp.JobModifyIndex == 0 { - t.Fatalf("bad JobModifyIndex value: %d", planResp.JobModifyIndex) - } - if planResp.Diff != nil { - t.Fatalf("got non-nil diff: %#v", planResp) - } - if planResp.Annotations == nil { - t.Fatalf("got nil annotations: %#v", planResp) - } - // Can make this assertion because there are no clients. 
- if len(planResp.CreatedEvals) == 0 { - t.Fatalf("got no CreatedEvals: %#v", planResp) - } + must.Positive(t, planResp.JobModifyIndex) + must.Nil(t, planResp.Diff) + must.NotNil(t, planResp.Annotations) + must.SliceNotEmpty(t, planResp.CreatedEvals) } func TestJobs_JobSummary(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() @@ -1919,78 +1781,81 @@ func TestJobs_JobSummary(t *testing.T) { // Trying to retrieve a job summary before the job exists // returns an error _, _, err := jobs.Summary("job1", nil) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Fatalf("expected not found error, got: %#v", err) - } + must.ErrorContains(t, err, "not found") // Register the job job := testJob() taskName := job.TaskGroups[0].Name _, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Query the job summary again and ensure it exists result, qm, err := jobs.Summary("job1", nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertQueryMeta(t, qm) // Check that the result is what we expect - if *job.ID != result.JobID { - t.Fatalf("err: expected job id of %s saw %s", *job.ID, result.JobID) - } - if _, ok := result.Summary[*taskName]; !ok { - t.Fatalf("err: unable to find %s key in job summary", *taskName) - } + must.Eq(t, *job.ID, result.JobID) + + _, ok := result.Summary[*taskName] + must.True(t, ok) } func TestJobs_NewBatchJob(t *testing.T) { testutil.Parallel(t) + job := NewBatchJob("job1", "myjob", "global", 5) expect := &Job{ - Region: stringToPtr("global"), - ID: stringToPtr("job1"), - Name: stringToPtr("myjob"), - Type: stringToPtr(JobTypeBatch), - Priority: intToPtr(5), - } - if !reflect.DeepEqual(job, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, job) + Region: pointerOf("global"), + ID: pointerOf("job1"), + Name: pointerOf("myjob"), + Type: pointerOf(JobTypeBatch), + Priority: pointerOf(5), } + must.Eq(t, expect, job) } func TestJobs_NewServiceJob(t *testing.T) { testutil.Parallel(t) + job := NewServiceJob("job1", "myjob", "global", 5) expect := &Job{ - Region: stringToPtr("global"), - ID: stringToPtr("job1"), - Name: stringToPtr("myjob"), - Type: stringToPtr(JobTypeService), - Priority: intToPtr(5), - } - if !reflect.DeepEqual(job, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, job) + Region: pointerOf("global"), + ID: pointerOf("job1"), + Name: pointerOf("myjob"), + Type: pointerOf(JobTypeService), + Priority: pointerOf(5), } + must.Eq(t, expect, job) } func TestJobs_NewSystemJob(t *testing.T) { testutil.Parallel(t) + job := NewSystemJob("job1", "myjob", "global", 5) expect := &Job{ - Region: stringToPtr("global"), - ID: stringToPtr("job1"), - Name: stringToPtr("myjob"), - Type: stringToPtr(JobTypeSystem), - Priority: intToPtr(5), + Region: pointerOf("global"), + ID: pointerOf("job1"), + Name: pointerOf("myjob"), + Type: pointerOf(JobTypeSystem), + Priority: pointerOf(5), } - if !reflect.DeepEqual(job, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, job) + must.Eq(t, expect, job) +} + +func TestJobs_NewSysbatchJob(t *testing.T) { + testutil.Parallel(t) + + job := NewSysbatchJob("job1", "myjob", "global", 5) + expect := &Job{ + Region: pointerOf("global"), + ID: pointerOf("job1"), + Name: pointerOf("myjob"), + Type: pointerOf(JobTypeSysbatch), + Priority: pointerOf(5), } + must.Eq(t, expect, job) } func TestJobs_SetMeta(t *testing.T) { @@ -1999,37 +1864,28 @@ func TestJobs_SetMeta(t *testing.T) { 
// Initializes a nil map out := job.SetMeta("foo", "bar") - if job.Meta == nil { - t.Fatalf("should initialize metadata") - } + must.NotNil(t, job.Meta) // Check that the job was returned - if job != out { - t.Fatalf("expect: %#v, got: %#v", job, out) - } + must.Eq(t, out, job) // Setting another pair is additive job.SetMeta("baz", "zip") expect := map[string]string{"foo": "bar", "baz": "zip"} - if !reflect.DeepEqual(job.Meta, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, job.Meta) - } + must.Eq(t, expect, job.Meta) } func TestJobs_Constrain(t *testing.T) { testutil.Parallel(t) + job := &Job{Constraints: nil} // Create and add a constraint out := job.Constrain(NewConstraint("kernel.name", "=", "darwin")) - if n := len(job.Constraints); n != 1 { - t.Fatalf("expected 1 constraint, got: %d", n) - } + must.Len(t, 1, job.Constraints) // Check that the job was returned - if job != out { - t.Fatalf("expect: %#v, got: %#v", job, out) - } + must.Eq(t, job, out) // Adding another constraint preserves the original job.Constrain(NewConstraint("memory.totalbytes", ">=", "128000000")) @@ -2045,25 +1901,20 @@ func TestJobs_Constrain(t *testing.T) { Operand: ">=", }, } - if !reflect.DeepEqual(job.Constraints, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, job.Constraints) - } + must.Eq(t, expect, job.Constraints) } func TestJobs_AddAffinity(t *testing.T) { testutil.Parallel(t) + job := &Job{Affinities: nil} // Create and add an affinity out := job.AddAffinity(NewAffinity("kernel.version", "=", "4.6", 100)) - if n := len(job.Affinities); n != 1 { - t.Fatalf("expected 1 affinity, got: %d", n) - } + must.Len(t, 1, job.Affinities) // Check that the job was returned - if job != out { - t.Fatalf("expect: %#v, got: %#v", job, out) - } + must.Eq(t, job, out) // Adding another affinity preserves the original job.AddAffinity(NewAffinity("${node.datacenter}", "=", "dc2", 50)) @@ -2072,22 +1923,21 @@ func TestJobs_AddAffinity(t *testing.T) { LTarget: "kernel.version", RTarget: "4.6", Operand: "=", - Weight: int8ToPtr(100), + Weight: pointerOf(int8(100)), }, { LTarget: "${node.datacenter}", RTarget: "dc2", Operand: "=", - Weight: int8ToPtr(50), + Weight: pointerOf(int8(50)), }, } - if !reflect.DeepEqual(job.Affinities, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, job.Affinities) - } + must.Eq(t, expect, job.Affinities) } func TestJobs_Sort(t *testing.T) { testutil.Parallel(t) + jobs := []*JobListStub{ {ID: "job2"}, {ID: "job0"}, @@ -2100,13 +1950,12 @@ func TestJobs_Sort(t *testing.T) { {ID: "job1"}, {ID: "job2"}, } - if !reflect.DeepEqual(jobs, expect) { - t.Fatalf("\n\n%#v\n\n%#v", jobs, expect) - } + must.Eq(t, expect, jobs) } func TestJobs_AddSpread(t *testing.T) { testutil.Parallel(t) + job := &Job{Spreads: nil} // Create and add a Spread @@ -2114,14 +1963,10 @@ func TestJobs_AddSpread(t *testing.T) { spread := NewSpread("${meta.rack}", 100, []*SpreadTarget{spreadTarget}) out := job.AddSpread(spread) - if n := len(job.Spreads); n != 1 { - t.Fatalf("expected 1 spread, got: %d", n) - } + must.Len(t, 1, job.Spreads) // Check that the job was returned - if job != out { - t.Fatalf("expect: %#v, got: %#v", job, out) - } + must.Eq(t, job, out) // Adding another spread preserves the original spreadTarget2 := NewSpreadTarget("dc1", 100) @@ -2132,7 +1977,7 @@ func TestJobs_AddSpread(t *testing.T) { expect := []*Spread{ { Attribute: "${meta.rack}", - Weight: int8ToPtr(100), + Weight: pointerOf(int8(100)), SpreadTarget: []*SpreadTarget{ { Value: "r1", @@ -2142,7 +1987,7 @@ func TestJobs_AddSpread(t 
*testing.T) { }, { Attribute: "${node.datacenter}", - Weight: int8ToPtr(100), + Weight: pointerOf(int8(100)), SpreadTarget: []*SpreadTarget{ { Value: "dc1", @@ -2151,15 +1996,12 @@ func TestJobs_AddSpread(t *testing.T) { }, }, } - if !reflect.DeepEqual(job.Spreads, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, job.Spreads) - } + must.Eq(t, expect, job.Spreads) } // TestJobs_ScaleAction tests the scale target for task group count func TestJobs_ScaleAction(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -2173,54 +2015,49 @@ func TestJobs_ScaleAction(t *testing.T) { newCount := origCount + 1 // Trying to scale against a target before it exists returns an error - _, _, err := jobs.Scale(id, "missing", intToPtr(newCount), "this won't work", - false, nil, nil) - require.Error(err) - require.Contains(err.Error(), "not found") + _, _, err := jobs.Scale(id, "missing", pointerOf(newCount), "this won't work", false, nil, nil) + must.ErrorContains(t, err, "not found") // Register the job regResp, wm, err := jobs.Register(job, nil) - require.NoError(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Perform scaling action scalingResp, wm, err := jobs.Scale(id, groupName, - intToPtr(newCount), "need more instances", false, + pointerOf(newCount), "need more instances", false, map[string]interface{}{ "meta": "data", }, nil) - require.NoError(err) - require.NotNil(scalingResp) - require.NotEmpty(scalingResp.EvalID) - require.NotEmpty(scalingResp.EvalCreateIndex) - require.Greater(scalingResp.JobModifyIndex, regResp.JobModifyIndex) + must.NoError(t, err) + must.NotNil(t, scalingResp) + must.UUIDv4(t, scalingResp.EvalID) + must.Positive(t, scalingResp.EvalCreateIndex) + must.Greater(t, regResp.JobModifyIndex, scalingResp.JobModifyIndex) assertWriteMeta(t, wm) // Query the job again resp, _, err := jobs.Info(*job.ID, nil) - require.NoError(err) - require.Equal(*resp.TaskGroups[0].Count, newCount) + must.NoError(t, err) + must.Eq(t, *resp.TaskGroups[0].Count, newCount) // Check for the scaling event status, _, err := jobs.ScaleStatus(*job.ID, nil) - require.NoError(err) - require.Len(status.TaskGroups[groupName].Events, 1) + must.NoError(t, err) + must.Len(t, 1, status.TaskGroups[groupName].Events) scalingEvent := status.TaskGroups[groupName].Events[0] - require.False(scalingEvent.Error) - require.Equal("need more instances", scalingEvent.Message) - require.Equal(map[string]interface{}{ - "meta": "data", - }, scalingEvent.Meta) - require.Greater(scalingEvent.Time, uint64(0)) - require.NotNil(scalingEvent.EvalID) - require.Equal(scalingResp.EvalID, *scalingEvent.EvalID) - require.Equal(int64(origCount), scalingEvent.PreviousCount) + must.False(t, scalingEvent.Error) + must.Eq(t, "need more instances", scalingEvent.Message) + must.MapEq(t, map[string]interface{}{"meta": "data"}, scalingEvent.Meta) + must.Positive(t, scalingEvent.Time) + must.UUIDv4(t, *scalingEvent.EvalID) + must.Eq(t, scalingResp.EvalID, *scalingEvent.EvalID) + must.Eq(t, int64(origCount), scalingEvent.PreviousCount) } func TestJobs_ScaleAction_Error(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -2234,7 +2071,7 @@ func TestJobs_ScaleAction_Error(t *testing.T) { // Register the job regResp, wm, err := jobs.Register(job, nil) - require.NoError(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Perform scaling action @@ -2243,36 +2080,33 @@ func TestJobs_ScaleAction_Error(t *testing.T) { "meta": 
"data", }, nil) - require.NoError(err) - require.NotNil(scaleResp) - require.Empty(scaleResp.EvalID) - require.Empty(scaleResp.EvalCreateIndex) + must.NoError(t, err) + must.NotNil(t, scaleResp) + must.Eq(t, "", scaleResp.EvalID) + must.Zero(t, scaleResp.EvalCreateIndex) assertWriteMeta(t, wm) // Query the job again resp, _, err := jobs.Info(*job.ID, nil) - require.NoError(err) - require.Equal(*resp.TaskGroups[0].Count, prevCount) - require.Equal(regResp.JobModifyIndex, scaleResp.JobModifyIndex) - require.Empty(scaleResp.EvalCreateIndex) - require.Empty(scaleResp.EvalID) + must.NoError(t, err) + must.Eq(t, *resp.TaskGroups[0].Count, prevCount) + must.Eq(t, regResp.JobModifyIndex, scaleResp.JobModifyIndex) + must.Zero(t, scaleResp.EvalCreateIndex) + must.Eq(t, "", scaleResp.EvalID) status, _, err := jobs.ScaleStatus(*job.ID, nil) - require.NoError(err) - require.Len(status.TaskGroups[groupName].Events, 1) + must.NoError(t, err) + must.Len(t, 1, status.TaskGroups[groupName].Events) errEvent := status.TaskGroups[groupName].Events[0] - require.True(errEvent.Error) - require.Equal("something bad happened", errEvent.Message) - require.Equal(map[string]interface{}{ - "meta": "data", - }, errEvent.Meta) - require.Greater(errEvent.Time, uint64(0)) - require.Nil(errEvent.EvalID) + must.True(t, errEvent.Error) + must.Eq(t, "something bad happened", errEvent.Message) + must.Eq(t, map[string]interface{}{"meta": "data"}, errEvent.Meta) + must.Positive(t, errEvent.Time) + must.Nil(t, errEvent.EvalID) } func TestJobs_ScaleAction_Noop(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -2286,7 +2120,7 @@ func TestJobs_ScaleAction_Noop(t *testing.T) { // Register the job regResp, wm, err := jobs.Register(job, nil) - require.NoError(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Perform scaling action @@ -2295,39 +2129,35 @@ func TestJobs_ScaleAction_Noop(t *testing.T) { "meta": "data", }, nil) - require.NoError(err) - require.NotNil(scaleResp) - require.Empty(scaleResp.EvalID) - require.Empty(scaleResp.EvalCreateIndex) + must.NoError(t, err) + must.NotNil(t, scaleResp) + must.Eq(t, "", scaleResp.EvalID) + must.Zero(t, scaleResp.EvalCreateIndex) assertWriteMeta(t, wm) // Query the job again resp, _, err := jobs.Info(*job.ID, nil) - require.NoError(err) - require.Equal(*resp.TaskGroups[0].Count, prevCount) - require.Equal(regResp.JobModifyIndex, scaleResp.JobModifyIndex) - require.Empty(scaleResp.EvalCreateIndex) - require.Empty(scaleResp.EvalID) + must.NoError(t, err) + must.Eq(t, *resp.TaskGroups[0].Count, prevCount) + must.Eq(t, regResp.JobModifyIndex, scaleResp.JobModifyIndex) + must.Zero(t, scaleResp.EvalCreateIndex) + must.NotNil(t, scaleResp.EvalID) status, _, err := jobs.ScaleStatus(*job.ID, nil) - require.NoError(err) - require.Len(status.TaskGroups[groupName].Events, 1) + must.NoError(t, err) + must.Len(t, 1, status.TaskGroups[groupName].Events) noopEvent := status.TaskGroups[groupName].Events[0] - require.False(noopEvent.Error) - require.Equal("no count, just informative", noopEvent.Message) - require.Equal(map[string]interface{}{ - "meta": "data", - }, noopEvent.Meta) - require.Greater(noopEvent.Time, uint64(0)) - require.Nil(noopEvent.EvalID) + must.False(t, noopEvent.Error) + must.Eq(t, "no count, just informative", noopEvent.Message) + must.MapEq(t, map[string]interface{}{"meta": "data"}, noopEvent.Meta) + must.Positive(t, noopEvent.Time) + must.Nil(t, noopEvent.EvalID) } // TestJobs_ScaleStatus tests the /scale status 
endpoint for task group count func TestJobs_ScaleStatus(t *testing.T) { testutil.Parallel(t) - require := require.New(t) - c, s := makeClient(t, nil, nil) defer s.Stop() jobs := c.Jobs() @@ -2335,8 +2165,7 @@ func TestJobs_ScaleStatus(t *testing.T) { // Trying to retrieve a status before it exists returns an error id := "job-id/with\\troublesome:characters\n?&字" _, _, err := jobs.ScaleStatus(id, nil) - require.Error(err) - require.Contains(err.Error(), "not found") + must.ErrorContains(t, err, "not found") // Register the job job := testJob() @@ -2344,18 +2173,16 @@ func TestJobs_ScaleStatus(t *testing.T) { groupName := *job.TaskGroups[0].Name groupCount := *job.TaskGroups[0].Count _, wm, err := jobs.Register(job, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, wm) // Query the scaling endpoint and verify success result, qm, err := jobs.ScaleStatus(id, nil) - require.NoError(err) + must.NoError(t, err) assertQueryMeta(t, qm) // Check that the result is what we expect - require.Equal(groupCount, result.TaskGroups[groupName].Desired) + must.Eq(t, groupCount, result.TaskGroups[groupName].Desired) } func TestJobs_Services(t *testing.T) { @@ -2372,11 +2199,10 @@ func TestJobs_Parse(t *testing.T) { // that parsing is done server-side and not via the jobspec package. { c, err := NewClient(DefaultConfig()) - require.NoError(t, err) + must.NoError(t, err) _, err = c.Jobs().ParseHCL(jobspec, false) - require.Error(t, err) - require.Contains(t, err.Error(), "Put") + must.ErrorContains(t, err, "Put") } c, s := makeClient(t, nil, nil) @@ -2384,15 +2210,15 @@ func TestJobs_Parse(t *testing.T) { // Test ParseHCL job1, err := c.Jobs().ParseHCL(jobspec, false) - require.NoError(t, err) - require.Equal(t, "example", *job1.Name) - require.Nil(t, job1.Namespace) + must.NoError(t, err) + must.Eq(t, "example", *job1.Name) + must.Nil(t, job1.Namespace) job1Canonicalized, err := c.Jobs().ParseHCL(jobspec, true) - require.NoError(t, err) - require.Equal(t, "example", *job1Canonicalized.Name) - require.Equal(t, "default", *job1Canonicalized.Namespace) - require.NotEqual(t, job1, job1Canonicalized) + must.NoError(t, err) + must.Eq(t, "example", *job1Canonicalized.Name) + must.Eq(t, "default", *job1Canonicalized.Namespace) + must.NotEq(t, job1, job1Canonicalized) // Test ParseHCLOpts req := &JobsParseRequest{ @@ -2402,8 +2228,8 @@ func TestJobs_Parse(t *testing.T) { } job2, err := c.Jobs().ParseHCLOpts(req) - require.NoError(t, err) - require.Equal(t, job1, job2) + must.NoError(t, err) + must.Eq(t, job1, job2) // Test ParseHCLOpts with Canonicalize=true req = &JobsParseRequest{ @@ -2412,8 +2238,8 @@ func TestJobs_Parse(t *testing.T) { Canonicalize: true, } job2Canonicalized, err := c.Jobs().ParseHCLOpts(req) - require.NoError(t, err) - require.Equal(t, job1Canonicalized, job2Canonicalized) + must.NoError(t, err) + must.Eq(t, job1Canonicalized, job2Canonicalized) // Test ParseHCLOpts with HCLv1=true req = &JobsParseRequest{ @@ -2423,8 +2249,8 @@ func TestJobs_Parse(t *testing.T) { } job3, err := c.Jobs().ParseHCLOpts(req) - require.NoError(t, err) - require.Equal(t, job1, job3) + must.NoError(t, err) + must.Eq(t, job1, job3) // Test ParseHCLOpts with HCLv1=true and Canonicalize=true req = &JobsParseRequest{ @@ -2433,6 +2259,6 @@ func TestJobs_Parse(t *testing.T) { Canonicalize: true, } job3Canonicalized, err := c.Jobs().ParseHCLOpts(req) - require.NoError(t, err) - require.Equal(t, job1Canonicalized, job3Canonicalized) + must.NoError(t, err) + must.Eq(t, 
job1Canonicalized, job3Canonicalized) } diff --git a/api/namespace.go b/api/namespace.go index 7e53521263c..3a21e224753 100644 --- a/api/namespace.go +++ b/api/namespace.go @@ -58,7 +58,7 @@ func (n *Namespaces) Register(namespace *Namespace, q *WriteOptions) (*WriteMeta // Delete is used to delete a namespace func (n *Namespaces) Delete(namespace string, q *WriteOptions) (*WriteMeta, error) { - wm, err := n.client.delete(fmt.Sprintf("/v1/namespace/%s", namespace), nil, q) + wm, err := n.client.delete(fmt.Sprintf("/v1/namespace/%s", namespace), nil, nil, q) if err != nil { return nil, err } diff --git a/api/namespace_test.go b/api/namespace_test.go index 6bb90484ffe..89a49fadddf 100644 --- a/api/namespace_test.go +++ b/api/namespace_test.go @@ -4,12 +4,12 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/assert" + "github.com/shoenig/test/must" ) func TestNamespaces_Register(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() namespaces := c.Namespaces() @@ -17,21 +17,21 @@ func TestNamespaces_Register(t *testing.T) { // Create a namespace and register it ns := testNamespace() wm, err := namespaces.Register(ns, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the jobs back out again resp, qm, err := namespaces.List(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 2) - assert.Equal(ns.Name, resp[0].Name) - assert.Equal("default", resp[1].Name) + must.Len(t, 2, resp) + must.Eq(t, ns.Name, resp[0].Name) + must.Eq(t, "default", resp[1].Name) } func TestNamespaces_Register_Invalid(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() namespaces := c.Namespaces() @@ -40,38 +40,38 @@ func TestNamespaces_Register_Invalid(t *testing.T) { ns := testNamespace() ns.Name = "*" _, err := namespaces.Register(ns, nil) - assert.NotNil(err) + must.ErrorContains(t, err, `invalid name "*".`) } -func TestNamespace_Info(t *testing.T) { +func TestNamespaces_Info(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() namespaces := c.Namespaces() // Trying to retrieve a namespace before it exists returns an error _, _, err := namespaces.Info("foo", nil) - assert.NotNil(err) - assert.Contains(err.Error(), "not found") + must.NotNil(t, err) + must.ErrorContains(t, err, "not found") // Register the namespace ns := testNamespace() wm, err := namespaces.Register(ns, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the namespace again and ensure it exists result, qm, err := namespaces.Info(ns.Name, nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.NotNil(result) - assert.Equal(ns.Name, result.Name) + must.NotNil(t, result) + must.Eq(t, ns.Name, result.Name) } func TestNamespaces_Delete(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() namespaces := c.Namespaces() @@ -79,33 +79,33 @@ func TestNamespaces_Delete(t *testing.T) { // Create a namespace and register it ns := testNamespace() wm, err := namespaces.Register(ns, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the namespace back out again resp, qm, err := namespaces.List(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 2) - assert.Equal(ns.Name, resp[0].Name) - assert.Equal("default", 
resp[1].Name) + must.Len(t, 2, resp) + must.Eq(t, ns.Name, resp[0].Name) + must.Eq(t, "default", resp[1].Name) // Delete the namespace wm, err = namespaces.Delete(ns.Name, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the namespaces back out again resp, qm, err = namespaces.List(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 1) - assert.Equal("default", resp[0].Name) + must.Len(t, 1, resp) + must.Eq(t, "default", resp[0].Name) } func TestNamespaces_List(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() namespaces := c.Namespaces() @@ -116,29 +116,29 @@ func TestNamespaces_List(t *testing.T) { ns1.Name = "fooaaa" ns2.Name = "foobbb" wm, err := namespaces.Register(ns1, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) wm, err = namespaces.Register(ns2, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the namespaces resp, qm, err := namespaces.List(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 3) + must.Len(t, 3, resp) // Query the namespaces using a prefix resp, qm, err = namespaces.PrefixList("foo", nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 2) + must.Len(t, 2, resp) // Query the namespaces using a prefix resp, qm, err = namespaces.PrefixList("foob", nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 1) - assert.Equal(ns2.Name, resp[0].Name) + must.Len(t, 1, resp) + must.Eq(t, ns2.Name, resp[0].Name) } diff --git a/api/nodes.go b/api/nodes.go index e91a3894ff3..b6fd8e7b660 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -9,9 +9,10 @@ import ( ) const ( - NodeStatusInit = "initializing" - NodeStatusReady = "ready" - NodeStatusDown = "down" + NodeStatusInit = "initializing" + NodeStatusReady = "ready" + NodeStatusDown = "down" + NodeStatusDisconnected = "disconnected" // NodeSchedulingEligible and Ineligible marks the node as eligible or not, // respectively, for receiving allocations. 
This is orthogonal to the node diff --git a/api/nodes_test.go b/api/nodes_test.go index a811826edce..2d1b4101eb5 100644 --- a/api/nodes_test.go +++ b/api/nodes_test.go @@ -3,81 +3,81 @@ package api import ( "context" "fmt" - "reflect" "sort" - "strings" "testing" "time" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" ) +func queryNodeList(t *testing.T, nodes *Nodes) ([]*NodeListStub, *QueryMeta) { + t.Helper() + var ( + nodeListStub []*NodeListStub + queryMeta *QueryMeta + err error + ) + + f := func() error { + nodeListStub, queryMeta, err = nodes.List(nil) + if err != nil { + return fmt.Errorf("failed to list nodes: %w", err) + } + if len(nodeListStub) == 0 { + return fmt.Errorf("no nodes yet") + } + return nil + } + + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(f), + wait.Timeout(10*time.Second), + wait.Gap(1*time.Second), + )) + + return nodeListStub, queryMeta +} + +func oneNodeFromNodeList(t *testing.T, nodes *Nodes) *NodeListStub { + nodeListStub, _ := queryNodeList(t, nodes) + must.Len(t, 1, nodeListStub, must.Sprint("expected 1 node")) + return nodeListStub[0] +} + func TestNodes_List(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) defer s.Stop() nodes := c.Nodes() - var qm *QueryMeta - var out []*NodeListStub - var err error - - testutil.WaitForResult(func() (bool, error) { - out, qm, err = nodes.List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + nodeListStub, queryMeta := queryNodeList(t, nodes) + must.Len(t, 1, nodeListStub) // Check that we got valid QueryMeta. - assertQueryMeta(t, qm) + assertQueryMeta(t, queryMeta) } func TestNodes_PrefixList(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) defer s.Stop() nodes := c.Nodes() - var qm *QueryMeta - var out []*NodeListStub - var err error - // Get the node ID - var nodeID string - testutil.WaitForResult(func() (bool, error) { - out, _, err := nodes.List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - nodeID = out[0].ID - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + nodeID := oneNodeFromNodeList(t, nodes).ID // Find node based on four character prefix - out, qm, err = nodes.PrefixList(nodeID[:4]) - if err != nil { - t.Fatalf("err: %s", err) - } - if n := len(out); n != 1 { - t.Fatalf("expected 1 node, got: %d ", n) - } + out, qm, err := nodes.PrefixList(nodeID[:4]) + must.NoError(t, err) + must.Len(t, 1, out, must.Sprint("expected only 1 node")) // Check that we got valid QueryMeta. assertQueryMeta(t, qm) @@ -87,43 +87,32 @@ func TestNodes_PrefixList(t *testing.T) { // reserved resources in the response. 
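// A condensed sketch of the wait.InitialSuccess pattern that the queryNodeList
// helper above uses in place of testutil.WaitForResult: retry a condition
// function until it returns nil, or fail the test once the timeout elapses.
// The helper name and the 10s/1s timing values here are illustrative only and
// assume the imports (fmt, time, must, wait) already present in this file.
func waitForAnyNodeSketch(t *testing.T, nodes *Nodes) {
	t.Helper()
	f := func() error {
		out, _, err := nodes.List(nil)
		if err != nil {
			return fmt.Errorf("failed to list nodes: %w", err)
		}
		if len(out) == 0 {
			return fmt.Errorf("no nodes registered yet")
		}
		return nil
	}
	must.Wait(t, wait.InitialSuccess(
		wait.ErrorFunc(f),            // polled until it returns nil
		wait.Timeout(10*time.Second), // give up after 10 seconds
		wait.Gap(1*time.Second),      // pause between attempts
	))
}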
func TestNodes_List_Resources(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) defer s.Stop() nodes := c.Nodes() - var out []*NodeListStub - var err error - - testutil.WaitForResult(func() (bool, error) { - out, _, err = nodes.List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + node := oneNodeFromNodeList(t, nodes) // By default resources should *not* be included - require.Nil(t, out[0].NodeResources) - require.Nil(t, out[0].ReservedResources) + must.Nil(t, node.NodeResources) + must.Nil(t, node.ReservedResources) qo := &QueryOptions{ Params: map[string]string{"resources": "true"}, } - out, _, err = nodes.List(qo) - require.NoError(t, err) - require.NotNil(t, out[0].NodeResources) - require.NotNil(t, out[0].ReservedResources) + + out, _, err := nodes.List(qo) + must.NoError(t, err) + must.NotNil(t, out[0].NodeResources) + must.NotNil(t, out[0].ReservedResources) } func TestNodes_Info(t *testing.T) { testutil.Parallel(t) + startTime := time.Now().Unix() c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true @@ -132,57 +121,36 @@ func TestNodes_Info(t *testing.T) { nodes := c.Nodes() // Retrieving a nonexistent node returns error - _, _, err := nodes.Info("12345678-abcd-efab-cdef-123456789abc", nil) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Fatalf("expected not found error, got: %#v", err) - } + _, _, infoErr := nodes.Info("12345678-abcd-efab-cdef-123456789abc", nil) + must.ErrorContains(t, infoErr, "not found") - // Get the node ID - var nodeID, dc string - testutil.WaitForResult(func() (bool, error) { - out, _, err := nodes.List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - nodeID = out[0].ID - dc = out[0].Datacenter - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + // Get the node ID and DC + node := oneNodeFromNodeList(t, nodes) + nodeID, dc := node.ID, node.Datacenter // Querying for existing nodes returns properly result, qm, err := nodes.Info(nodeID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) + assertQueryMeta(t, qm) // Check that the result is what we expect - if result.ID != nodeID || result.Datacenter != dc { - t.Fatalf("expected %s (%s), got: %s (%s)", - nodeID, dc, - result.ID, result.Datacenter) - } + must.Eq(t, nodeID, result.ID) + must.Eq(t, dc, result.Datacenter) - require.Equal(t, 20000, result.NodeResources.MinDynamicPort) - require.Equal(t, 32000, result.NodeResources.MaxDynamicPort) + must.Eq(t, 20000, result.NodeResources.MinDynamicPort) + must.Eq(t, 32000, result.NodeResources.MaxDynamicPort) // Check that the StatusUpdatedAt field is being populated correctly - if result.StatusUpdatedAt < startTime { - t.Fatalf("start time: %v, status updated: %v", startTime, result.StatusUpdatedAt) - } + must.Less(t, result.StatusUpdatedAt, startTime) - if len(result.Events) < 1 { - t.Fatalf("Expected at minimum the node register event to be populated: %+v", result) - } + // check we have at least one event + must.GreaterEq(t, 1, len(result.Events)) } func TestNodes_NoSecretID(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) @@ -190,34 +158,21 @@ func 
TestNodes_NoSecretID(t *testing.T) { nodes := c.Nodes() // Get the node ID - var nodeID string - testutil.WaitForResult(func() (bool, error) { - out, _, err := nodes.List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - nodeID = out[0].ID - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + nodeID := oneNodeFromNodeList(t, nodes).ID // perform a raw http call and make sure that: // - "ID" to make sure that raw decoding is working correctly // - "SecretID" to make sure it's not present resp := make(map[string]interface{}) _, err := c.query("/v1/node/"+nodeID, &resp, nil) - require.NoError(t, err) - require.Equal(t, nodeID, resp["ID"]) - require.Empty(t, resp["SecretID"]) + must.NoError(t, err) + must.Eq(t, nodeID, resp["ID"].(string)) + must.Eq(t, "", resp["SecretID"]) } func TestNodes_ToggleDrain(t *testing.T) { testutil.Parallel(t) - require := require.New(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) @@ -225,26 +180,13 @@ func TestNodes_ToggleDrain(t *testing.T) { nodes := c.Nodes() // Wait for node registration and get the ID - var nodeID string - testutil.WaitForResult(func() (bool, error) { - out, _, err := nodes.List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - nodeID = out[0].ID - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + nodeID := oneNodeFromNodeList(t, nodes).ID // Check for drain mode out, _, err := nodes.Info(nodeID, nil) - require.Nil(err) - require.False(out.Drain) - require.Nil(out.LastDrain) + must.NoError(t, err) + must.False(t, out.Drain) + must.Nil(t, out.LastDrain) // Toggle it on timeBeforeDrain := time.Now().Add(-1 * time.Second) @@ -259,7 +201,7 @@ func TestNodes_ToggleDrain(t *testing.T) { MarkEligible: false, Meta: drainMeta, }, nil) - require.Nil(err) + must.NoError(t, err) assertWriteMeta(t, &drainOut.WriteMeta) // Drain may have completed before we can check, use event stream @@ -269,127 +211,98 @@ func TestNodes_ToggleDrain(t *testing.T) { streamCh, err := c.EventStream().Stream(ctx, map[Topic][]string{ TopicNode: {nodeID}, }, 0, nil) - require.NoError(err) + must.NoError(t, err) // we expect to see the node change to Drain:true and then back to Drain:false+ineligible var sawDraining, sawDrainComplete uint64 for sawDrainComplete == 0 { select { case events := <-streamCh: - require.NoError(events.Err) + must.NoError(t, events.Err) for _, e := range events.Events { node, err := e.Node() - require.NoError(err) - require.Equal(node.DrainStrategy != nil, node.Drain) - require.True(!node.Drain || node.SchedulingEligibility == NodeSchedulingIneligible) // node.Drain => "ineligible" + must.NoError(t, err) + must.Eq(t, node.DrainStrategy != nil, node.Drain) + must.True(t, !node.Drain || node.SchedulingEligibility == NodeSchedulingIneligible) // node.Drain => "ineligible" if node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible { - require.NotNil(node.LastDrain) - require.Equal(DrainStatusDraining, node.LastDrain.Status) + must.NotNil(t, node.LastDrain) + must.Eq(t, DrainStatusDraining, node.LastDrain.Status) now := time.Now() - require.False(node.LastDrain.StartedAt.Before(timeBeforeDrain), - "wanted %v <= %v", node.LastDrain.StartedAt, timeBeforeDrain) - require.False(node.LastDrain.StartedAt.After(now), - "wanted %v <= %v", node.LastDrain.StartedAt, now) - 
require.Equal(drainMeta, node.LastDrain.Meta) + must.False(t, node.LastDrain.StartedAt.Before(timeBeforeDrain)) + must.False(t, node.LastDrain.StartedAt.After(now)) + must.Eq(t, drainMeta, node.LastDrain.Meta) sawDraining = node.ModifyIndex } else if sawDraining != 0 && !node.Drain && node.SchedulingEligibility == NodeSchedulingIneligible { - require.NotNil(node.LastDrain) - require.Equal(DrainStatusComplete, node.LastDrain.Status) - require.True(!node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt)) - require.Equal(drainMeta, node.LastDrain.Meta) + must.NotNil(t, node.LastDrain) + must.Eq(t, DrainStatusComplete, node.LastDrain.Status) + must.True(t, !node.LastDrain.UpdatedAt.Before(node.LastDrain.StartedAt)) + must.Eq(t, drainMeta, node.LastDrain.Meta) sawDrainComplete = node.ModifyIndex } } case <-time.After(5 * time.Second): - require.Fail("failed waiting for event stream event") + must.Unreachable(t, must.Sprint("waiting on stream event that never happened")) } } // Toggle off again drainOut, err = nodes.UpdateDrain(nodeID, nil, true, nil) - require.Nil(err) + must.NoError(t, err) assertWriteMeta(t, &drainOut.WriteMeta) // Check again out, _, err = nodes.Info(nodeID, nil) - require.Nil(err) - require.False(out.Drain) - require.Nil(out.DrainStrategy) - require.Equal(NodeSchedulingEligible, out.SchedulingEligibility) + must.NoError(t, err) + must.False(t, out.Drain) + must.Nil(t, out.DrainStrategy) + must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility) } func TestNodes_ToggleEligibility(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) defer s.Stop() nodes := c.Nodes() - // Wait for node registration and get the ID - var nodeID string - testutil.WaitForResult(func() (bool, error) { - out, _, err := nodes.List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - nodeID = out[0].ID - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + // Get node ID + nodeID := oneNodeFromNodeList(t, nodes).ID // Check for eligibility out, _, err := nodes.Info(nodeID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if out.SchedulingEligibility != NodeSchedulingEligible { - t.Fatalf("node should be eligible") - } + must.NoError(t, err) + must.Eq(t, NodeSchedulingEligible, out.SchedulingEligibility) // Toggle it off eligOut, err := nodes.ToggleEligibility(nodeID, false, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, &eligOut.WriteMeta) // Check again out, _, err = nodes.Info(nodeID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if out.SchedulingEligibility != NodeSchedulingIneligible { - t.Fatalf("bad eligibility: %v vs %v", out.SchedulingEligibility, NodeSchedulingIneligible) - } + must.NoError(t, err) + must.Eq(t, NodeSchedulingIneligible, out.SchedulingEligibility) // Toggle on eligOut, err = nodes.ToggleEligibility(nodeID, true, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertWriteMeta(t, &eligOut.WriteMeta) // Check again out, _, err = nodes.Info(nodeID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if out.SchedulingEligibility != NodeSchedulingEligible { - t.Fatalf("bad eligibility: %v vs %v", out.SchedulingEligibility, NodeSchedulingEligible) - } - if out.DrainStrategy != nil { - t.Fatalf("drain strategy should be unset") - } + must.NoError(t, err) + must.Eq(t, NodeSchedulingEligible, 
out.SchedulingEligibility) + must.Nil(t, out.DrainStrategy) } func TestNodes_Allocations(t *testing.T) { testutil.Parallel(t) - c, s := makeClient(t, nil, nil) + + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { + c.DevMode = true + }) defer s.Stop() nodes := c.Nodes() @@ -397,17 +310,14 @@ func TestNodes_Allocations(t *testing.T) { // don't check the index here because it's possible the node // has already registered, in which case we will get a non- // zero result anyways. - allocs, _, err := nodes.Allocations("nope", nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if n := len(allocs); n != 0 { - t.Fatalf("expected 0 allocs, got: %d", n) - } + allocations, _, err := nodes.Allocations("nope", nil) + must.NoError(t, err) + must.Len(t, 0, allocations) } func TestNodes_ForceEvaluate(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) @@ -416,37 +326,21 @@ func TestNodes_ForceEvaluate(t *testing.T) { // Force-eval on a nonexistent node fails _, _, err := nodes.ForceEvaluate("12345678-abcd-efab-cdef-123456789abc", nil) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Fatalf("expected not found error, got: %#v", err) - } + must.ErrorContains(t, err, "not found") // Wait for node registration and get the ID - var nodeID string - testutil.WaitForResult(func() (bool, error) { - out, _, err := nodes.List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - nodeID = out[0].ID - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + nodeID := oneNodeFromNodeList(t, nodes).ID // Try force-eval again. We don't check the WriteMeta because // there are no allocations to process, so we would get an index // of zero. Same goes for the eval ID. 
_, _, err = nodes.ForceEvaluate(nodeID, nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) } func TestNodes_Sort(t *testing.T) { testutil.Parallel(t) + nodes := []*NodeListStub{ {CreateIndex: 2}, {CreateIndex: 1}, @@ -459,15 +353,12 @@ func TestNodes_Sort(t *testing.T) { {CreateIndex: 2}, {CreateIndex: 1}, } - if !reflect.DeepEqual(nodes, expect) { - t.Fatalf("\n\n%#v\n\n%#v", nodes, expect) - } + must.Eq(t, expect, nodes) } // Unittest monitorDrainMultiplex when an error occurs func TestNodes_MonitorDrain_Multiplex_Bad(t *testing.T) { testutil.Parallel(t) - require := require.New(t) ctx := context.Background() multiplexCtx, cancel := context.WithCancel(ctx) @@ -488,38 +379,36 @@ func TestNodes_MonitorDrain_Multiplex_Bad(t *testing.T) { // Fake an alloc update msg := Messagef(0, "alloc update") allocCh <- msg - require.Equal(msg, <-outCh) + must.Eq(t, msg, <-outCh) // Fake a node update msg = Messagef(0, "node update") nodeCh <- msg - require.Equal(msg, <-outCh) + must.Eq(t, msg, <-outCh) // Fake an error that should shut everything down msg = Messagef(MonitorMsgLevelError, "fake error") nodeCh <- msg - require.Equal(msg, <-outCh) + must.Eq(t, msg, <-outCh) _, ok := <-exitedCh - require.False(ok) + must.False(t, ok) _, ok = <-outCh - require.False(ok) + must.False(t, ok) // Exiting should also cancel the context that would be passed to the // node & alloc watchers select { case <-multiplexCtx.Done(): case <-time.After(100 * time.Millisecond): - t.Fatalf("context wasn't canceled") + must.Unreachable(t, must.Sprint("multiplex context was not cancelled")) } - } // Unittest monitorDrainMultiplex when drain finishes func TestNodes_MonitorDrain_Multiplex_Good(t *testing.T) { testutil.Parallel(t) - require := require.New(t) ctx := context.Background() multiplexCtx, cancel := context.WithCancel(ctx) @@ -541,19 +430,17 @@ func TestNodes_MonitorDrain_Multiplex_Good(t *testing.T) { msg := Messagef(MonitorMsgLevelInfo, "node update") nodeCh <- msg close(nodeCh) - require.Equal(msg, <-outCh) + must.Eq(t, msg, <-outCh) // Nothing else should have exited yet select { - case msg, ok := <-outCh: - if ok { - t.Fatalf("unexpected output: %q", msg) - } - t.Fatalf("out channel closed unexpectedly") + case badMsg, ok := <-outCh: + must.False(t, ok, must.Sprintf("unexpected output %v", badMsg)) + must.Unreachable(t, must.Sprint("out channel closed unexpectedly")) case <-exitedCh: - t.Fatalf("multiplexer exited unexpectedly") + must.Unreachable(t, must.Sprint("multiplexer exited unexpectedly")) case <-multiplexCtx.Done(): - t.Fatalf("multiplexer context canceled unexpectedly") + must.Unreachable(t, must.Sprint("multiplexer context canceled unexpectedly")) case <-time.After(10 * time.Millisecond): t.Logf("multiplexer still running as expected") } @@ -561,103 +448,87 @@ func TestNodes_MonitorDrain_Multiplex_Good(t *testing.T) { // Fake an alloc update coming in after the node monitor has finished msg = Messagef(0, "alloc update") allocCh <- msg - require.Equal(msg, <-outCh) + must.Eq(t, msg, <-outCh) // Closing the allocCh should cause everything to exit close(allocCh) _, ok := <-exitedCh - require.False(ok) + must.False(t, ok) _, ok = <-outCh - require.False(ok) + must.False(t, ok) // Exiting should also cancel the context that would be passed to the // node & alloc watchers select { case <-multiplexCtx.Done(): case <-time.After(100 * time.Millisecond): - t.Fatalf("context wasn't canceled") + must.Unreachable(t, must.Sprint("context was not cancelled")) } - } func 
TestNodes_DrainStrategy_Equal(t *testing.T) { testutil.Parallel(t) - require := require.New(t) // nil var d *DrainStrategy - require.True(d.Equal(nil)) + must.Equal(t, nil, d) o := &DrainStrategy{} - require.False(d.Equal(o)) - require.False(o.Equal(d)) + must.NotEqual(t, d, o) + must.NotEqual(t, o, d) d = &DrainStrategy{} - require.True(d.Equal(o)) + must.Equal(t, d, o) + must.Equal(t, o, d) // ForceDeadline d.ForceDeadline = time.Now() - require.False(d.Equal(o)) + must.NotEqual(t, d, o) o.ForceDeadline = d.ForceDeadline - require.True(d.Equal(o)) + must.Equal(t, d, o) // Deadline d.Deadline = 1 - require.False(d.Equal(o)) + must.NotEqual(t, d, o) o.Deadline = 1 - require.True(d.Equal(o)) + must.Equal(t, d, o) // IgnoreSystemJobs d.IgnoreSystemJobs = true - require.False(d.Equal(o)) + must.NotEqual(t, d, o) o.IgnoreSystemJobs = true - require.True(d.Equal(o)) + must.Equal(t, d, o) } func TestNodes_Purge(t *testing.T) { testutil.Parallel(t) - require := require.New(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { c.DevMode = true }) defer s.Stop() + nodes := c.Nodes() // Purge on a nonexistent node fails. _, _, err := c.Nodes().Purge("12345678-abcd-efab-cdef-123456789abc", nil) - if err == nil || !strings.Contains(err.Error(), "not found") { - t.Fatalf("expected not found error, got: %#v", err) - } + must.ErrorContains(t, err, "not found") - // Wait for node registration and get the ID so we can attempt to purge a - // node that exists. - var nodeID string - testutil.WaitForResult(func() (bool, error) { - out, _, err := c.Nodes().List(nil) - if err != nil { - return false, err - } - if n := len(out); n != 1 { - return false, fmt.Errorf("expected 1 node, got: %d", n) - } - nodeID = out[0].ID - return true, nil - }, func(err error) { - t.Fatalf("err: %s", err) - }) + // Wait for nodeID + nodeID := oneNodeFromNodeList(t, nodes).ID // Perform the node purge and check the response objects. out, meta, err := c.Nodes().Purge(nodeID, nil) - require.Nil(err) - require.NotNil(out) + must.NoError(t, err) + must.NotNil(t, out) // We can't use assertQueryMeta here, as the RPC response does not populate // the known leader field. 
- require.Greater(meta.LastIndex, uint64(0)) + must.Positive(t, meta.LastIndex) } func TestNodeStatValueFormatting(t *testing.T) { @@ -669,15 +540,15 @@ func TestNodeStatValueFormatting(t *testing.T) { }{ { "true", - StatValue{BoolVal: boolToPtr(true)}, + StatValue{BoolVal: pointerOf(true)}, }, { "false", - StatValue{BoolVal: boolToPtr(false)}, + StatValue{BoolVal: pointerOf(false)}, }, { "myvalue", - StatValue{StringVal: stringToPtr("myvalue")}, + StatValue{StringVal: pointerOf("myvalue")}, }, { "2.718", @@ -710,28 +581,28 @@ func TestNodeStatValueFormatting(t *testing.T) { { "2", StatValue{ - IntNumeratorVal: int64ToPtr(2), + IntNumeratorVal: pointerOf(int64(2)), }, }, { "2 / 3", StatValue{ - IntNumeratorVal: int64ToPtr(2), - IntDenominatorVal: int64ToPtr(3), + IntNumeratorVal: pointerOf(int64(2)), + IntDenominatorVal: pointerOf(int64(3)), }, }, { "2 MHz", StatValue{ - IntNumeratorVal: int64ToPtr(2), + IntNumeratorVal: pointerOf(int64(2)), Unit: "MHz", }, }, { "2 / 3 MHz", StatValue{ - IntNumeratorVal: int64ToPtr(2), - IntDenominatorVal: int64ToPtr(3), + IntNumeratorVal: pointerOf(int64(2)), + IntDenominatorVal: pointerOf(int64(3)), Unit: "MHz", }, }, @@ -740,7 +611,7 @@ func TestNodeStatValueFormatting(t *testing.T) { for i, c := range cases { t.Run(fmt.Sprintf("case %d %v", i, c.expected), func(t *testing.T) { formatted := c.value.String() - require.Equal(t, c.expected, formatted) + must.Eq(t, c.expected, formatted) }) } } diff --git a/api/operator.go b/api/operator.go index c3343c5e4ce..562fb62d8c8 100644 --- a/api/operator.go +++ b/api/operator.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "strconv" "strings" "time" @@ -134,6 +133,10 @@ type SchedulerConfiguration struct { // management ACL token RejectJobRegistration bool + // PauseEvalBroker stops the leader evaluation broker process from running + // until the configuration is updated and written to the Nomad servers. + PauseEvalBroker bool + // CreateIndex/ModifyIndex store the create/modify indexes of this configuration. CreateIndex uint64 ModifyIndex uint64 @@ -158,7 +161,7 @@ type SchedulerSetConfigurationResponse struct { } // SchedulerAlgorithm is an enum string that encapsulates the valid options for a -// SchedulerConfiguration stanza's SchedulerAlgorithm. These modes will allow the +// SchedulerConfiguration block's SchedulerAlgorithm. These modes will allow the // scheduler to be user-selectable. type SchedulerAlgorithm string @@ -225,7 +228,7 @@ func (op *Operator) Snapshot(q *QueryOptions) (io.ReadCloser, error) { cr, err := newChecksumValidatingReader(resp.Body, digest) if err != nil { - io.Copy(ioutil.Discard, resp.Body) + io.Copy(io.Discard, resp.Body) resp.Body.Close() return nil, err diff --git a/api/operator_ent_test.go b/api/operator_ent_test.go index 7e315a5d786..155cd67e42b 100644 --- a/api/operator_ent_test.go +++ b/api/operator_ent_test.go @@ -1,5 +1,4 @@ //go:build ent -// +build ent package api @@ -7,11 +6,12 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestOperator_LicenseGet(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() @@ -19,11 +19,10 @@ func TestOperator_LicenseGet(t *testing.T) { // Make authenticated request. _, _, err := operator.LicenseGet(nil) - require.NoError(t, err) + must.NoError(t, err) // Make unauthenticated request. 
c.SetSecretID("") _, _, err = operator.LicenseGet(nil) - require.Error(t, err) - require.Contains(t, err.Error(), "403") + must.ErrorContains(t, err, "403") } diff --git a/api/operator_metrics.go b/api/operator_metrics.go index e64198194a7..b5577a5476e 100644 --- a/api/operator_metrics.go +++ b/api/operator_metrics.go @@ -1,7 +1,7 @@ package api import ( - "io/ioutil" + "io" "time" ) @@ -67,7 +67,7 @@ func (op *Operator) Metrics(q *QueryOptions) ([]byte, error) { return nil, err } - metricsBytes, err := ioutil.ReadAll(metricsReader) + metricsBytes, err := io.ReadAll(metricsReader) if err != nil { return nil, err } diff --git a/api/operator_metrics_test.go b/api/operator_metrics_test.go index f8b149f500a..efe6f502980 100644 --- a/api/operator_metrics_test.go +++ b/api/operator_metrics_test.go @@ -4,12 +4,15 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestOperator_MetricsSummary(t *testing.T) { testutil.Parallel(t) - c, s := makeClient(t, nil, nil) + + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { + c.DevMode = true + }) defer s.Stop() operator := c.Operator() @@ -20,19 +23,21 @@ func TestOperator_MetricsSummary(t *testing.T) { } metrics, qm, err := operator.MetricsSummary(qo) - require.NoError(t, err) - require.NotNil(t, metrics) - require.NotNil(t, qm) - require.NotNil(t, metrics.Timestamp) // should always get a TimeStamp - require.GreaterOrEqual(t, len(metrics.Points), 0) // may not have points yet - require.GreaterOrEqual(t, len(metrics.Gauges), 1) // should have at least 1 gauge - require.GreaterOrEqual(t, len(metrics.Counters), 1) // should have at least 1 counter - require.GreaterOrEqual(t, len(metrics.Samples), 1) // should have at least 1 sample + must.NoError(t, err) + must.NotNil(t, metrics) + must.NotNil(t, qm) + must.NotNil(t, metrics.Timestamp) // should always get a TimeStamp + must.SliceEmpty(t, metrics.Points) // may not have points yet + must.SliceNotEmpty(t, metrics.Gauges) // should have at least 1 gauge + must.SliceNotEmpty(t, metrics.Counters) // should have at least 1 counter + must.SliceNotEmpty(t, metrics.Samples) // should have at least 1 sample } func TestOperator_Metrics_Prometheus(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) { + c.DevMode = true c.Telemetry = &testutil.Telemetry{PrometheusMetrics: true} }) defer s.Stop() @@ -45,8 +50,8 @@ func TestOperator_Metrics_Prometheus(t *testing.T) { } metrics, err := operator.Metrics(qo) - require.NoError(t, err) - require.NotNil(t, metrics) + must.NoError(t, err) + must.NotNil(t, metrics) metricString := string(metrics[:]) - require.Containsf(t, metricString, "# HELP", "expected Prometheus format containing \"# HELP\", got: \n%s", metricString) + must.StrContains(t, metricString, "# HELP") } diff --git a/api/operator_test.go b/api/operator_test.go index 276aefb186c..4a7becdb936 100644 --- a/api/operator_test.go +++ b/api/operator_test.go @@ -1,31 +1,29 @@ package api import ( - "strings" "testing" "github.com/hashicorp/nomad/api/internal/testutil" + "github.com/shoenig/test/must" ) func TestOperator_RaftGetConfiguration(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() operator := c.Operator() out, err := operator.RaftGetConfiguration(nil) - if err != nil { - t.Fatalf("err: %v", err) - } - if len(out.Servers) != 1 || - !out.Servers[0].Leader || - !out.Servers[0].Voter { - t.Fatalf("bad: %v", out) - } + 
must.NoError(t, err) + must.Len(t, 1, out.Servers) + must.True(t, out.Servers[0].Leader) + must.True(t, out.Servers[0].Voter) } func TestOperator_RaftRemovePeerByAddress(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() @@ -33,23 +31,61 @@ func TestOperator_RaftRemovePeerByAddress(t *testing.T) { // through. operator := c.Operator() err := operator.RaftRemovePeerByAddress("nope", nil) - if err == nil || !strings.Contains(err.Error(), - "address \"nope\" was not found in the Raft configuration") { - t.Fatalf("err: %v", err) - } + must.ErrorContains(t, err, `address "nope" was not found in the Raft configuration`) } func TestOperator_RaftRemovePeerByID(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() - // If we get this error, it proves we sent the address all the way - // through. + // If we get this error, it proves we sent the address all the way through. operator := c.Operator() err := operator.RaftRemovePeerByID("nope", nil) - if err == nil || !strings.Contains(err.Error(), - "id \"nope\" was not found in the Raft configuration") { - t.Fatalf("err: %v", err) + must.ErrorContains(t, err, `id "nope" was not found in the Raft configuration`) +} + +func TestOperator_SchedulerGetConfiguration(t *testing.T) { + testutil.Parallel(t) + + c, s := makeClient(t, nil, nil) + defer s.Stop() + + schedulerConfig, _, err := c.Operator().SchedulerGetConfiguration(nil) + must.NoError(t, err) + must.NotNil(t, schedulerConfig) +} + +func TestOperator_SchedulerSetConfiguration(t *testing.T) { + testutil.Parallel(t) + + c, s := makeClient(t, nil, nil) + defer s.Stop() + + newSchedulerConfig := SchedulerConfiguration{ + SchedulerAlgorithm: SchedulerAlgorithmSpread, + PreemptionConfig: PreemptionConfig{ + SystemSchedulerEnabled: true, + SysBatchSchedulerEnabled: true, + BatchSchedulerEnabled: true, + ServiceSchedulerEnabled: true, + }, + MemoryOversubscriptionEnabled: true, + RejectJobRegistration: true, + PauseEvalBroker: true, } + + schedulerConfigUpdateResp, _, err := c.Operator().SchedulerSetConfiguration(&newSchedulerConfig, nil) + must.NoError(t, err) + must.True(t, schedulerConfigUpdateResp.Updated) + + // We can't exactly predict the query meta responses, so we test fields individually. 
+ schedulerConfig, _, err := c.Operator().SchedulerGetConfiguration(nil) + must.NoError(t, err) + must.Eq(t, SchedulerAlgorithmSpread, schedulerConfig.SchedulerConfig.SchedulerAlgorithm) + must.True(t, schedulerConfig.SchedulerConfig.PauseEvalBroker) + must.True(t, schedulerConfig.SchedulerConfig.RejectJobRegistration) + must.True(t, schedulerConfig.SchedulerConfig.MemoryOversubscriptionEnabled) + must.Eq(t, schedulerConfig.SchedulerConfig.PreemptionConfig, newSchedulerConfig.PreemptionConfig) } diff --git a/api/quota.go b/api/quota.go index 029f1f4a55e..be4e46c7e95 100644 --- a/api/quota.go +++ b/api/quota.go @@ -90,7 +90,7 @@ func (q *Quotas) Register(spec *QuotaSpec, qo *WriteOptions) (*WriteMeta, error) // Delete is used to delete a quota spec func (q *Quotas) Delete(quota string, qo *WriteOptions) (*WriteMeta, error) { - wm, err := q.client.delete(fmt.Sprintf("/v1/quota/%s", quota), nil, qo) + wm, err := q.client.delete(fmt.Sprintf("/v1/quota/%s", quota), nil, nil, qo) if err != nil { return nil, err } diff --git a/api/quota_test.go b/api/quota_test.go index 3de5fd157a3..ea1e1ce93cf 100644 --- a/api/quota_test.go +++ b/api/quota_test.go @@ -1,5 +1,4 @@ //go:build ent -// +build ent package api @@ -7,12 +6,12 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/assert" + "github.com/shoenig/test/must" ) func TestQuotas_Register(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() quotas := c.Quotas() @@ -20,20 +19,20 @@ func TestQuotas_Register(t *testing.T) { // Create a quota spec and register it qs := testQuotaSpec() wm, err := quotas.Register(qs, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the specs back out again resp, qm, err := quotas.List(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 1) - assert.Equal(qs.Name, resp[0].Name) + must.Len(t, 1, resp) + must.Eq(t, qs.Name, resp[0].Name) } func TestQuotas_Register_Invalid(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() quotas := c.Quotas() @@ -42,64 +41,62 @@ func TestQuotas_Register_Invalid(t *testing.T) { qs := testQuotaSpec() qs.Name = "*" _, err := quotas.Register(qs, nil) - assert.NotNil(err) + must.Error(t, err) } func TestQuotas_Info(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() quotas := c.Quotas() // Trying to retrieve a quota spec before it exists returns an error _, _, err := quotas.Info("foo", nil) - assert.NotNil(err) - assert.Contains(err.Error(), "not found") + must.ErrorContains(t, err, "not found") // Register the quota qs := testQuotaSpec() wm, err := quotas.Register(qs, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the quota again and ensure it exists result, qm, err := quotas.Info(qs.Name, nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.NotNil(result) - assert.Equal(qs.Name, result.Name) + must.NotNil(t, result) + must.Eq(t, qs.Name, result.Name) } func TestQuotas_Usage(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() quotas := c.Quotas() // Trying to retrieve a quota spec before it exists returns an error _, _, err := quotas.Usage("foo", nil) - assert.NotNil(err) - assert.Contains(err.Error(), "not found") + must.ErrorContains(t, err, "not found") // Register the quota qs := 
testQuotaSpec() wm, err := quotas.Register(qs, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the quota usage and ensure it exists result, qm, err := quotas.Usage(qs.Name, nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.NotNil(result) - assert.Equal(qs.Name, result.Name) + must.NotNil(t, result) + must.Eq(t, qs.Name, result.Name) } func TestQuotas_Delete(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() quotas := c.Quotas() @@ -107,31 +104,31 @@ func TestQuotas_Delete(t *testing.T) { // Create a quota and register it qs := testQuotaSpec() wm, err := quotas.Register(qs, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the quota back out again resp, qm, err := quotas.List(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 1) - assert.Equal(qs.Name, resp[0].Name) + must.Len(t, 1, resp) + must.Eq(t, qs.Name, resp[0].Name) // Delete the quota wm, err = quotas.Delete(qs.Name, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the quotas back out again resp, qm, err = quotas.List(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 0) + must.SliceEmpty(t, resp) } func TestQuotas_List(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() quotas := c.Quotas() @@ -142,36 +139,36 @@ func TestQuotas_List(t *testing.T) { qs1.Name = "fooaaa" qs2.Name = "foobbb" wm, err := quotas.Register(qs1, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) wm, err = quotas.Register(qs2, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the quotas resp, qm, err := quotas.List(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 2) + must.Len(t, 2, resp) // Query the quotas using a prefix resp, qm, err = quotas.PrefixList("foo", nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 2) + must.Len(t, 2, resp) // Query the quotas using a prefix resp, qm, err = quotas.PrefixList("foob", nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 1) - assert.Equal(qs2.Name, resp[0].Name) + must.Len(t, 1, resp) + must.Eq(t, qs2.Name, resp[0].Name) } func TestQuotas_ListUsages(t *testing.T) { testutil.Parallel(t) - assert := assert.New(t) + c, s := makeClient(t, nil, nil) defer s.Stop() quotas := c.Quotas() @@ -182,29 +179,29 @@ func TestQuotas_ListUsages(t *testing.T) { qs1.Name = "fooaaa" qs2.Name = "foobbb" wm, err := quotas.Register(qs1, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) wm, err = quotas.Register(qs2, nil) - assert.Nil(err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the quotas resp, qm, err := quotas.ListUsage(nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 2) + must.Len(t, 2, resp) // Query the quotas using a prefix resp, qm, err = quotas.PrefixListUsage("foo", nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 2) + must.Len(t, 2, resp) // Query the quotas using a prefix resp, qm, err = quotas.PrefixListUsage("foob", nil) - assert.Nil(err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Len(resp, 1) - assert.Equal(qs2.Name, resp[0].Name) + must.Len(t, 1, resp) + must.Eq(t, qs2.Name, resp[0].Name) } diff --git a/api/raw.go b/api/raw.go index 
9369829c511..077f87dd064 100644 --- a/api/raw.go +++ b/api/raw.go @@ -34,5 +34,5 @@ func (raw *Raw) Write(endpoint string, in, out interface{}, q *WriteOptions) (*W // Delete is used to do a DELETE request against an endpoint // and serialize/deserialized using the standard Nomad conventions. func (raw *Raw) Delete(endpoint string, out interface{}, q *WriteOptions) (*WriteMeta, error) { - return raw.c.delete(endpoint, out, q) + return raw.c.delete(endpoint, nil, out, q) } diff --git a/api/regions_test.go b/api/regions_test.go index b500eb1bcd7..72e178cc19e 100644 --- a/api/regions_test.go +++ b/api/regions_test.go @@ -3,8 +3,11 @@ package api import ( "fmt" "testing" + "time" "github.com/hashicorp/nomad/api/internal/testutil" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" ) func TestRegionsList(t *testing.T) { @@ -20,24 +23,28 @@ func TestRegionsList(t *testing.T) { defer s2.Stop() // Join the servers - if _, err := c2.Agent().Join(s1.SerfAddr); err != nil { - t.Fatalf("err: %v", err) - } + _, err := c2.Agent().Join(s1.SerfAddr) + must.NoError(t, err) - // Regions returned and sorted - testutil.WaitForResult(func() (bool, error) { + f := func() error { regions, err := c1.Regions().List() if err != nil { - return false, err + return fmt.Errorf("failed to get regions: %w", err) } if n := len(regions); n != 2 { - return false, fmt.Errorf("expected 2 regions, got: %d", n) + return fmt.Errorf("expected 2 regions, got %d", n) } - if regions[0] != "regionA" || regions[1] != "regionB" { - return false, fmt.Errorf("bad: %#v", regions) + if regions[0] != "regionA" { + return fmt.Errorf("unexpected first region, got: %s", regions[0]) } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) + if regions[1] != "regionB" { + return fmt.Errorf("unexpected second region, got: %s", regions[1]) + } + return nil + } + must.Wait(t, wait.InitialSuccess( + wait.ErrorFunc(f), + wait.Timeout(10*time.Second), + wait.Gap(1*time.Second), + )) } diff --git a/api/resources.go b/api/resources.go index b5ada2d9ec1..84d9d190547 100644 --- a/api/resources.go +++ b/api/resources.go @@ -38,7 +38,7 @@ func (r *Resources) Canonicalize() { // CPU will be set to the default if cores is nil above. // If cpu is nil here then cores has been set and cpu should be 0 if r.CPU == nil { - r.CPU = intToPtr(0) + r.CPU = pointerOf(0) } if r.MemoryMB == nil { @@ -55,9 +55,9 @@ func (r *Resources) Canonicalize() { // and should be kept in sync. func DefaultResources() *Resources { return &Resources{ - CPU: intToPtr(100), - Cores: intToPtr(0), - MemoryMB: intToPtr(300), + CPU: pointerOf(100), + Cores: pointerOf(0), + MemoryMB: pointerOf(300), } } @@ -68,9 +68,9 @@ func DefaultResources() *Resources { // IN nomad/structs/structs.go and should be kept in sync. 
func MinResources() *Resources { return &Resources{ - CPU: intToPtr(1), - Cores: intToPtr(0), - MemoryMB: intToPtr(10), + CPU: pointerOf(1), + Cores: pointerOf(0), + MemoryMB: pointerOf(10), } } @@ -98,9 +98,9 @@ func (r *Resources) Merge(other *Resources) { type Port struct { Label string `hcl:",label"` - Value int `mapstructure:"static" hcl:"static,optional"` - To int `mapstructure:"to" hcl:"to,optional"` - HostNetwork string `mapstructure:"host_network" hcl:"host_network,optional"` + Value int `hcl:"static,optional"` + To int `hcl:"to,optional"` + HostNetwork string `hcl:"host_network,optional"` } type DNSConfig struct { @@ -268,7 +268,7 @@ type RequestedDevice struct { func (d *RequestedDevice) Canonicalize() { if d.Count == nil { - d.Count = uint64ToPtr(1) + d.Count = pointerOf(uint64(1)) } for _, a := range d.Affinities { diff --git a/api/resources_test.go b/api/resources_test.go index 2f9904ba6e5..ac279437efc 100644 --- a/api/resources_test.go +++ b/api/resources_test.go @@ -1,11 +1,10 @@ package api import ( - "reflect" "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/kr/pretty" + "github.com/shoenig/test/must" ) func TestResources_Canonicalize(t *testing.T) { @@ -23,25 +22,25 @@ func TestResources_Canonicalize(t *testing.T) { { name: "cores", input: &Resources{ - Cores: intToPtr(2), - MemoryMB: intToPtr(1024), + Cores: pointerOf(2), + MemoryMB: pointerOf(1024), }, expected: &Resources{ - CPU: intToPtr(0), - Cores: intToPtr(2), - MemoryMB: intToPtr(1024), + CPU: pointerOf(0), + Cores: pointerOf(2), + MemoryMB: pointerOf(1024), }, }, { name: "cpu", input: &Resources{ - CPU: intToPtr(500), - MemoryMB: intToPtr(1024), + CPU: pointerOf(500), + MemoryMB: pointerOf(1024), }, expected: &Resources{ - CPU: intToPtr(500), - Cores: intToPtr(0), - MemoryMB: intToPtr(1024), + CPU: pointerOf(500), + Cores: pointerOf(0), + MemoryMB: pointerOf(1024), }, }, } @@ -49,9 +48,7 @@ func TestResources_Canonicalize(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { tc.input.Canonicalize() - if !reflect.DeepEqual(tc.input, tc.expected) { - t.Fatalf("Name: %v, Diffs:\n%v", tc.name, pretty.Diff(tc.expected, tc.input)) - } + must.Eq(t, tc.expected, tc.input) }) } } diff --git a/api/scaling.go b/api/scaling.go index 2266c895987..32259c9f415 100644 --- a/api/scaling.go +++ b/api/scaling.go @@ -35,7 +35,7 @@ func (s *Scaling) GetPolicy(id string, q *QueryOptions) (*ScalingPolicy, *QueryM func (p *ScalingPolicy) Canonicalize(taskGroupCount int) { if p.Enabled == nil { - p.Enabled = boolToPtr(true) + p.Enabled = pointerOf(true) } if p.Min == nil { var m int64 = int64(taskGroupCount) diff --git a/api/scaling_test.go b/api/scaling_test.go index 0d4a703c6ba..5801dc56777 100644 --- a/api/scaling_test.go +++ b/api/scaling_test.go @@ -4,12 +4,11 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestScalingPolicies_ListPolicies(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -18,23 +17,21 @@ func TestScalingPolicies_ListPolicies(t *testing.T) { // Check that we don't have any scaling policies before registering a job that has one policies, _, err := scaling.ListPolicies(nil) - require.NoError(err) - require.Empty(policies, "expected 0 scaling policies, got: %d", len(policies)) + must.NoError(t, err) + must.SliceEmpty(t, policies) // Register a job with a scaling policy job := testJob() 
job.TaskGroups[0].Scaling = &ScalingPolicy{ - Max: int64ToPtr(100), + Max: pointerOf(int64(100)), } _, _, err = jobs.Register(job, nil) - require.NoError(err) + must.NoError(t, err) // Check that we have a scaling policy now policies, _, err = scaling.ListPolicies(nil) - require.NoError(err) - if len(policies) != 1 { - t.Fatalf("expected 1 scaling policy, got: %d", len(policies)) - } + must.NoError(t, err) + must.Len(t, 1, policies) policy := policies[0] @@ -43,21 +40,20 @@ func TestScalingPolicies_ListPolicies(t *testing.T) { if job.Namespace != nil && *job.Namespace != "" { namespace = *job.Namespace } - require.Equal(policy.Target["Namespace"], namespace) + must.Eq(t, policy.Target["Namespace"], namespace) // Check that the scaling policy references the right job - require.Equal(policy.Target["Job"], *job.ID) + must.Eq(t, policy.Target["Job"], *job.ID) // Check that the scaling policy references the right group - require.Equal(policy.Target["Group"], *job.TaskGroups[0].Name) + must.Eq(t, policy.Target["Group"], *job.TaskGroups[0].Name) // Check that the scaling policy has the right type - require.Equal(ScalingPolicyTypeHorizontal, policy.Type) + must.Eq(t, ScalingPolicyTypeHorizontal, policy.Type) } func TestScalingPolicies_GetPolicy(t *testing.T) { testutil.Parallel(t) - require := require.New(t) c, s := makeClient(t, nil, nil) defer s.Stop() @@ -66,32 +62,30 @@ func TestScalingPolicies_GetPolicy(t *testing.T) { // Empty ID should return 404 _, _, err := scaling.GetPolicy("", nil) - require.Error(err) - require.Containsf(err.Error(), "404", "expected 404 error, got: %s", err.Error()) + must.ErrorContains(t, err, "404") - // Inexistent ID should return 404 - _, _, err = scaling.GetPolicy("i-dont-exist", nil) - require.Error(err) - require.Containsf(err.Error(), "404", "expected 404 error, got: %s", err.Error()) + // Non-existent ID should return 404 + _, _, err = scaling.GetPolicy("i-do-not-exist", nil) + must.ErrorContains(t, err, "404") // Register a job with a scaling policy job := testJob() policy := &ScalingPolicy{ - Enabled: boolToPtr(true), - Min: int64ToPtr(1), - Max: int64ToPtr(1), + Enabled: pointerOf(true), + Min: pointerOf(int64(1)), + Max: pointerOf(int64(1)), Policy: map[string]interface{}{ "key": "value", }, } job.TaskGroups[0].Scaling = policy _, _, err = jobs.Register(job, nil) - require.NoError(err) + must.NoError(t, err) // Find newly created scaling policy ID var policyID string policies, _, err := scaling.ListPolicies(nil) - require.NoError(err) + must.NoError(t, err) for _, p := range policies { if p.Target["Job"] == *job.ID { policyID = p.ID @@ -104,7 +98,7 @@ func TestScalingPolicies_GetPolicy(t *testing.T) { // Fetch scaling policy resp, _, err := scaling.GetPolicy(policyID, nil) - require.NoError(err) + must.NoError(t, err) // Check that the scaling policy fields match namespace := DefaultNamespace @@ -116,10 +110,10 @@ func TestScalingPolicies_GetPolicy(t *testing.T) { "Job": *job.ID, "Group": *job.TaskGroups[0].Name, } - require.Equal(expectedTarget, resp.Target) - require.Equal(policy.Policy, resp.Policy) - require.Equal(policy.Enabled, resp.Enabled) - require.Equal(*policy.Min, *resp.Min) - require.Equal(policy.Max, resp.Max) - require.Equal(ScalingPolicyTypeHorizontal, resp.Type) + must.Eq(t, expectedTarget, resp.Target) + must.Eq(t, policy.Policy, resp.Policy) + must.Eq(t, policy.Enabled, resp.Enabled) + must.Eq(t, *policy.Min, *resp.Min) + must.Eq(t, policy.Max, resp.Max) + must.Eq(t, ScalingPolicyTypeHorizontal, resp.Type) } diff --git 
a/api/search_test.go b/api/search_test.go index f262faaf649..3041b74debe 100644 --- a/api/search_test.go +++ b/api/search_test.go @@ -5,7 +5,7 @@ import ( "github.com/hashicorp/nomad/api/contexts" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestSearch_PrefixSearch(t *testing.T) { @@ -16,18 +16,18 @@ func TestSearch_PrefixSearch(t *testing.T) { job := testJob() _, _, err := c.Jobs().Register(job, nil) - require.NoError(t, err) + must.NoError(t, err) id := *job.ID prefix := id[:len(id)-2] resp, qm, err := c.Search().PrefixSearch(prefix, contexts.Jobs, nil) - require.NoError(t, err) - require.NotNil(t, qm) - require.NotNil(t, resp) + must.NoError(t, err) + must.NotNil(t, qm) + must.NotNil(t, resp) jobMatches := resp.Matches[contexts.Jobs] - require.Len(t, jobMatches, 1) - require.Equal(t, id, jobMatches[0]) + must.Len(t, 1, jobMatches) + must.Eq(t, id, jobMatches[0]) } func TestSearch_FuzzySearch(t *testing.T) { @@ -38,17 +38,15 @@ func TestSearch_FuzzySearch(t *testing.T) { job := testJob() _, _, err := c.Jobs().Register(job, nil) - require.NoError(t, err) + must.NoError(t, err) resp, qm, err := c.Search().FuzzySearch("bin", contexts.All, nil) - require.NoError(t, err) - require.NotNil(t, qm) - require.NotNil(t, resp) + must.NoError(t, err) + must.NotNil(t, qm) + must.NotNil(t, resp) commandMatches := resp.Matches[contexts.Commands] - require.Len(t, commandMatches, 1) - require.Equal(t, "/bin/sleep", commandMatches[0].ID) - require.Equal(t, []string{ - "default", *job.ID, "group1", "task1", - }, commandMatches[0].Scope) + must.Len(t, 1, commandMatches) + must.Eq(t, "/bin/sleep", commandMatches[0].ID) + must.Eq(t, []string{"default", *job.ID, "group1", "task1"}, commandMatches[0].Scope) } diff --git a/api/sentinel.go b/api/sentinel.go index c1e52c7cb81..74c88dd6384 100644 --- a/api/sentinel.go +++ b/api/sentinel.go @@ -1,6 +1,8 @@ package api -import "fmt" +import ( + "errors" +) // SentinelPolicies is used to query the Sentinel Policy endpoints. 
type SentinelPolicies struct { @@ -25,7 +27,7 @@ func (a *SentinelPolicies) List(q *QueryOptions) ([]*SentinelPolicyListStub, *Qu // Upsert is used to create or update a policy func (a *SentinelPolicies) Upsert(policy *SentinelPolicy, q *WriteOptions) (*WriteMeta, error) { if policy == nil || policy.Name == "" { - return nil, fmt.Errorf("missing policy name") + return nil, errors.New("missing policy name") } wm, err := a.client.write("/v1/sentinel/policy/"+policy.Name, policy, nil, q) if err != nil { @@ -37,9 +39,9 @@ func (a *SentinelPolicies) Upsert(policy *SentinelPolicy, q *WriteOptions) (*Wri // Delete is used to delete a policy func (a *SentinelPolicies) Delete(policyName string, q *WriteOptions) (*WriteMeta, error) { if policyName == "" { - return nil, fmt.Errorf("missing policy name") + return nil, errors.New("missing policy name") } - wm, err := a.client.delete("/v1/sentinel/policy/"+policyName, nil, q) + wm, err := a.client.delete("/v1/sentinel/policy/"+policyName, nil, nil, q) if err != nil { return nil, err } @@ -49,7 +51,7 @@ func (a *SentinelPolicies) Delete(policyName string, q *WriteOptions) (*WriteMet // Info is used to query a specific policy func (a *SentinelPolicies) Info(policyName string, q *QueryOptions) (*SentinelPolicy, *QueryMeta, error) { if policyName == "" { - return nil, nil, fmt.Errorf("missing policy name") + return nil, nil, errors.New("missing policy name") } var resp SentinelPolicy wm, err := a.client.query("/v1/sentinel/policy/"+policyName, &resp, q) diff --git a/api/sentinel_test.go b/api/sentinel_test.go index 638c4191eb6..22d7708e59b 100644 --- a/api/sentinel_test.go +++ b/api/sentinel_test.go @@ -1,5 +1,4 @@ //go:build ent -// +build ent package api @@ -7,7 +6,7 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/assert" + "github.com/shoenig/test/must" ) func TestSentinelPolicies_ListUpsert(t *testing.T) { @@ -18,15 +17,9 @@ func TestSentinelPolicies_ListUpsert(t *testing.T) { // Listing when nothing exists returns empty result, qm, err := ap.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } - if qm.LastIndex != 1 { - t.Fatalf("bad index: %d", qm.LastIndex) - } - if n := len(result); n != 0 { - t.Fatalf("expected 0 policies, got: %d", n) - } + must.NoError(t, err) + must.Positive(t, qm.LastIndex) + must.SliceEmpty(t, result) // Register a policy policy := &SentinelPolicy{ @@ -37,22 +30,19 @@ func TestSentinelPolicies_ListUpsert(t *testing.T) { Policy: "main = rule { true }", } wm, err := ap.Upsert(policy, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Check the list again result, qm, err = ap.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertQueryMeta(t, qm) - if len(result) != 1 { - t.Fatalf("expected policy, got: %#v", result) - } + must.Len(t, 1, result) } func TestSentinelPolicies_Delete(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() ap := c.SentinelPolicies() @@ -66,27 +56,24 @@ func TestSentinelPolicies_Delete(t *testing.T) { Policy: "main = rule { true } ", } wm, err := ap.Upsert(policy, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Delete the policy wm, err = ap.Delete(policy.Name, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Check the list again result, qm, err := ap.List(nil) - if err != nil { - t.Fatalf("err: %s", err) - } + must.NoError(t, err) assertQueryMeta(t, qm) - if len(result) != 0 { - 
t.Fatalf("unexpected policy, got: %#v", result) - } + must.SliceEmpty(t, result) } func TestSentinelPolicies_Info(t *testing.T) { testutil.Parallel(t) + c, s, _ := makeACLClient(t, nil, nil) defer s.Stop() ap := c.SentinelPolicies() @@ -100,12 +87,12 @@ func TestSentinelPolicies_Info(t *testing.T) { Policy: "main = rule { true }", } wm, err := ap.Upsert(policy, nil) - assert.Nil(t, err) + must.NoError(t, err) assertWriteMeta(t, wm) // Query the policy out, qm, err := ap.Info(policy.Name, nil) - assert.Nil(t, err) + must.NoError(t, err) assertQueryMeta(t, qm) - assert.Equal(t, policy.Name, out.Name) + must.Eq(t, policy.Name, out.Name) } diff --git a/api/services.go b/api/services.go index 0f4ea3d2eca..d23b207fd3a 100644 --- a/api/services.go +++ b/api/services.go @@ -122,7 +122,7 @@ func (s *Services) Get(serviceName string, q *QueryOptions) ([]*ServiceRegistrat // by its service name and service ID. func (s *Services) Delete(serviceName, serviceID string, q *WriteOptions) (*WriteMeta, error) { path := fmt.Sprintf("/v1/service/%s/%s", url.PathEscape(serviceName), url.PathEscape(serviceID)) - wm, err := s.client.delete(path, nil, q) + wm, err := s.client.delete(path, nil, nil, q) if err != nil { return nil, err } @@ -144,7 +144,7 @@ func (c *CheckRestart) Canonicalize() { } if c.Grace == nil { - c.Grace = timeToPtr(1 * time.Second) + c.Grace = pointerOf(1 * time.Second) } } diff --git a/api/services_test.go b/api/services_test.go index c9ef884e697..96206e6b613 100644 --- a/api/services_test.go +++ b/api/services_test.go @@ -6,7 +6,7 @@ import ( "time" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestServiceRegistrations_List(t *testing.T) { @@ -21,28 +21,29 @@ func TestServiceRegistrations_Delete(t *testing.T) { // TODO(jrasell) add tests once registration process is in place. 
} - func TestService_Canonicalize(t *testing.T) { testutil.Parallel(t) - j := &Job{Name: stringToPtr("job")} - tg := &TaskGroup{Name: stringToPtr("group")} + j := &Job{Name: pointerOf("job")} + tg := &TaskGroup{Name: pointerOf("group")} task := &Task{Name: "task"} s := &Service{} s.Canonicalize(task, tg, j) - require.Equal(t, fmt.Sprintf("%s-%s-%s", *j.Name, *tg.Name, task.Name), s.Name) - require.Equal(t, "auto", s.AddressMode) - require.Equal(t, OnUpdateRequireHealthy, s.OnUpdate) - require.Equal(t, ServiceProviderConsul, s.Provider) + must.Eq(t, fmt.Sprintf("%s-%s-%s", *j.Name, *tg.Name, task.Name), s.Name) + must.Eq(t, "auto", s.AddressMode) + must.Eq(t, OnUpdateRequireHealthy, s.OnUpdate) + must.Eq(t, ServiceProviderConsul, s.Provider) + must.Nil(t, s.Meta) + must.Nil(t, s.CanaryMeta) } func TestServiceCheck_Canonicalize(t *testing.T) { testutil.Parallel(t) - j := &Job{Name: stringToPtr("job")} - tg := &TaskGroup{Name: stringToPtr("group")} + j := &Job{Name: pointerOf("job")} + tg := &TaskGroup{Name: pointerOf("group")} task := &Task{Name: "task"} s := &Service{ Checks: []ServiceCheck{ @@ -53,15 +54,14 @@ func TestServiceCheck_Canonicalize(t *testing.T) { } s.Canonicalize(task, tg, j) - - require.Equal(t, OnUpdateRequireHealthy, s.Checks[0].OnUpdate) + must.Eq(t, OnUpdateRequireHealthy, s.Checks[0].OnUpdate) } func TestService_Check_PassFail(t *testing.T) { testutil.Parallel(t) - job := &Job{Name: stringToPtr("job")} - tg := &TaskGroup{Name: stringToPtr("group")} + job := &Job{Name: pointerOf("job")} + tg := &TaskGroup{Name: pointerOf("group")} task := &Task{Name: "task"} t.Run("enforce minimums", func(t *testing.T) { @@ -73,8 +73,8 @@ func TestService_Check_PassFail(t *testing.T) { } s.Canonicalize(task, tg, job) - require.Zero(t, s.Checks[0].SuccessBeforePassing) - require.Zero(t, s.Checks[0].FailuresBeforeCritical) + must.Zero(t, s.Checks[0].SuccessBeforePassing) + must.Zero(t, s.Checks[0].FailuresBeforeCritical) }) t.Run("normal", func(t *testing.T) { @@ -86,8 +86,8 @@ func TestService_Check_PassFail(t *testing.T) { } s.Canonicalize(task, tg, job) - require.Equal(t, 3, s.Checks[0].SuccessBeforePassing) - require.Equal(t, 4, s.Checks[0].FailuresBeforeCritical) + must.Eq(t, 3, s.Checks[0].SuccessBeforePassing) + must.Eq(t, 4, s.Checks[0].FailuresBeforeCritical) }) } @@ -96,13 +96,13 @@ func TestService_Check_PassFail(t *testing.T) { func TestService_CheckRestart(t *testing.T) { testutil.Parallel(t) - job := &Job{Name: stringToPtr("job")} - tg := &TaskGroup{Name: stringToPtr("group")} + job := &Job{Name: pointerOf("job")} + tg := &TaskGroup{Name: pointerOf("group")} task := &Task{Name: "task"} service := &Service{ CheckRestart: &CheckRestart{ Limit: 11, - Grace: timeToPtr(11 * time.Second), + Grace: pointerOf(11 * time.Second), IgnoreWarnings: true, }, Checks: []ServiceCheck{ @@ -110,7 +110,7 @@ func TestService_CheckRestart(t *testing.T) { Name: "all-set", CheckRestart: &CheckRestart{ Limit: 22, - Grace: timeToPtr(22 * time.Second), + Grace: pointerOf(22 * time.Second), IgnoreWarnings: true, }, }, @@ -118,7 +118,7 @@ func TestService_CheckRestart(t *testing.T) { Name: "some-set", CheckRestart: &CheckRestart{ Limit: 33, - Grace: timeToPtr(33 * time.Second), + Grace: pointerOf(33 * time.Second), }, }, { @@ -128,24 +128,24 @@ func TestService_CheckRestart(t *testing.T) { } service.Canonicalize(task, tg, job) - require.Equal(t, service.Checks[0].CheckRestart.Limit, 22) - require.Equal(t, *service.Checks[0].CheckRestart.Grace, 22*time.Second) - require.True(t, 
service.Checks[0].CheckRestart.IgnoreWarnings) + must.Eq(t, 22, service.Checks[0].CheckRestart.Limit) + must.Eq(t, 22*time.Second, *service.Checks[0].CheckRestart.Grace) + must.True(t, service.Checks[0].CheckRestart.IgnoreWarnings) - require.Equal(t, service.Checks[1].CheckRestart.Limit, 33) - require.Equal(t, *service.Checks[1].CheckRestart.Grace, 33*time.Second) - require.True(t, service.Checks[1].CheckRestart.IgnoreWarnings) + must.Eq(t, 33, service.Checks[1].CheckRestart.Limit) + must.Eq(t, 33*time.Second, *service.Checks[1].CheckRestart.Grace) + must.True(t, service.Checks[1].CheckRestart.IgnoreWarnings) - require.Equal(t, service.Checks[2].CheckRestart.Limit, 11) - require.Equal(t, *service.Checks[2].CheckRestart.Grace, 11*time.Second) - require.True(t, service.Checks[2].CheckRestart.IgnoreWarnings) + must.Eq(t, 11, service.Checks[2].CheckRestart.Limit) + must.Eq(t, 11*time.Second, *service.Checks[2].CheckRestart.Grace) + must.True(t, service.Checks[2].CheckRestart.IgnoreWarnings) } func TestService_Connect_proxy_settings(t *testing.T) { testutil.Parallel(t) - job := &Job{Name: stringToPtr("job")} - tg := &TaskGroup{Name: stringToPtr("group")} + job := &Job{Name: pointerOf("job")} + tg := &TaskGroup{Name: pointerOf("group")} task := &Task{Name: "task"} service := &Service{ Connect: &ConsulConnect{ @@ -167,20 +167,19 @@ func TestService_Connect_proxy_settings(t *testing.T) { service.Canonicalize(task, tg, job) proxy := service.Connect.SidecarService.Proxy - require.Equal(t, proxy.Upstreams[0].DestinationName, "upstream") - require.Equal(t, proxy.Upstreams[0].LocalBindPort, 80) - require.Equal(t, proxy.Upstreams[0].Datacenter, "dc2") - require.Equal(t, proxy.Upstreams[0].LocalBindAddress, "127.0.0.2") - require.Equal(t, proxy.LocalServicePort, 8000) + must.Eq(t, "upstream", proxy.Upstreams[0].DestinationName) + must.Eq(t, 80, proxy.Upstreams[0].LocalBindPort) + must.Eq(t, "dc2", proxy.Upstreams[0].Datacenter) + must.Eq(t, "127.0.0.2", proxy.Upstreams[0].LocalBindAddress) + must.Eq(t, 8000, proxy.LocalServicePort) } func TestService_Tags(t *testing.T) { testutil.Parallel(t) - r := require.New(t) // canonicalize does not modify eto or tags - job := &Job{Name: stringToPtr("job")} - tg := &TaskGroup{Name: stringToPtr("group")} + job := &Job{Name: pointerOf("job")} + tg := &TaskGroup{Name: pointerOf("group")} task := &Task{Name: "task"} service := &Service{ Tags: []string{"a", "b"}, @@ -189,7 +188,7 @@ func TestService_Tags(t *testing.T) { } service.Canonicalize(task, tg, job) - r.True(service.EnableTagOverride) - r.Equal([]string{"a", "b"}, service.Tags) - r.Equal([]string{"c", "d"}, service.CanaryTags) -} \ No newline at end of file + must.True(t, service.EnableTagOverride) + must.Eq(t, []string{"a", "b"}, service.Tags) + must.Eq(t, []string{"c", "d"}, service.CanaryTags) +} diff --git a/api/status_test.go b/api/status_test.go index bbb19a599e8..16297218868 100644 --- a/api/status_test.go +++ b/api/status_test.go @@ -4,20 +4,18 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" + "github.com/shoenig/test/must" ) func TestStatus_Leader(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() status := c.Status() // Query for leader status should return a result out, err := status.Leader() - if err != nil { - t.Fatalf("err: %s", err) - } - if out == "" { - t.Fatalf("expected leader, got: %q", out) - } + must.NoError(t, err) + must.NotEq(t, "", out) } diff --git a/api/system_test.go b/api/system_test.go index ae34f6848c3..623af15a8b9 100644 
--- a/api/system_test.go +++ b/api/system_test.go @@ -4,14 +4,15 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" + "github.com/shoenig/test/must" ) func TestSystem_GarbageCollect(t *testing.T) { testutil.Parallel(t) + c, s := makeClient(t, nil, nil) defer s.Stop() e := c.System() - if err := e.GarbageCollect(); err != nil { - t.Fatal(err) - } + err := e.GarbageCollect() + must.NoError(t, err) } diff --git a/api/tasks.go b/api/tasks.go index d03a4cb4a9a..2df82e4d9d9 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -170,13 +170,13 @@ func NewAffinity(lTarget string, operand string, rTarget string, weight int8) *A LTarget: lTarget, RTarget: rTarget, Operand: operand, - Weight: int8ToPtr(weight), + Weight: pointerOf(int8(weight)), } } func (a *Affinity) Canonicalize() { if a.Weight == nil { - a.Weight = int8ToPtr(50) + a.Weight = pointerOf(int8(50)) } } @@ -187,35 +187,35 @@ func NewDefaultReschedulePolicy(jobType string) *ReschedulePolicy { // This needs to be in sync with DefaultServiceJobReschedulePolicy // in nomad/structs/structs.go dp = &ReschedulePolicy{ - Delay: timeToPtr(30 * time.Second), - DelayFunction: stringToPtr("exponential"), - MaxDelay: timeToPtr(1 * time.Hour), - Unlimited: boolToPtr(true), + Delay: pointerOf(30 * time.Second), + DelayFunction: pointerOf("exponential"), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), - Attempts: intToPtr(0), - Interval: timeToPtr(0), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), } case "batch": // This needs to be in sync with DefaultBatchJobReschedulePolicy // in nomad/structs/structs.go dp = &ReschedulePolicy{ - Attempts: intToPtr(1), - Interval: timeToPtr(24 * time.Hour), - Delay: timeToPtr(5 * time.Second), - DelayFunction: stringToPtr("constant"), + Attempts: pointerOf(1), + Interval: pointerOf(24 * time.Hour), + Delay: pointerOf(5 * time.Second), + DelayFunction: pointerOf("constant"), - MaxDelay: timeToPtr(0), - Unlimited: boolToPtr(false), + MaxDelay: pointerOf(time.Duration(0)), + Unlimited: pointerOf(false), } case "system": dp = &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - Delay: timeToPtr(0), - DelayFunction: stringToPtr(""), - MaxDelay: timeToPtr(0), - Unlimited: boolToPtr(false), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + Delay: pointerOf(time.Duration(0)), + DelayFunction: pointerOf(""), + MaxDelay: pointerOf(time.Duration(0)), + Unlimited: pointerOf(false), } default: @@ -223,12 +223,12 @@ func NewDefaultReschedulePolicy(jobType string) *ReschedulePolicy { // function and we need to ensure a non-nil object is returned so that // the canonicalization runs without panicking. 
dp = &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - Delay: timeToPtr(0), - DelayFunction: stringToPtr(""), - MaxDelay: timeToPtr(0), - Unlimited: boolToPtr(false), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + Delay: pointerOf(time.Duration(0)), + DelayFunction: pointerOf(""), + MaxDelay: pointerOf(time.Duration(0)), + Unlimited: pointerOf(false), } } return dp @@ -276,14 +276,14 @@ func NewSpreadTarget(value string, percent uint8) *SpreadTarget { func NewSpread(attribute string, weight int8, spreadTargets []*SpreadTarget) *Spread { return &Spread{ Attribute: attribute, - Weight: int8ToPtr(weight), + Weight: pointerOf(int8(weight)), SpreadTarget: spreadTargets, } } func (s *Spread) Canonicalize() { if s.Weight == nil { - s.Weight = int8ToPtr(50) + s.Weight = pointerOf(int8(50)) } } @@ -296,21 +296,21 @@ type EphemeralDisk struct { func DefaultEphemeralDisk() *EphemeralDisk { return &EphemeralDisk{ - Sticky: boolToPtr(false), - Migrate: boolToPtr(false), - SizeMB: intToPtr(300), + Sticky: pointerOf(false), + Migrate: pointerOf(false), + SizeMB: pointerOf(300), } } func (e *EphemeralDisk) Canonicalize() { if e.Sticky == nil { - e.Sticky = boolToPtr(false) + e.Sticky = pointerOf(false) } if e.Migrate == nil { - e.Migrate = boolToPtr(false) + e.Migrate = pointerOf(false) } if e.SizeMB == nil { - e.SizeMB = intToPtr(300) + e.SizeMB = pointerOf(300) } } @@ -325,10 +325,10 @@ type MigrateStrategy struct { func DefaultMigrateStrategy() *MigrateStrategy { return &MigrateStrategy{ - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), } } @@ -405,10 +405,10 @@ type VolumeMount struct { func (vm *VolumeMount) Canonicalize() { if vm.PropagationMode == nil { - vm.PropagationMode = stringToPtr(VolumeMountPropagationPrivate) + vm.PropagationMode = pointerOf(VolumeMountPropagationPrivate) } if vm.ReadOnly == nil { - vm.ReadOnly = boolToPtr(false) + vm.ReadOnly = pointerOf(false) } } @@ -439,22 +439,22 @@ type TaskGroup struct { // NewTaskGroup creates a new TaskGroup. 
func NewTaskGroup(name string, count int) *TaskGroup { return &TaskGroup{ - Name: stringToPtr(name), - Count: intToPtr(count), + Name: pointerOf(name), + Count: pointerOf(count), } } // Canonicalize sets defaults and merges settings that should be inherited from the job func (g *TaskGroup) Canonicalize(job *Job) { if g.Name == nil { - g.Name = stringToPtr("") + g.Name = pointerOf("") } if g.Count == nil { if g.Scaling != nil && g.Scaling.Min != nil { - g.Count = intToPtr(int(*g.Scaling.Min)) + g.Count = pointerOf(int(*g.Scaling.Min)) } else { - g.Count = intToPtr(1) + g.Count = pointerOf(1) } } if g.Scaling != nil { @@ -558,10 +558,10 @@ func (g *TaskGroup) Canonicalize(job *Job) { // in nomad/structs/structs.go func defaultServiceJobRestartPolicy() *RestartPolicy { return &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - Attempts: intToPtr(2), - Interval: timeToPtr(30 * time.Minute), - Mode: stringToPtr(RestartPolicyModeFail), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(2), + Interval: pointerOf(30 * time.Minute), + Mode: pointerOf(RestartPolicyModeFail), } } @@ -569,10 +569,10 @@ func defaultServiceJobRestartPolicy() *RestartPolicy { // in nomad/structs/structs.go func defaultBatchJobRestartPolicy() *RestartPolicy { return &RestartPolicy{ - Delay: timeToPtr(15 * time.Second), - Attempts: intToPtr(3), - Interval: timeToPtr(24 * time.Hour), - Mode: stringToPtr(RestartPolicyModeFail), + Delay: pointerOf(15 * time.Second), + Attempts: pointerOf(3), + Interval: pointerOf(24 * time.Hour), + Mode: pointerOf(RestartPolicyModeFail), } } @@ -623,17 +623,17 @@ type LogConfig struct { func DefaultLogConfig() *LogConfig { return &LogConfig{ - MaxFiles: intToPtr(10), - MaxFileSizeMB: intToPtr(10), + MaxFiles: pointerOf(10), + MaxFileSizeMB: pointerOf(10), } } func (l *LogConfig) Canonicalize() { if l.MaxFiles == nil { - l.MaxFiles = intToPtr(10) + l.MaxFiles = pointerOf(10) } if l.MaxFileSizeMB == nil { - l.MaxFileSizeMB = intToPtr(10) + l.MaxFileSizeMB = pointerOf(10) } } @@ -694,7 +694,7 @@ func (t *Task) Canonicalize(tg *TaskGroup, job *Job) { t.Resources.Canonicalize() if t.KillTimeout == nil { - t.KillTimeout = timeToPtr(5 * time.Second) + t.KillTimeout = pointerOf(5 * time.Second) } if t.LogConfig == nil { t.LogConfig = DefaultLogConfig() @@ -746,11 +746,11 @@ type TaskArtifact struct { func (a *TaskArtifact) Canonicalize() { if a.GetterMode == nil { - a.GetterMode = stringToPtr("any") + a.GetterMode = pointerOf("any") } if a.GetterSource == nil { // Shouldn't be possible, but we don't want to panic - a.GetterSource = stringToPtr("") + a.GetterSource = pointerOf("") } if len(a.GetterOptions) == 0 { a.GetterOptions = nil @@ -768,7 +768,7 @@ func (a *TaskArtifact) Canonicalize() { a.RelativeDest = &dest default: // Default to a directory - a.RelativeDest = stringToPtr("local/") + a.RelativeDest = pointerOf("local/") } } } @@ -791,14 +791,39 @@ func (wc *WaitConfig) Copy() *WaitConfig { return nwc } +type ChangeScript struct { + Command *string `mapstructure:"command" hcl:"command"` + Args []string `mapstructure:"args" hcl:"args,optional"` + Timeout *time.Duration `mapstructure:"timeout" hcl:"timeout,optional"` + FailOnError *bool `mapstructure:"fail_on_error" hcl:"fail_on_error"` +} + +func (ch *ChangeScript) Canonicalize() { + if ch.Command == nil { + ch.Command = pointerOf("") + } + if ch.Args == nil { + ch.Args = []string{} + } + if ch.Timeout == nil { + ch.Timeout = pointerOf(5 * time.Second) + } + if ch.FailOnError == nil { + ch.FailOnError = pointerOf(false) + } +} + 
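A minimal sketch of populating the new `ChangeScript` block through the api package and letting `Canonicalize` fill its defaults. The script path, args, and template contents are made up for illustration, and `change_mode` is assumed to accept `"script"` when a change script is configured:

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/api"
)

func main() {
	command := "/usr/local/bin/reload.sh" // hypothetical script path
	mode := "script"
	data := `port = {{ env "NOMAD_PORT_http" }}`
	dest := "local/app.conf"
	timeout := 10 * time.Second

	tmpl := &api.Template{
		EmbeddedTmpl: &data,
		DestPath:     &dest,
		ChangeMode:   &mode,
		ChangeScript: &api.ChangeScript{
			Command: &command,
			Args:    []string{"-quiet"},
			Timeout: &timeout,
		},
	}

	// Template.Canonicalize calls ChangeScript.Canonicalize, which fills the
	// remaining fields; FailOnError defaults to false.
	tmpl.Canonicalize()
	fmt.Println(*tmpl.ChangeScript.FailOnError) // false
}
```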
type Template struct { SourcePath *string `mapstructure:"source" hcl:"source,optional"` DestPath *string `mapstructure:"destination" hcl:"destination,optional"` EmbeddedTmpl *string `mapstructure:"data" hcl:"data,optional"` ChangeMode *string `mapstructure:"change_mode" hcl:"change_mode,optional"` + ChangeScript *ChangeScript `mapstructure:"change_script" hcl:"change_script,block"` ChangeSignal *string `mapstructure:"change_signal" hcl:"change_signal,optional"` Splay *time.Duration `mapstructure:"splay" hcl:"splay,optional"` Perms *string `mapstructure:"perms" hcl:"perms,optional"` + Uid *int `mapstructure:"uid" hcl:"uid,optional"` + Gid *int `mapstructure:"gid" hcl:"gid,optional"` LeftDelim *string `mapstructure:"left_delimiter" hcl:"left_delimiter,optional"` RightDelim *string `mapstructure:"right_delimiter" hcl:"right_delimiter,optional"` Envvars *bool `mapstructure:"env" hcl:"env,optional"` @@ -808,46 +833,49 @@ type Template struct { func (tmpl *Template) Canonicalize() { if tmpl.SourcePath == nil { - tmpl.SourcePath = stringToPtr("") + tmpl.SourcePath = pointerOf("") } if tmpl.DestPath == nil { - tmpl.DestPath = stringToPtr("") + tmpl.DestPath = pointerOf("") } if tmpl.EmbeddedTmpl == nil { - tmpl.EmbeddedTmpl = stringToPtr("") + tmpl.EmbeddedTmpl = pointerOf("") } if tmpl.ChangeMode == nil { - tmpl.ChangeMode = stringToPtr("restart") + tmpl.ChangeMode = pointerOf("restart") } if tmpl.ChangeSignal == nil { if *tmpl.ChangeMode == "signal" { - tmpl.ChangeSignal = stringToPtr("SIGHUP") + tmpl.ChangeSignal = pointerOf("SIGHUP") } else { - tmpl.ChangeSignal = stringToPtr("") + tmpl.ChangeSignal = pointerOf("") } } else { sig := *tmpl.ChangeSignal - tmpl.ChangeSignal = stringToPtr(strings.ToUpper(sig)) + tmpl.ChangeSignal = pointerOf(strings.ToUpper(sig)) + } + if tmpl.ChangeScript != nil { + tmpl.ChangeScript.Canonicalize() } if tmpl.Splay == nil { - tmpl.Splay = timeToPtr(5 * time.Second) + tmpl.Splay = pointerOf(5 * time.Second) } if tmpl.Perms == nil { - tmpl.Perms = stringToPtr("0644") + tmpl.Perms = pointerOf("0644") } if tmpl.LeftDelim == nil { - tmpl.LeftDelim = stringToPtr("{{") + tmpl.LeftDelim = pointerOf("{{") } if tmpl.RightDelim == nil { - tmpl.RightDelim = stringToPtr("}}") + tmpl.RightDelim = pointerOf("}}") } if tmpl.Envvars == nil { - tmpl.Envvars = boolToPtr(false) + tmpl.Envvars = pointerOf(false) } //COMPAT(0.12) VaultGrace is deprecated and unused as of Vault 0.5 if tmpl.VaultGrace == nil { - tmpl.VaultGrace = timeToPtr(0) + tmpl.VaultGrace = pointerOf(time.Duration(0)) } } @@ -861,16 +889,16 @@ type Vault struct { func (v *Vault) Canonicalize() { if v.Env == nil { - v.Env = boolToPtr(true) + v.Env = pointerOf(true) } if v.Namespace == nil { - v.Namespace = stringToPtr("") + v.Namespace = pointerOf("") } if v.ChangeMode == nil { - v.ChangeMode = stringToPtr("restart") + v.ChangeMode = pointerOf("restart") } if v.ChangeSignal == nil { - v.ChangeSignal = stringToPtr("SIGHUP") + v.ChangeSignal = pointerOf("SIGHUP") } } @@ -1003,7 +1031,7 @@ type TaskEvent struct { } // CSIPluginType is an enum string that encapsulates the valid options for a -// CSIPlugin stanza's Type. These modes will allow the plugin to be used in +// CSIPlugin block's Type. These modes will allow the plugin to be used in // different ways by the client. 
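Ahead of the `TaskCSIPluginConfig` additions just below, a minimal sketch of the canonicalized defaults for the new `StagePublishBaseDir` and `HealthTimeout` fields. No real plugin is configured here, and the printed paths assume a Unix path separator:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
)

func main() {
	// Leave the new fields unset to observe the defaults Canonicalize applies.
	cfg := &api.TaskCSIPluginConfig{}
	cfg.Canonicalize()

	fmt.Println(cfg.MountDir)            // /csi
	fmt.Println(cfg.StagePublishBaseDir) // /local/csi
	fmt.Println(cfg.HealthTimeout)       // 30s
}
```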
type CSIPluginType string @@ -1032,17 +1060,32 @@ type TaskCSIPluginConfig struct { // CSIPluginType instructs Nomad on how to handle processing a plugin Type CSIPluginType `mapstructure:"type" hcl:"type,optional"` - // MountDir is the destination that nomad should mount in its CSI - // directory for the plugin. It will then expect a file called CSISocketName - // to be created by the plugin, and will provide references into - // "MountDir/CSIIntermediaryDirname/VolumeName/AllocID for mounts. - // - // Default is /csi. + // MountDir is the directory (within its container) in which the plugin creates a + // socket (called CSISocketName) for communication with Nomad. Default is /csi. MountDir string `mapstructure:"mount_dir" hcl:"mount_dir,optional"` + + // StagePublishBaseDir is the base directory (within its container) in which the plugin + // mounts volumes being staged and bind mounts volumes being published. + // e.g. staging_target_path = {StagePublishBaseDir}/staging/{volume-id}/{usage-mode} + // e.g. target_path = {StagePublishBaseDir}/per-alloc/{alloc-id}/{volume-id}/{usage-mode} + // Default is /local/csi. + StagePublishBaseDir string `mapstructure:"stage_publish_base_dir" hcl:"stage_publish_base_dir,optional"` + + // HealthTimeout is the time after which the CSI plugin tasks will be killed + // if the CSI Plugin is not healthy. + HealthTimeout time.Duration `mapstructure:"health_timeout" hcl:"health_timeout,optional"` } func (t *TaskCSIPluginConfig) Canonicalize() { if t.MountDir == "" { t.MountDir = "/csi" } + + if t.StagePublishBaseDir == "" { + t.StagePublishBaseDir = filepath.Join("/local", "csi") + } + + if t.HealthTimeout == 0 { + t.HealthTimeout = 30 * time.Second + } } diff --git a/api/tasks_test.go b/api/tasks_test.go index 02e20506a6e..8ae4b10b85c 100644 --- a/api/tasks_test.go +++ b/api/tasks_test.go @@ -2,41 +2,35 @@ package api import ( "path/filepath" - "reflect" "testing" "time" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestTaskGroup_NewTaskGroup(t *testing.T) { testutil.Parallel(t) + grp := NewTaskGroup("grp1", 2) expect := &TaskGroup{ - Name: stringToPtr("grp1"), - Count: intToPtr(2), - } - if !reflect.DeepEqual(grp, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, grp) + Name: pointerOf("grp1"), + Count: pointerOf(2), } + must.Eq(t, expect, grp) } func TestTaskGroup_Constrain(t *testing.T) { testutil.Parallel(t) + grp := NewTaskGroup("grp1", 1) // Add a constraint to the group out := grp.Constrain(NewConstraint("kernel.name", "=", "darwin")) - if n := len(grp.Constraints); n != 1 { - t.Fatalf("expected 1 constraint, got: %d", n) - } + must.Len(t, 1, grp.Constraints) // Check that the group was returned - if out != grp { - t.Fatalf("expected: %#v, got: %#v", grp, out) - } + must.Eq(t, grp, out) // Add a second constraint grp.Constrain(NewConstraint("memory.totalbytes", ">=", "128000000")) @@ -52,25 +46,20 @@ func TestTaskGroup_Constrain(t *testing.T) { Operand: ">=", }, } - if !reflect.DeepEqual(grp.Constraints, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, grp.Constraints) - } + must.Eq(t, expect, grp.Constraints) } func TestTaskGroup_AddAffinity(t *testing.T) { testutil.Parallel(t) + grp := NewTaskGroup("grp1", 1) // Add an affinity to the group out := grp.AddAffinity(NewAffinity("kernel.version", "=", "4.6", 100)) - if n := len(grp.Affinities); n != 1 { - t.Fatalf("expected 1 affinity, got: %d", n) - } + must.Len(t, 1, 
grp.Affinities) // Check that the group was returned - if out != grp { - t.Fatalf("expected: %#v, got: %#v", grp, out) - } + must.Eq(t, grp, out) // Add a second affinity grp.AddAffinity(NewAffinity("${node.affinity}", "=", "dc2", 50)) @@ -79,45 +68,39 @@ func TestTaskGroup_AddAffinity(t *testing.T) { LTarget: "kernel.version", RTarget: "4.6", Operand: "=", - Weight: int8ToPtr(100), + Weight: pointerOf(int8(100)), }, { LTarget: "${node.affinity}", RTarget: "dc2", Operand: "=", - Weight: int8ToPtr(50), + Weight: pointerOf(int8(50)), }, } - if !reflect.DeepEqual(grp.Affinities, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, grp.Constraints) - } + must.Eq(t, expect, grp.Affinities) } func TestTaskGroup_SetMeta(t *testing.T) { testutil.Parallel(t) + grp := NewTaskGroup("grp1", 1) // Initializes an empty map out := grp.SetMeta("foo", "bar") - if grp.Meta == nil { - t.Fatalf("should be initialized") - } + must.NotNil(t, grp.Meta) // Check that we returned the group - if out != grp { - t.Fatalf("expect: %#v, got: %#v", grp, out) - } + must.Eq(t, grp, out) // Add a second meta k/v grp.SetMeta("baz", "zip") expect := map[string]string{"foo": "bar", "baz": "zip"} - if !reflect.DeepEqual(grp.Meta, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, grp.Meta) - } + must.Eq(t, expect, grp.Meta) } func TestTaskGroup_AddSpread(t *testing.T) { testutil.Parallel(t) + grp := NewTaskGroup("grp1", 1) // Create and add spread @@ -125,14 +108,10 @@ func TestTaskGroup_AddSpread(t *testing.T) { spread := NewSpread("${meta.rack}", 100, []*SpreadTarget{spreadTarget}) out := grp.AddSpread(spread) - if n := len(grp.Spreads); n != 1 { - t.Fatalf("expected 1 spread, got: %d", n) - } + must.Len(t, 1, grp.Spreads) // Check that the group was returned - if out != grp { - t.Fatalf("expected: %#v, got: %#v", grp, out) - } + must.Eq(t, grp, out) // Add a second spread spreadTarget2 := NewSpreadTarget("dc1", 100) @@ -143,7 +122,7 @@ func TestTaskGroup_AddSpread(t *testing.T) { expect := []*Spread{ { Attribute: "${meta.rack}", - Weight: int8ToPtr(100), + Weight: pointerOf(int8(100)), SpreadTarget: []*SpreadTarget{ { Value: "r1", @@ -153,7 +132,7 @@ func TestTaskGroup_AddSpread(t *testing.T) { }, { Attribute: "${node.datacenter}", - Weight: int8ToPtr(100), + Weight: pointerOf(int8(100)), SpreadTarget: []*SpreadTarget{ { Value: "dc1", @@ -162,25 +141,20 @@ func TestTaskGroup_AddSpread(t *testing.T) { }, }, } - if !reflect.DeepEqual(grp.Spreads, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, grp.Spreads) - } + must.Eq(t, expect, grp.Spreads) } func TestTaskGroup_AddTask(t *testing.T) { testutil.Parallel(t) + grp := NewTaskGroup("grp1", 1) // Add the task to the task group out := grp.AddTask(NewTask("task1", "java")) - if n := len(grp.Tasks); n != 1 { - t.Fatalf("expected 1 task, got: %d", n) - } + must.Len(t, 1, out.Tasks) // Check that we returned the group - if out != grp { - t.Fatalf("expect: %#v, got: %#v", grp, out) - } + must.Eq(t, grp, out) // Add a second task grp.AddTask(NewTask("task2", "exec")) @@ -194,111 +168,92 @@ func TestTaskGroup_AddTask(t *testing.T) { Driver: "exec", }, } - if !reflect.DeepEqual(grp.Tasks, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, grp.Tasks) - } + must.Eq(t, expect, grp.Tasks) } func TestTask_NewTask(t *testing.T) { testutil.Parallel(t) + task := NewTask("task1", "exec") expect := &Task{ Name: "task1", Driver: "exec", } - if !reflect.DeepEqual(task, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, task) - } + must.Eq(t, expect, task) } func TestTask_SetConfig(t 
*testing.T) { testutil.Parallel(t) + task := NewTask("task1", "exec") // Initializes an empty map out := task.SetConfig("foo", "bar") - if task.Config == nil { - t.Fatalf("should be initialized") - } + must.NotNil(t, task.Config) // Check that we returned the task - if out != task { - t.Fatalf("expect: %#v, got: %#v", task, out) - } + must.Eq(t, task, out) // Set another config value task.SetConfig("baz", "zip") expect := map[string]interface{}{"foo": "bar", "baz": "zip"} - if !reflect.DeepEqual(task.Config, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, task.Config) - } + must.Eq(t, expect, task.Config) } func TestTask_SetMeta(t *testing.T) { testutil.Parallel(t) + task := NewTask("task1", "exec") // Initializes an empty map out := task.SetMeta("foo", "bar") - if task.Meta == nil { - t.Fatalf("should be initialized") - } + must.NotNil(t, out) // Check that we returned the task - if out != task { - t.Fatalf("expect: %#v, got: %#v", task, out) - } + must.Eq(t, task, out) // Set another meta k/v task.SetMeta("baz", "zip") expect := map[string]string{"foo": "bar", "baz": "zip"} - if !reflect.DeepEqual(task.Meta, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, task.Meta) - } + must.Eq(t, expect, task.Meta) } func TestTask_Require(t *testing.T) { testutil.Parallel(t) + task := NewTask("task1", "exec") // Create some require resources resources := &Resources{ - CPU: intToPtr(1250), - MemoryMB: intToPtr(128), - DiskMB: intToPtr(2048), + CPU: pointerOf(1250), + MemoryMB: pointerOf(128), + DiskMB: pointerOf(2048), Networks: []*NetworkResource{ { CIDR: "0.0.0.0/0", - MBits: intToPtr(100), + MBits: pointerOf(100), ReservedPorts: []Port{{"", 80, 0, ""}, {"", 443, 0, ""}}, }, }, } out := task.Require(resources) - if !reflect.DeepEqual(task.Resources, resources) { - t.Fatalf("expect: %#v, got: %#v", resources, task.Resources) - } + must.Eq(t, resources, task.Resources) // Check that we returned the task - if out != task { - t.Fatalf("expect: %#v, got: %#v", task, out) - } + must.Eq(t, task, out) } func TestTask_Constrain(t *testing.T) { testutil.Parallel(t) + task := NewTask("task1", "exec") // Add a constraint to the task out := task.Constrain(NewConstraint("kernel.name", "=", "darwin")) - if n := len(task.Constraints); n != 1 { - t.Fatalf("expected 1 constraint, got: %d", n) - } + must.Len(t, 1, task.Constraints) // Check that the task was returned - if out != task { - t.Fatalf("expected: %#v, got: %#v", task, out) - } + must.Eq(t, task, out) // Add a second constraint task.Constrain(NewConstraint("memory.totalbytes", ">=", "128000000")) @@ -314,24 +269,20 @@ func TestTask_Constrain(t *testing.T) { Operand: ">=", }, } - if !reflect.DeepEqual(task.Constraints, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, task.Constraints) - } + must.Eq(t, expect, task.Constraints) } func TestTask_AddAffinity(t *testing.T) { testutil.Parallel(t) + task := NewTask("task1", "exec") // Add an affinity to the task out := task.AddAffinity(NewAffinity("kernel.version", "=", "4.6", 100)) - require := require.New(t) - require.Len(out.Affinities, 1) + must.Len(t, 1, out.Affinities) // Check that the task was returned - if out != task { - t.Fatalf("expected: %#v, got: %#v", task, out) - } + must.Eq(t, task, out) // Add a second affinity task.AddAffinity(NewAffinity("${node.datacenter}", "=", "dc2", 50)) @@ -340,45 +291,46 @@ func TestTask_AddAffinity(t *testing.T) { LTarget: "kernel.version", RTarget: "4.6", Operand: "=", - Weight: int8ToPtr(100), + Weight: pointerOf(int8(100)), }, { LTarget: 
"${node.datacenter}", RTarget: "dc2", Operand: "=", - Weight: int8ToPtr(50), + Weight: pointerOf(int8(50)), }, } - if !reflect.DeepEqual(task.Affinities, expect) { - t.Fatalf("expect: %#v, got: %#v", expect, task.Affinities) - } + must.Eq(t, expect, task.Affinities) } func TestTask_Artifact(t *testing.T) { testutil.Parallel(t) + a := TaskArtifact{ - GetterSource: stringToPtr("http://localhost/foo.txt"), - GetterMode: stringToPtr("file"), + GetterSource: pointerOf("http://localhost/foo.txt"), + GetterMode: pointerOf("file"), GetterHeaders: make(map[string]string), GetterOptions: make(map[string]string), } a.Canonicalize() - require.Equal(t, "file", *a.GetterMode) - require.Equal(t, "local/foo.txt", filepath.ToSlash(*a.RelativeDest)) - require.Nil(t, a.GetterOptions) - require.Nil(t, a.GetterHeaders) + must.Eq(t, "file", *a.GetterMode) + must.Eq(t, "local/foo.txt", filepath.ToSlash(*a.RelativeDest)) + must.Nil(t, a.GetterOptions) + must.Nil(t, a.GetterHeaders) } func TestTask_VolumeMount(t *testing.T) { testutil.Parallel(t) - vm := &VolumeMount{} + + vm := new(VolumeMount) vm.Canonicalize() - require.NotNil(t, vm.PropagationMode) - require.Equal(t, *vm.PropagationMode, "private") + must.NotNil(t, vm.PropagationMode) + must.Eq(t, "private", *vm.PropagationMode) } func TestTask_Canonicalize_TaskLifecycle(t *testing.T) { testutil.Parallel(t) + testCases := []struct { name string expected *TaskLifecycle @@ -396,20 +348,20 @@ func TestTask_Canonicalize_TaskLifecycle(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { tg := &TaskGroup{ - Name: stringToPtr("foo"), + Name: pointerOf("foo"), } j := &Job{ - ID: stringToPtr("test"), + ID: pointerOf("test"), } tc.task.Canonicalize(tg, j) - require.Equal(t, tc.expected, tc.task.Lifecycle) - + must.Eq(t, tc.expected, tc.task.Lifecycle) }) } } func TestTask_Template_WaitConfig_Canonicalize_and_Copy(t *testing.T) { testutil.Parallel(t) + taskWithWait := func(wc *WaitConfig) *Task { return &Task{ Templates: []*Template{ @@ -429,16 +381,16 @@ func TestTask_Template_WaitConfig_Canonicalize_and_Copy(t *testing.T) { { name: "all-fields", task: taskWithWait(&WaitConfig{ - Min: timeToPtr(5), - Max: timeToPtr(10), + Min: pointerOf(time.Duration(5)), + Max: pointerOf(time.Duration(10)), }), canonicalized: &WaitConfig{ - Min: timeToPtr(5), - Max: timeToPtr(10), + Min: pointerOf(time.Duration(5)), + Max: pointerOf(time.Duration(10)), }, copied: &WaitConfig{ - Min: timeToPtr(5), - Max: timeToPtr(10), + Min: pointerOf(time.Duration(5)), + Max: pointerOf(time.Duration(10)), }, }, { @@ -456,25 +408,25 @@ func TestTask_Template_WaitConfig_Canonicalize_and_Copy(t *testing.T) { { name: "min-only", task: taskWithWait(&WaitConfig{ - Min: timeToPtr(5), + Min: pointerOf(time.Duration(5)), }), canonicalized: &WaitConfig{ - Min: timeToPtr(5), + Min: pointerOf(time.Duration(5)), }, copied: &WaitConfig{ - Min: timeToPtr(5), + Min: pointerOf(time.Duration(5)), }, }, { name: "max-only", task: taskWithWait(&WaitConfig{ - Max: timeToPtr(10), + Max: pointerOf(time.Duration(10)), }), canonicalized: &WaitConfig{ - Max: timeToPtr(10), + Max: pointerOf(time.Duration(10)), }, copied: &WaitConfig{ - Max: timeToPtr(10), + Max: pointerOf(time.Duration(10)), }, }, } @@ -482,14 +434,14 @@ func TestTask_Template_WaitConfig_Canonicalize_and_Copy(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { tg := &TaskGroup{ - Name: stringToPtr("foo"), + Name: pointerOf("foo"), } j := &Job{ - ID: stringToPtr("test"), + ID: pointerOf("test"), } - 
require.Equal(t, tc.copied, tc.task.Templates[0].Wait.Copy()) + must.Eq(t, tc.copied, tc.task.Templates[0].Wait.Copy()) tc.task.Canonicalize(tg, j) - require.Equal(t, tc.canonicalized, tc.task.Templates[0].Wait) + must.Eq(t, tc.canonicalized, tc.task.Templates[0].Wait) }) } } @@ -504,10 +456,10 @@ func TestTask_Canonicalize_Vault(t *testing.T) { name: "empty", input: &Vault{}, expected: &Vault{ - Env: boolToPtr(true), - Namespace: stringToPtr(""), - ChangeMode: stringToPtr("restart"), - ChangeSignal: stringToPtr("SIGHUP"), + Env: pointerOf(true), + Namespace: pointerOf(""), + ChangeMode: pointerOf("restart"), + ChangeSignal: pointerOf("SIGHUP"), }, }, } @@ -515,7 +467,7 @@ func TestTask_Canonicalize_Vault(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { tc.input.Canonicalize() - require.Equal(t, tc.expected, tc.input) + must.Eq(t, tc.expected, tc.input) }) } } @@ -523,44 +475,44 @@ func TestTask_Canonicalize_Vault(t *testing.T) { // Ensures no regression on https://github.com/hashicorp/nomad/issues/3132 func TestTaskGroup_Canonicalize_Update(t *testing.T) { testutil.Parallel(t) + // Job with an Empty() Update job := &Job{ - ID: stringToPtr("test"), + ID: pointerOf("test"), Update: &UpdateStrategy{ - AutoRevert: boolToPtr(false), - AutoPromote: boolToPtr(false), - Canary: intToPtr(0), - HealthCheck: stringToPtr(""), - HealthyDeadline: timeToPtr(0), - ProgressDeadline: timeToPtr(0), - MaxParallel: intToPtr(0), - MinHealthyTime: timeToPtr(0), - Stagger: timeToPtr(0), + AutoRevert: pointerOf(false), + AutoPromote: pointerOf(false), + Canary: pointerOf(0), + HealthCheck: pointerOf(""), + HealthyDeadline: pointerOf(time.Duration(0)), + ProgressDeadline: pointerOf(time.Duration(0)), + MaxParallel: pointerOf(0), + MinHealthyTime: pointerOf(time.Duration(0)), + Stagger: pointerOf(time.Duration(0)), }, } job.Canonicalize() tg := &TaskGroup{ - Name: stringToPtr("foo"), + Name: pointerOf("foo"), } tg.Canonicalize(job) - assert.NotNil(t, job.Update) - assert.Nil(t, tg.Update) + must.NotNil(t, job.Update) + must.Nil(t, tg.Update) } func TestTaskGroup_Canonicalize_Scaling(t *testing.T) { testutil.Parallel(t) - require := require.New(t) job := &Job{ - ID: stringToPtr("test"), + ID: pointerOf("test"), } job.Canonicalize() tg := &TaskGroup{ - Name: stringToPtr("foo"), + Name: pointerOf("foo"), Count: nil, Scaling: &ScalingPolicy{ Min: nil, - Max: int64ToPtr(10), + Max: pointerOf(int64(10)), Policy: nil, Enabled: nil, CreateIndex: 0, @@ -571,74 +523,76 @@ func TestTaskGroup_Canonicalize_Scaling(t *testing.T) { // both nil => both == 1 tg.Canonicalize(job) - require.NotNil(tg.Count) - require.NotNil(tg.Scaling.Min) - require.EqualValues(1, *tg.Count) - require.EqualValues(*tg.Count, *tg.Scaling.Min) + must.Positive(t, *tg.Count) + must.NotNil(t, tg.Scaling.Min) + must.Eq(t, 1, *tg.Count) + must.Eq(t, int64(*tg.Count), *tg.Scaling.Min) // count == nil => count = Scaling.Min tg.Count = nil - tg.Scaling.Min = int64ToPtr(5) + tg.Scaling.Min = pointerOf(int64(5)) tg.Canonicalize(job) - require.NotNil(tg.Count) - require.NotNil(tg.Scaling.Min) - require.EqualValues(5, *tg.Count) - require.EqualValues(*tg.Count, *tg.Scaling.Min) + must.Positive(t, *tg.Count) + must.NotNil(t, tg.Scaling.Min) + must.Eq(t, 5, *tg.Count) + must.Eq(t, int64(*tg.Count), *tg.Scaling.Min) // Scaling.Min == nil => Scaling.Min == count - tg.Count = intToPtr(5) + tg.Count = pointerOf(5) tg.Scaling.Min = nil tg.Canonicalize(job) - require.NotNil(tg.Count) - require.NotNil(tg.Scaling.Min) - 
require.EqualValues(5, *tg.Scaling.Min) - require.EqualValues(*tg.Scaling.Min, *tg.Count) + must.Positive(t, *tg.Count) + must.NotNil(t, tg.Scaling.Min) + must.Eq(t, 5, *tg.Scaling.Min) + must.Eq(t, int64(*tg.Count), *tg.Scaling.Min) // both present, both persisted - tg.Count = intToPtr(5) - tg.Scaling.Min = int64ToPtr(1) + tg.Count = pointerOf(5) + tg.Scaling.Min = pointerOf(int64(1)) tg.Canonicalize(job) - require.NotNil(tg.Count) - require.NotNil(tg.Scaling.Min) - require.EqualValues(1, *tg.Scaling.Min) - require.EqualValues(5, *tg.Count) + must.Positive(t, *tg.Count) + must.NotNil(t, tg.Scaling.Min) + must.Eq(t, 1, *tg.Scaling.Min) + must.Eq(t, 5, *tg.Count) } func TestTaskGroup_Merge_Update(t *testing.T) { testutil.Parallel(t) + job := &Job{ - ID: stringToPtr("test"), + ID: pointerOf("test"), Update: &UpdateStrategy{}, } job.Canonicalize() - // Merge and canonicalize part of an update stanza + // Merge and canonicalize part of an update block tg := &TaskGroup{ - Name: stringToPtr("foo"), + Name: pointerOf("foo"), Update: &UpdateStrategy{ - AutoRevert: boolToPtr(true), - Canary: intToPtr(5), - HealthCheck: stringToPtr("foo"), + AutoRevert: pointerOf(true), + Canary: pointerOf(5), + HealthCheck: pointerOf("foo"), }, } tg.Canonicalize(job) - require.Equal(t, &UpdateStrategy{ - AutoRevert: boolToPtr(true), - AutoPromote: boolToPtr(false), - Canary: intToPtr(5), - HealthCheck: stringToPtr("foo"), - HealthyDeadline: timeToPtr(5 * time.Minute), - ProgressDeadline: timeToPtr(10 * time.Minute), - MaxParallel: intToPtr(1), - MinHealthyTime: timeToPtr(10 * time.Second), - Stagger: timeToPtr(30 * time.Second), + must.Eq(t, &UpdateStrategy{ + AutoRevert: pointerOf(true), + AutoPromote: pointerOf(false), + Canary: pointerOf(5), + HealthCheck: pointerOf("foo"), + HealthyDeadline: pointerOf(5 * time.Minute), + ProgressDeadline: pointerOf(10 * time.Minute), + MaxParallel: pointerOf(1), + MinHealthyTime: pointerOf(10 * time.Second), + Stagger: pointerOf(30 * time.Second), }, tg.Update) } // Verifies that migrate strategy is merged correctly func TestTaskGroup_Canonicalize_MigrateStrategy(t *testing.T) { testutil.Parallel(t) + type testCase struct { desc string jobType string @@ -661,44 +615,44 @@ func TestTaskGroup_Canonicalize_MigrateStrategy(t *testing.T) { jobMigrate: nil, taskMigrate: nil, expected: &MigrateStrategy{ - MaxParallel: intToPtr(1), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), + MaxParallel: pointerOf(1), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), }, }, { desc: "Empty job migrate strategy", jobType: "service", jobMigrate: &MigrateStrategy{ - MaxParallel: intToPtr(0), - HealthCheck: stringToPtr(""), - MinHealthyTime: timeToPtr(0), - HealthyDeadline: timeToPtr(0), + MaxParallel: pointerOf(0), + HealthCheck: pointerOf(""), + MinHealthyTime: pointerOf(time.Duration(0)), + HealthyDeadline: pointerOf(time.Duration(0)), }, taskMigrate: nil, expected: &MigrateStrategy{ - MaxParallel: intToPtr(0), - HealthCheck: stringToPtr(""), - MinHealthyTime: timeToPtr(0), - HealthyDeadline: timeToPtr(0), + MaxParallel: pointerOf(0), + HealthCheck: pointerOf(""), + MinHealthyTime: pointerOf(time.Duration(0)), + HealthyDeadline: pointerOf(time.Duration(0)), }, }, { desc: "Inherit from job", jobType: "service", jobMigrate: &MigrateStrategy{ - MaxParallel: intToPtr(3), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(2), - 
HealthyDeadline: timeToPtr(2), + MaxParallel: pointerOf(3), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(time.Duration(2)), + HealthyDeadline: pointerOf(time.Duration(2)), }, taskMigrate: nil, expected: &MigrateStrategy{ - MaxParallel: intToPtr(3), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(2), - HealthyDeadline: timeToPtr(2), + MaxParallel: pointerOf(3), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(time.Duration(2)), + HealthyDeadline: pointerOf(time.Duration(2)), }, }, { @@ -706,67 +660,67 @@ func TestTaskGroup_Canonicalize_MigrateStrategy(t *testing.T) { jobType: "service", jobMigrate: nil, taskMigrate: &MigrateStrategy{ - MaxParallel: intToPtr(3), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(2), - HealthyDeadline: timeToPtr(2), + MaxParallel: pointerOf(3), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(time.Duration(2)), + HealthyDeadline: pointerOf(time.Duration(2)), }, expected: &MigrateStrategy{ - MaxParallel: intToPtr(3), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(2), - HealthyDeadline: timeToPtr(2), + MaxParallel: pointerOf(3), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(time.Duration(2)), + HealthyDeadline: pointerOf(time.Duration(2)), }, }, { desc: "Merge from job", jobType: "service", jobMigrate: &MigrateStrategy{ - MaxParallel: intToPtr(11), + MaxParallel: pointerOf(11), }, taskMigrate: &MigrateStrategy{ - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(2), - HealthyDeadline: timeToPtr(2), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(time.Duration(2)), + HealthyDeadline: pointerOf(time.Duration(2)), }, expected: &MigrateStrategy{ - MaxParallel: intToPtr(11), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(2), - HealthyDeadline: timeToPtr(2), + MaxParallel: pointerOf(11), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(time.Duration(2)), + HealthyDeadline: pointerOf(time.Duration(2)), }, }, { desc: "Override from group", jobType: "service", jobMigrate: &MigrateStrategy{ - MaxParallel: intToPtr(11), + MaxParallel: pointerOf(11), }, taskMigrate: &MigrateStrategy{ - MaxParallel: intToPtr(5), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(2), - HealthyDeadline: timeToPtr(2), + MaxParallel: pointerOf(5), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(time.Duration(2)), + HealthyDeadline: pointerOf(time.Duration(2)), }, expected: &MigrateStrategy{ - MaxParallel: intToPtr(5), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(2), - HealthyDeadline: timeToPtr(2), + MaxParallel: pointerOf(5), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(time.Duration(2)), + HealthyDeadline: pointerOf(time.Duration(2)), }, }, { desc: "Parallel from job, defaulting", jobType: "service", jobMigrate: &MigrateStrategy{ - MaxParallel: intToPtr(5), + MaxParallel: pointerOf(5), }, taskMigrate: nil, expected: &MigrateStrategy{ - MaxParallel: intToPtr(5), - HealthCheck: stringToPtr("checks"), - MinHealthyTime: timeToPtr(10 * time.Second), - HealthyDeadline: timeToPtr(5 * time.Minute), + MaxParallel: pointerOf(5), + HealthCheck: pointerOf("checks"), + MinHealthyTime: pointerOf(10 * time.Second), + HealthyDeadline: pointerOf(5 * time.Minute), }, }, } @@ -774,31 +728,32 @@ func TestTaskGroup_Canonicalize_MigrateStrategy(t *testing.T) { for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { job := &Job{ - ID: stringToPtr("test"), 
+ ID: pointerOf("test"), Migrate: tc.jobMigrate, - Type: stringToPtr(tc.jobType), + Type: pointerOf(tc.jobType), } job.Canonicalize() tg := &TaskGroup{ - Name: stringToPtr("foo"), + Name: pointerOf("foo"), Migrate: tc.taskMigrate, } tg.Canonicalize(job) - assert.Equal(t, tc.expected, tg.Migrate) + must.Eq(t, tc.expected, tg.Migrate) }) } } -// TestSpread_Canonicalize asserts that the spread stanza is canonicalized correctly +// TestSpread_Canonicalize asserts that the spread block is canonicalized correctly func TestSpread_Canonicalize(t *testing.T) { testutil.Parallel(t) + job := &Job{ - ID: stringToPtr("test"), - Type: stringToPtr("batch"), + ID: pointerOf("test"), + Type: pointerOf("batch"), } job.Canonicalize() tg := &TaskGroup{ - Name: stringToPtr("foo"), + Name: pointerOf("foo"), } type testCase struct { desc string @@ -818,7 +773,7 @@ func TestSpread_Canonicalize(t *testing.T) { "Zero spread", &Spread{ Attribute: "test", - Weight: int8ToPtr(0), + Weight: pointerOf(int8(0)), }, 0, }, @@ -826,7 +781,7 @@ func TestSpread_Canonicalize(t *testing.T) { "Non Zero spread", &Spread{ Attribute: "test", - Weight: int8ToPtr(100), + Weight: pointerOf(int8(100)), }, 100, }, @@ -837,7 +792,7 @@ func TestSpread_Canonicalize(t *testing.T) { tg.Spreads = []*Spread{tc.spread} tg.Canonicalize(job) for _, spr := range tg.Spreads { - require.Equal(t, tc.expectedWeight, *spr.Weight) + must.Eq(t, tc.expectedWeight, *spr.Weight) } }) } @@ -845,6 +800,7 @@ func TestSpread_Canonicalize(t *testing.T) { func Test_NewDefaultReschedulePolicy(t *testing.T) { testutil.Parallel(t) + testCases := []struct { desc string inputJobType string @@ -854,48 +810,48 @@ func Test_NewDefaultReschedulePolicy(t *testing.T) { desc: "service job type", inputJobType: "service", expected: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - Delay: timeToPtr(30 * time.Second), - DelayFunction: stringToPtr("exponential"), - MaxDelay: timeToPtr(1 * time.Hour), - Unlimited: boolToPtr(true), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + Delay: pointerOf(30 * time.Second), + DelayFunction: pointerOf("exponential"), + MaxDelay: pointerOf(1 * time.Hour), + Unlimited: pointerOf(true), }, }, { desc: "batch job type", inputJobType: "batch", expected: &ReschedulePolicy{ - Attempts: intToPtr(1), - Interval: timeToPtr(24 * time.Hour), - Delay: timeToPtr(5 * time.Second), - DelayFunction: stringToPtr("constant"), - MaxDelay: timeToPtr(0), - Unlimited: boolToPtr(false), + Attempts: pointerOf(1), + Interval: pointerOf(24 * time.Hour), + Delay: pointerOf(5 * time.Second), + DelayFunction: pointerOf("constant"), + MaxDelay: pointerOf(time.Duration(0)), + Unlimited: pointerOf(false), }, }, { desc: "system job type", inputJobType: "system", expected: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - Delay: timeToPtr(0), - DelayFunction: stringToPtr(""), - MaxDelay: timeToPtr(0), - Unlimited: boolToPtr(false), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + Delay: pointerOf(time.Duration(0)), + DelayFunction: pointerOf(""), + MaxDelay: pointerOf(time.Duration(0)), + Unlimited: pointerOf(false), }, }, { desc: "unrecognised job type", inputJobType: "unrecognised", expected: &ReschedulePolicy{ - Attempts: intToPtr(0), - Interval: timeToPtr(0), - Delay: timeToPtr(0), - DelayFunction: stringToPtr(""), - MaxDelay: timeToPtr(0), - Unlimited: boolToPtr(false), + Attempts: pointerOf(0), + Interval: pointerOf(time.Duration(0)), + Delay: pointerOf(time.Duration(0)), + DelayFunction: 
pointerOf(""), + MaxDelay: pointerOf(time.Duration(0)), + Unlimited: pointerOf(false), }, }, } @@ -903,61 +859,62 @@ func Test_NewDefaultReschedulePolicy(t *testing.T) { for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { actual := NewDefaultReschedulePolicy(tc.inputJobType) - assert.Equal(t, tc.expected, actual) + must.Eq(t, tc.expected, actual) }) } } func TestTaskGroup_Canonicalize_Consul(t *testing.T) { testutil.Parallel(t) + t.Run("override job consul in group", func(t *testing.T) { job := &Job{ - ID: stringToPtr("job"), - ConsulNamespace: stringToPtr("ns1"), + ID: pointerOf("job"), + ConsulNamespace: pointerOf("ns1"), } job.Canonicalize() tg := &TaskGroup{ - Name: stringToPtr("group"), + Name: pointerOf("group"), Consul: &Consul{Namespace: "ns2"}, } tg.Canonicalize(job) - require.Equal(t, "ns1", *job.ConsulNamespace) - require.Equal(t, "ns2", tg.Consul.Namespace) + must.Eq(t, "ns1", *job.ConsulNamespace) + must.Eq(t, "ns2", tg.Consul.Namespace) }) t.Run("inherit job consul in group", func(t *testing.T) { job := &Job{ - ID: stringToPtr("job"), - ConsulNamespace: stringToPtr("ns1"), + ID: pointerOf("job"), + ConsulNamespace: pointerOf("ns1"), } job.Canonicalize() tg := &TaskGroup{ - Name: stringToPtr("group"), + Name: pointerOf("group"), Consul: nil, // not set, inherit from job } tg.Canonicalize(job) - require.Equal(t, "ns1", *job.ConsulNamespace) - require.Equal(t, "ns1", tg.Consul.Namespace) + must.Eq(t, "ns1", *job.ConsulNamespace) + must.Eq(t, "ns1", tg.Consul.Namespace) }) t.Run("set in group only", func(t *testing.T) { job := &Job{ - ID: stringToPtr("job"), + ID: pointerOf("job"), ConsulNamespace: nil, } job.Canonicalize() tg := &TaskGroup{ - Name: stringToPtr("group"), + Name: pointerOf("group"), Consul: &Consul{Namespace: "ns2"}, } tg.Canonicalize(job) - require.Empty(t, job.ConsulNamespace) - require.Equal(t, "ns2", tg.Consul.Namespace) + must.Eq(t, "", *job.ConsulNamespace) + must.Eq(t, "ns2", tg.Consul.Namespace) }) } diff --git a/api/util_test.go b/api/util_test.go index 7e5f2e1b575..76830693b5e 100644 --- a/api/util_test.go +++ b/api/util_test.go @@ -1,19 +1,18 @@ package api import ( - crand "crypto/rand" + "crypto/rand" "fmt" "testing" + + "github.com/shoenig/test/must" ) func assertQueryMeta(t *testing.T, qm *QueryMeta) { t.Helper() - if qm.LastIndex == 0 { - t.Fatalf("bad index: %d", qm.LastIndex) - } - if !qm.KnownLeader { - t.Fatalf("expected known leader, got none") - } + + must.NotEq(t, 0, qm.LastIndex, must.Sprint("bad index")) + must.True(t, qm.KnownLeader, must.Sprint("expected a known leader but gone none")) } func assertWriteMeta(t *testing.T, wm *WriteMeta) { @@ -27,18 +26,18 @@ func testJob() *Job { task := NewTask("task1", "raw_exec"). SetConfig("command", "/bin/sleep"). Require(&Resources{ - CPU: intToPtr(100), - MemoryMB: intToPtr(256), + CPU: pointerOf(100), + MemoryMB: pointerOf(256), }). SetLogConfig(&LogConfig{ - MaxFiles: intToPtr(1), - MaxFileSizeMB: intToPtr(2), + MaxFiles: pointerOf(1), + MaxFileSizeMB: pointerOf(2), }) group := NewTaskGroup("group1", 1). AddTask(task). RequireDisk(&EphemeralDisk{ - SizeMB: intToPtr(25), + SizeMB: pointerOf(25), }) job := NewBatchJob("job1", "redis", "global", 1). 
@@ -52,18 +51,18 @@ func testJobWithScalingPolicy() *Job { job := testJob() job.TaskGroups[0].Scaling = &ScalingPolicy{ Policy: map[string]interface{}{}, - Min: int64ToPtr(1), - Max: int64ToPtr(5), - Enabled: boolToPtr(true), + Min: pointerOf(int64(1)), + Max: pointerOf(int64(5)), + Enabled: pointerOf(true), } return job } func testPeriodicJob() *Job { job := testJob().AddPeriodicConfig(&PeriodicConfig{ - Enabled: boolToPtr(true), - Spec: stringToPtr("*/30 * * * *"), - SpecType: stringToPtr("cron"), + Enabled: pointerOf(true), + Spec: pointerOf("*/30 * * * *"), + SpecType: pointerOf("cron"), }) return job } @@ -109,8 +108,8 @@ func testQuotaSpec() *QuotaSpec { { Region: "global", RegionLimit: &Resources{ - CPU: intToPtr(2000), - MemoryMB: intToPtr(2000), + CPU: pointerOf(2000), + MemoryMB: pointerOf(2000), }, }, }, @@ -128,7 +127,7 @@ func float64ToPtr(f float64) *float64 { // generateUUID generates a uuid useful for testing only func generateUUID() string { buf := make([]byte, 16) - if _, err := crand.Read(buf); err != nil { + if _, err := rand.Read(buf); err != nil { panic(fmt.Errorf("failed to read random bytes: %v", err)) } diff --git a/api/utils.go b/api/utils.go index 9e54306f6b6..a8e1c02e456 100644 --- a/api/utils.go +++ b/api/utils.go @@ -3,44 +3,8 @@ package api import ( "strconv" "strings" - "time" ) -// boolToPtr returns the pointer to a boolean -func boolToPtr(b bool) *bool { - return &b -} - -// int8ToPtr returns the pointer to an int8 -func int8ToPtr(i int8) *int8 { - return &i -} - -// intToPtr returns the pointer to an int -func intToPtr(i int) *int { - return &i -} - -// uint64ToPtr returns the pointer to an uint64 -func uint64ToPtr(u uint64) *uint64 { - return &u -} - -// int64ToPtr returns the pointer to a int64 -func int64ToPtr(i int64) *int64 { - return &i -} - -// stringToPtr returns the pointer to a string -func stringToPtr(str string) *string { - return &str -} - -// timeToPtr returns the pointer to a time stamp -func timeToPtr(t time.Duration) *time.Duration { - return &t -} - // formatFloat converts the floating-point number f to a string, // after rounding it to the passed unit. // @@ -61,3 +25,8 @@ func formatFloat(f float64, maxPrec int) string { return v[:sublen] } + +// pointerOf returns a pointer to a. +func pointerOf[A any](a A) *A { + return &a +} diff --git a/api/utils_test.go b/api/utils_test.go index 7e0d789bda5..ef4d2f524b5 100644 --- a/api/utils_test.go +++ b/api/utils_test.go @@ -4,11 +4,12 @@ import ( "testing" "github.com/hashicorp/nomad/api/internal/testutil" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestFormatRoundedFloat(t *testing.T) { testutil.Parallel(t) + cases := []struct { input float64 expected string @@ -36,6 +37,17 @@ func TestFormatRoundedFloat(t *testing.T) { } for _, c := range cases { - require.Equal(t, c.expected, formatFloat(c.input, 3)) + must.Eq(t, c.expected, formatFloat(c.input, 3)) } } + +func Test_PointerOf(t *testing.T) { + s := "hello" + sPtr := pointerOf(s) + + must.Eq(t, s, *sPtr) + + b := "bye" + sPtr = &b + must.NotEq(t, s, *sPtr) +} diff --git a/ci/README.md b/ci/README.md new file mode 100644 index 00000000000..fd8ebc47b84 --- /dev/null +++ b/ci/README.md @@ -0,0 +1,34 @@ +# CI (unit testing) + +This README describes how the Core CI Tests Github Actions works, which provides +Nomad with continuous integration unit testing. + +## Steps + +1. When a branch is pushed, GHA triggers `.github/workflows/test-core.yaml`. + +2. 
The first job is `mods` which creates a pre-cache of Go modules. + - Only useful for the follow-up jobs on Linux runners + - Is keyed on `hash(go.sum)`, so a cache is re-used until deps are modified. + +3. The `checks`, `test-api`, `test-*` jobs are started. + - The checks job runs `make check` + - The test job runs groups of tests, see below + +3a. The check step also runs `make missing` + - Invokes `tools/missing` to scan `ci/test-core.json` and the Nomad source. + - Fails the build if any packages in Nomad are not covered. + +4a. The `test-*` jobs are run. + - Configured as a matrix of "groups"; each group is a set of packages. + - The GHA invokes `test-nomad` with $GOTEST_GROUP for each group. + - The makefile uses `tools/missing` to translate the group into packages. + - Package groups are configured in `ci/test-core.json`. + +4b. The `test-api` job is run. + - Because `api` is a submodule, invocation of the test command is special. + - The GHA invokes `test-nomad-module` with the name of the submodule. + +5. The `compile` jobs are run. + - Waits on checks to complete first + - Runs on each of `linux`, `macos`, `windows` diff --git a/ci/ports.go b/ci/ports.go new file mode 100644 index 00000000000..d22f9b1feb0 --- /dev/null +++ b/ci/ports.go @@ -0,0 +1,20 @@ +package ci + +import ( + "fmt" + + "github.com/shoenig/test/portal" +) + +type fatalTester struct{} + +func (t *fatalTester) Fatalf(msg string, args ...any) { + panic(fmt.Sprintf(msg, args...)) +} + +// PortAllocator is used to acquire unused ports for testing real network +// listeners. +var PortAllocator = portal.New( + new(fatalTester), + portal.WithAddress("127.0.0.1"), +) diff --git a/ci/test-core.json b/ci/test-core.json new file mode 100644 index 00000000000..2614f7ea3e8 --- /dev/null +++ b/ci/test-core.json @@ -0,0 +1,45 @@ +{ + "nomad": ["nomad"], + "client": [ + "client", + "client/allocrunner/..." + ], + "command": ["command"], + "drivers": ["drivers/..."], + "quick": [ + "acl/...", + "client/allocdir/...", + "client/allochealth/...", + "client/allocwatcher/...", + "client/config/...", + "client/consul/...", + "client/devicemanager/...", + "client/dynamicplugins/...", + "client/fingerprint/...", + "client/interfaces/...", + "client/lib/...", + "client/logmon/...", + "client/pluginmanager/...", + "client/servers/...", + "client/serviceregistration/...", + "client/state/...", + "client/stats/...", + "client/structs/...", + "client/taskenv/...", + "command/agent/...", + "command/raft_tools/...", + "helper/...", + "internal/...", + "jobspec/...", + "lib/...", + "nomad/deploymentwatcher/...", + "nomad/drainer/...", + "nomad/state/...", + "nomad/stream/...", + "nomad/structs/...", + "nomad/volumewatcher/...", + "plugins/...", + "scheduler/...", + "testutil/..."
+ ] +} diff --git a/client/acl.go b/client/acl.go index 43d994bc4dd..bf666fbaa05 100644 --- a/client/acl.go +++ b/client/acl.go @@ -81,7 +81,7 @@ func (c *Client) ResolveSecretToken(secretID string) (*structs.ACLToken, error) func (c *Client) resolveTokenAndACL(secretID string) (*acl.ACL, *structs.ACLToken, error) { // Fast-path if ACLs are disabled - if !c.config.ACLEnabled { + if !c.GetConfig().ACLEnabled { return nil, nil, nil } defer metrics.MeasureSince([]string{"client", "acl", "resolve_token"}, time.Now()) @@ -127,7 +127,7 @@ func (c *Client) resolveTokenValue(secretID string) (*structs.ACLToken, error) { raw, ok := c.tokenCache.Get(secretID) if ok { cached := raw.(*cachedACLValue) - if cached.Age() <= c.config.ACLTokenTTL { + if cached.Age() <= c.GetConfig().ACLTokenTTL { return cached.Token, nil } } @@ -179,7 +179,7 @@ func (c *Client) resolvePolicies(secretID string, policies []string) ([]*structs // Check if the cached value is valid or expired cached := raw.(*cachedACLValue) - if cached.Age() <= c.config.ACLPolicyTTL { + if cached.Age() <= c.GetConfig().ACLPolicyTTL { out = append(out, cached.Policy) } else { expired = append(expired, cached.Policy) diff --git a/client/agent_endpoint.go b/client/agent_endpoint.go index 12993c4985c..6508fbebe1c 100644 --- a/client/agent_endpoint.go +++ b/client/agent_endpoint.go @@ -12,7 +12,7 @@ import ( "github.com/hashicorp/nomad/command/agent/host" "github.com/hashicorp/nomad/command/agent/monitor" "github.com/hashicorp/nomad/command/agent/pprof" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/structs" metrics "github.com/armon/go-metrics" @@ -42,7 +42,7 @@ func (a *Agent) Profile(args *structs.AgentPprofRequest, reply *structs.AgentPpr } // If ACLs are disabled, EnableDebug must be enabled - if aclObj == nil && !a.c.config.EnableDebug { + if aclObj == nil && !a.c.GetConfig().EnableDebug { return structs.ErrPermissionDenied } @@ -89,16 +89,16 @@ func (a *Agent) monitor(conn io.ReadWriteCloser) { encoder := codec.NewEncoder(conn, structs.MsgpackHandle) if err := decoder.Decode(&args); err != nil { - handleStreamResultError(err, helper.Int64ToPtr(500), encoder) + handleStreamResultError(err, pointer.Of(int64(500)), encoder) return } // Check acl if aclObj, err := a.c.ResolveToken(args.AuthToken); err != nil { - handleStreamResultError(err, helper.Int64ToPtr(403), encoder) + handleStreamResultError(err, pointer.Of(int64(403)), encoder) return } else if aclObj != nil && !aclObj.AllowAgentRead() { - handleStreamResultError(structs.ErrPermissionDenied, helper.Int64ToPtr(403), encoder) + handleStreamResultError(structs.ErrPermissionDenied, pointer.Of(int64(403)), encoder) return } @@ -108,7 +108,7 @@ func (a *Agent) monitor(conn io.ReadWriteCloser) { } if logLevel == log.NoLevel { - handleStreamResultError(errors.New("Unknown log level"), helper.Int64ToPtr(400), encoder) + handleStreamResultError(errors.New("Unknown log level"), pointer.Of(int64(400)), encoder) return } @@ -206,7 +206,7 @@ OUTER: } if streamErr != nil { - handleStreamResultError(streamErr, helper.Int64ToPtr(500), encoder) + handleStreamResultError(streamErr, pointer.Of(int64(500)), encoder) return } } @@ -218,7 +218,7 @@ func (a *Agent) Host(args *structs.HostDataRequest, reply *structs.HostDataRespo return err } if (aclObj != nil && !aclObj.AllowAgentRead()) || - (aclObj == nil && !a.c.config.EnableDebug) { + (aclObj == nil && !a.c.GetConfig().EnableDebug) { return structs.ErrPermissionDenied } diff --git 
a/client/alloc_endpoint.go b/client/alloc_endpoint.go index 6b1ff760420..b52334f1635 100644 --- a/client/alloc_endpoint.go +++ b/client/alloc_endpoint.go @@ -13,7 +13,7 @@ import ( "github.com/hashicorp/nomad/acl" cstructs "github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" nstructs "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" @@ -103,7 +103,7 @@ func (a *Allocations) Restart(args *nstructs.AllocRestartRequest, reply *nstruct return nstructs.ErrPermissionDenied } - return a.c.RestartAllocation(args.AllocID, args.TaskName) + return a.c.RestartAllocation(args.AllocID, args.TaskName, args.AllTasks) } // Stats is used to collect allocation statistics @@ -161,7 +161,7 @@ func (a *Allocations) execImpl(encoder *codec.Encoder, decoder *codec.Decoder, e // Decode the arguments var req cstructs.AllocExecRequest if err := decoder.Decode(&req); err != nil { - return helper.Int64ToPtr(500), err + return pointer.Of(int64(500)), err } if a.c.GetConfig().DisableRemoteExec { @@ -169,13 +169,13 @@ func (a *Allocations) execImpl(encoder *codec.Encoder, decoder *codec.Decoder, e } if req.AllocID == "" { - return helper.Int64ToPtr(400), allocIDNotPresentErr + return pointer.Of(int64(400)), allocIDNotPresentErr } ar, err := a.c.getAllocRunner(req.AllocID) if err != nil { - code := helper.Int64ToPtr(500) + code := pointer.Of(int64(500)) if nstructs.IsErrUnknownAllocation(err) { - code = helper.Int64ToPtr(404) + code = pointer.Of(int64(404)) } return code, err @@ -210,17 +210,17 @@ func (a *Allocations) execImpl(encoder *codec.Encoder, decoder *codec.Decoder, e // Validate the arguments if req.Task == "" { - return helper.Int64ToPtr(400), taskNotPresentErr + return pointer.Of(int64(400)), taskNotPresentErr } if len(req.Cmd) == 0 { - return helper.Int64ToPtr(400), errors.New("command is not present") + return pointer.Of(int64(400)), errors.New("command is not present") } capabilities, err := ar.GetTaskDriverCapabilities(req.Task) if err != nil { - code := helper.Int64ToPtr(500) + code := pointer.Of(int64(500)) if nstructs.IsErrUnknownAllocation(err) { - code = helper.Int64ToPtr(404) + code = pointer.Of(int64(404)) } return code, err @@ -236,9 +236,9 @@ func (a *Allocations) execImpl(encoder *codec.Encoder, decoder *codec.Decoder, e allocState, err := a.c.GetAllocState(req.AllocID) if err != nil { - code := helper.Int64ToPtr(500) + code := pointer.Of(int64(500)) if nstructs.IsErrUnknownAllocation(err) { - code = helper.Int64ToPtr(404) + code = pointer.Of(int64(404)) } return code, err @@ -247,11 +247,11 @@ func (a *Allocations) execImpl(encoder *codec.Encoder, decoder *codec.Decoder, e // Check that the task is there taskState := allocState.TaskStates[req.Task] if taskState == nil { - return helper.Int64ToPtr(400), fmt.Errorf("unknown task name %q", req.Task) + return pointer.Of(int64(400)), fmt.Errorf("unknown task name %q", req.Task) } if taskState.StartedAt.IsZero() { - return helper.Int64ToPtr(404), fmt.Errorf("task %q not started yet.", req.Task) + return pointer.Of(int64(404)), fmt.Errorf("task %q not started yet.", req.Task) } ctx, cancel := context.WithCancel(context.Background()) @@ -259,12 +259,12 @@ func (a *Allocations) execImpl(encoder *codec.Encoder, decoder *codec.Decoder, e h := ar.GetTaskExecHandler(req.Task) if h == nil { - return helper.Int64ToPtr(404), fmt.Errorf("task %q is not running.", req.Task) + return pointer.Of(int64(404)), 
fmt.Errorf("task %q is not running.", req.Task) } err = h(ctx, req.Cmd, req.Tty, newExecStream(decoder, encoder)) if err != nil { - code := helper.Int64ToPtr(500) + code := pointer.Of(int64(500)) return code, err } diff --git a/client/alloc_endpoint_test.go b/client/alloc_endpoint_test.go index 272819ecc35..9fc912e460b 100644 --- a/client/alloc_endpoint_test.go +++ b/client/alloc_endpoint_test.go @@ -67,6 +67,45 @@ func TestAllocations_Restart(t *testing.T) { }) } +func TestAllocations_RestartAllTasks(t *testing.T) { + ci.Parallel(t) + + require := require.New(t) + client, cleanup := TestClient(t, nil) + defer cleanup() + + alloc := mock.LifecycleAlloc() + require.Nil(client.addAlloc(alloc, "")) + + // Can't restart all tasks while specifying a task name. + req := &nstructs.AllocRestartRequest{ + AllocID: alloc.ID, + AllTasks: true, + TaskName: "web", + } + var resp nstructs.GenericResponse + err := client.ClientRPC("Allocations.Restart", &req, &resp) + require.Error(err) + + // Good request. + req = &nstructs.AllocRestartRequest{ + AllocID: alloc.ID, + AllTasks: true, + } + + testutil.WaitForResult(func() (bool, error) { + var resp2 nstructs.GenericResponse + err := client.ClientRPC("Allocations.Restart", &req, &resp2) + if err != nil && strings.Contains(err.Error(), "not running") { + return false, err + } + + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) +} + func TestAllocations_Restart_ACL(t *testing.T) { ci.Parallel(t) require := require.New(t) diff --git a/client/alloc_watcher_e2e_test.go b/client/alloc_watcher_e2e_test.go index 18cac24a761..9e04e6b3e55 100644 --- a/client/alloc_watcher_e2e_test.go +++ b/client/alloc_watcher_e2e_test.go @@ -3,7 +3,7 @@ package client_test import ( "bytes" "fmt" - "io/ioutil" + "os" "path/filepath" "testing" @@ -55,6 +55,7 @@ func TestPrevAlloc_StreamAllocDir_TLS(t *testing.T) { CertFile: clientCertFn, KeyFile: clientKeyFn, } + c.Client.Enabled = true c.Client.Servers = []string{server.GetConfig().RPCAddr.String()} } @@ -102,7 +103,7 @@ func TestPrevAlloc_StreamAllocDir_TLS(t *testing.T) { // Save a file into alloc dir contents := []byte("123\n456") allocFn := filepath.Join(client1.DataDir, "alloc", origAlloc, "alloc", "data", "bar") - require.NoError(ioutil.WriteFile(allocFn, contents, 0666)) + require.NoError(os.WriteFile(allocFn, contents, 0666)) t.Logf("[TEST] Wrote initial file: %s", allocFn) // Migrate alloc to other node @@ -141,7 +142,7 @@ func TestPrevAlloc_StreamAllocDir_TLS(t *testing.T) { allocFn2 := filepath.Join(client2.DataDir, "alloc", newAlloc.ID, "alloc", "data", "bar") t.Logf("[TEST] Comparing against file: %s", allocFn2) testutil.WaitForResult(func() (bool, error) { - found, err := ioutil.ReadFile(allocFn2) + found, err := os.ReadFile(allocFn2) if err != nil { return false, err } diff --git a/client/allocdir/alloc_dir.go b/client/allocdir/alloc_dir.go index da05aacb39d..f0c79225a33 100644 --- a/client/allocdir/alloc_dir.go +++ b/client/allocdir/alloc_dir.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "net/http" "os" "path/filepath" @@ -357,12 +356,16 @@ func (d *AllocDir) List(path string) ([]*cstructs.AllocFileInfo, error) { } p := filepath.Join(d.AllocDir, path) - finfos, err := ioutil.ReadDir(p) + finfos, err := os.ReadDir(p) if err != nil { return []*cstructs.AllocFileInfo{}, err } files := make([]*cstructs.AllocFileInfo, len(finfos)) - for idx, info := range finfos { + for idx, file := range finfos { + info, err := file.Info() + if err != nil { + return []*cstructs.AllocFileInfo{}, 
err + } files[idx] = &cstructs.AllocFileInfo{ Name: info.Name(), IsDir: info.IsDir(), diff --git a/client/allocdir/alloc_dir_test.go b/client/allocdir/alloc_dir_test.go index f0764f26e92..ffe69569206 100644 --- a/client/allocdir/alloc_dir_test.go +++ b/client/allocdir/alloc_dir_test.go @@ -6,8 +6,6 @@ import ( "context" "io" "io/fs" - "io/ioutil" - "log" "os" "path/filepath" "runtime" @@ -51,11 +49,7 @@ var ( func TestAllocDir_BuildAlloc(t *testing.T) { ci.Parallel(t) - tmp, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp) + tmp := t.TempDir() d := NewAllocDir(testlog.HCLogger(t), tmp, "test") defer d.Destroy() @@ -87,8 +81,9 @@ func TestAllocDir_BuildAlloc(t *testing.T) { } // HACK: This function is copy/pasted from client.testutil to prevent a test -// import cycle, due to testutil transitively importing allocdir. This -// should be fixed after DriverManager is implemented. +// +// import cycle, due to testutil transitively importing allocdir. This +// should be fixed after DriverManager is implemented. func MountCompatible(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("Windows does not support mount") @@ -103,11 +98,7 @@ func TestAllocDir_MountSharedAlloc(t *testing.T) { ci.Parallel(t) MountCompatible(t) - tmp, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp) + tmp := t.TempDir() d := NewAllocDir(testlog.HCLogger(t), tmp, "test") defer d.Destroy() @@ -128,14 +119,14 @@ func TestAllocDir_MountSharedAlloc(t *testing.T) { // Write a file to the shared dir. contents := []byte("foo") const filename = "bar" - if err := ioutil.WriteFile(filepath.Join(d.SharedDir, filename), contents, 0666); err != nil { + if err := os.WriteFile(filepath.Join(d.SharedDir, filename), contents, 0666); err != nil { t.Fatalf("Couldn't write file to shared directory: %v", err) } // Check that the file exists in the task directories for _, td := range []*TaskDir{td1, td2} { taskFile := filepath.Join(td.SharedTaskDir, filename) - act, err := ioutil.ReadFile(taskFile) + act, err := os.ReadFile(taskFile) if err != nil { t.Errorf("Failed to read shared alloc file from task dir: %v", err) continue @@ -150,11 +141,7 @@ func TestAllocDir_MountSharedAlloc(t *testing.T) { func TestAllocDir_Snapshot(t *testing.T) { ci.Parallel(t) - tmp, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp) + tmp := t.TempDir() d := NewAllocDir(testlog.HCLogger(t), tmp, "test") defer d.Destroy() @@ -175,7 +162,7 @@ func TestAllocDir_Snapshot(t *testing.T) { // Write a file to the shared dir. 
exp := []byte{'f', 'o', 'o'} file := "bar" - if err := ioutil.WriteFile(filepath.Join(d.SharedDir, "data", file), exp, 0666); err != nil { + if err := os.WriteFile(filepath.Join(d.SharedDir, "data", file), exp, 0666); err != nil { t.Fatalf("Couldn't write file to shared directory: %v", err) } @@ -188,7 +175,7 @@ func TestAllocDir_Snapshot(t *testing.T) { // Write a file to the task local exp = []byte{'b', 'a', 'r'} file1 := "lol" - if err := ioutil.WriteFile(filepath.Join(td1.LocalDir, file1), exp, 0666); err != nil { + if err := os.WriteFile(filepath.Join(td1.LocalDir, file1), exp, 0666); err != nil { t.Fatalf("couldn't write file to task local directory: %v", err) } @@ -232,17 +219,8 @@ func TestAllocDir_Snapshot(t *testing.T) { func TestAllocDir_Move(t *testing.T) { ci.Parallel(t) - tmp1, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp1) - - tmp2, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp2) + tmp1 := t.TempDir() + tmp2 := t.TempDir() // Create two alloc dirs d1 := NewAllocDir(testlog.HCLogger(t), tmp1, "test") @@ -271,14 +249,14 @@ func TestAllocDir_Move(t *testing.T) { // Write a file to the shared dir. exp1 := []byte("foo") file1 := "bar" - if err := ioutil.WriteFile(filepath.Join(dataDir, file1), exp1, 0666); err != nil { + if err := os.WriteFile(filepath.Join(dataDir, file1), exp1, 0666); err != nil { t.Fatalf("Couldn't write file to shared directory: %v", err) } // Write a file to the task local exp2 := []byte("bar") file2 := "lol" - if err := ioutil.WriteFile(filepath.Join(td1.LocalDir, file2), exp2, 0666); err != nil { + if err := os.WriteFile(filepath.Join(td1.LocalDir, file2), exp2, 0666); err != nil { t.Fatalf("couldn't write to task local directory: %v", err) } @@ -302,11 +280,7 @@ func TestAllocDir_Move(t *testing.T) { func TestAllocDir_EscapeChecking(t *testing.T) { ci.Parallel(t) - tmp, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp) + tmp := t.TempDir() d := NewAllocDir(testlog.HCLogger(t), tmp, "test") if err := d.Build(); err != nil { @@ -362,7 +336,7 @@ func TestAllocDir_ReadAt_SecretDir(t *testing.T) { // create target file in the task secrets dir full := filepath.Join(d.AllocDir, target) - err = ioutil.WriteFile(full, []byte("hi"), 0600) + err = os.WriteFile(full, []byte("hi"), 0600) require.NoError(t, err) // ReadAt of a file in the task secrets dir should fail @@ -373,11 +347,7 @@ func TestAllocDir_ReadAt_SecretDir(t *testing.T) { func TestAllocDir_SplitPath(t *testing.T) { ci.Parallel(t) - dir, err := ioutil.TempDir("", "tmpdirtest") - if err != nil { - log.Fatal(err) - } - defer os.RemoveAll(dir) + dir := t.TempDir() dest := filepath.Join(dir, "/foo/bar/baz") if err := os.MkdirAll(dest, os.ModePerm); err != nil { @@ -401,11 +371,7 @@ func TestAllocDir_CreateDir(t *testing.T) { t.Skip("Must be root to run test") } - dir, err := ioutil.TempDir("", "tmpdirtest") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(dir) + dir := t.TempDir() // create a subdir and a file subdir := filepath.Join(dir, "subdir") @@ -418,10 +384,7 @@ func TestAllocDir_CreateDir(t *testing.T) { } // Create the above hierarchy under another destination - dir1, err := ioutil.TempDir("/tmp", "tempdirdest") - if err != nil { - t.Fatalf("err: %v", err) - } + dir1 := t.TempDir() if err := createDir(dir1, subdir); err != 
nil { t.Fatalf("err: %v", err) @@ -440,11 +403,7 @@ func TestAllocDir_CreateDir(t *testing.T) { func TestPathFuncs(t *testing.T) { ci.Parallel(t) - dir, err := ioutil.TempDir("", "nomadtest-pathfuncs") - if err != nil { - t.Fatalf("error creating temp dir: %v", err) - } - defer os.RemoveAll(dir) + dir := t.TempDir() missingDir := filepath.Join(dir, "does-not-exist") diff --git a/client/allocdir/fs_linux_test.go b/client/allocdir/fs_linux_test.go index e8087086e4a..95a8b7f047c 100644 --- a/client/allocdir/fs_linux_test.go +++ b/client/allocdir/fs_linux_test.go @@ -4,7 +4,6 @@ import ( "bufio" "fmt" "io" - "io/ioutil" "os" "path/filepath" "strings" @@ -55,13 +54,7 @@ func TestLinuxRootSecretDir(t *testing.T) { t.Skip("Must be run as root") } - tmpdir, err := ioutil.TempDir("", "nomadtest-rootsecretdir") - if err != nil { - t.Fatalf("unable to create tempdir for test: %v", err) - } - defer os.RemoveAll(tmpdir) - - secretsDir := filepath.Join(tmpdir, TaskSecrets) + secretsDir := filepath.Join(t.TempDir(), TaskSecrets) // removing a nonexistent secrets dir should NOT error if err := removeSecretDir(secretsDir); err != nil { @@ -117,13 +110,7 @@ func TestLinuxUnprivilegedSecretDir(t *testing.T) { t.Skip("Must not be run as root") } - tmpdir, err := ioutil.TempDir("", "nomadtest-secretdir") - if err != nil { - t.Fatalf("unable to create tempdir for test: %s", err) - } - defer os.RemoveAll(tmpdir) - - secretsDir := filepath.Join(tmpdir, TaskSecrets) + secretsDir := filepath.Join(t.TempDir(), TaskSecrets) // removing a nonexistent secrets dir should NOT error if err := removeSecretDir(secretsDir); err != nil { diff --git a/client/allocdir/task_dir.go b/client/allocdir/task_dir.go index d516c313cf1..187b820050f 100644 --- a/client/allocdir/task_dir.go +++ b/client/allocdir/task_dir.go @@ -2,7 +2,6 @@ package allocdir import ( "fmt" - "io/ioutil" "os" "path/filepath" @@ -184,12 +183,16 @@ func (t *TaskDir) embedDirs(entries map[string]string) error { } // Enumerate the files in source. 
- dirEntries, err := ioutil.ReadDir(source) + dirEntries, err := os.ReadDir(source) if err != nil { return fmt.Errorf("Couldn't read directory %v: %v", source, err) } - for _, entry := range dirEntries { + for _, fileEntry := range dirEntries { + entry, err := fileEntry.Info() + if err != nil { + return fmt.Errorf("Couldn't read the file information %v: %v", entry, err) + } hostEntry := filepath.Join(source, entry.Name()) taskEntry := filepath.Join(destDir, filepath.Base(hostEntry)) if entry.IsDir() { diff --git a/client/allocdir/task_dir_test.go b/client/allocdir/task_dir_test.go index 5ae12404ba4..885ea9d7e24 100644 --- a/client/allocdir/task_dir_test.go +++ b/client/allocdir/task_dir_test.go @@ -1,7 +1,6 @@ package allocdir import ( - "io/ioutil" "os" "path/filepath" "testing" @@ -14,11 +13,7 @@ import ( func TestTaskDir_EmbedNonexistent(t *testing.T) { ci.Parallel(t) - tmp, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp) + tmp := t.TempDir() d := NewAllocDir(testlog.HCLogger(t), tmp, "test") defer d.Destroy() @@ -38,11 +33,7 @@ func TestTaskDir_EmbedNonexistent(t *testing.T) { func TestTaskDir_EmbedDirs(t *testing.T) { ci.Parallel(t) - tmp, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp) + tmp := t.TempDir() d := NewAllocDir(testlog.HCLogger(t), tmp, "test") defer d.Destroy() @@ -53,11 +44,7 @@ func TestTaskDir_EmbedDirs(t *testing.T) { // Create a fake host directory, with a file, and a subfolder that contains // a file. - host, err := ioutil.TempDir("", "AllocDirHost") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(host) + host := t.TempDir() subDirName := "subdir" subDir := filepath.Join(host, subDirName) @@ -67,11 +54,11 @@ func TestTaskDir_EmbedDirs(t *testing.T) { file := "foo" subFile := "bar" - if err := ioutil.WriteFile(filepath.Join(host, file), []byte{'a'}, 0777); err != nil { + if err := os.WriteFile(filepath.Join(host, file), []byte{'a'}, 0777); err != nil { t.Fatalf("Couldn't create file in host dir %v: %v", host, err) } - if err := ioutil.WriteFile(filepath.Join(subDir, subFile), []byte{'a'}, 0777); err != nil { + if err := os.WriteFile(filepath.Join(subDir, subFile), []byte{'a'}, 0777); err != nil { t.Fatalf("Couldn't create file in host subdir %v: %v", subDir, err) } @@ -96,11 +83,7 @@ func TestTaskDir_NonRoot_Image(t *testing.T) { if os.Geteuid() == 0 { t.Skip("test should be run as non-root user") } - tmp, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp) + tmp := t.TempDir() d := NewAllocDir(testlog.HCLogger(t), tmp, "test") defer d.Destroy() @@ -121,11 +104,7 @@ func TestTaskDir_NonRoot(t *testing.T) { t.Skip("test should be run as non-root user") } - tmp, err := ioutil.TempDir("", "AllocDir") - if err != nil { - t.Fatalf("Couldn't create temp dir: %v", err) - } - defer os.RemoveAll(tmp) + tmp := t.TempDir() d := NewAllocDir(testlog.HCLogger(t), tmp, "test") defer d.Destroy() @@ -139,7 +118,7 @@ func TestTaskDir_NonRoot(t *testing.T) { } // ${TASK_DIR}/alloc should not exist! 
- if _, err = os.Stat(td.SharedTaskDir); !os.IsNotExist(err) { + if _, err := os.Stat(td.SharedTaskDir); !os.IsNotExist(err) { t.Fatalf("Expected a NotExist error for shared alloc dir in task dir: %q", td.SharedTaskDir) } } diff --git a/client/allocdir/testing.go b/client/allocdir/testing.go index 6ea7d7bb990..c534a99d39c 100644 --- a/client/allocdir/testing.go +++ b/client/allocdir/testing.go @@ -1,7 +1,6 @@ package allocdir import ( - "io/ioutil" "os" hclog "github.com/hashicorp/go-hclog" @@ -11,7 +10,7 @@ import ( // TestAllocDir returns a built alloc dir in a temporary directory and cleanup // func. func TestAllocDir(t testing.T, l hclog.Logger, prefix, id string) (*AllocDir, func()) { - dir, err := ioutil.TempDir("", prefix) + dir, err := os.MkdirTemp("", prefix) if err != nil { t.Fatalf("Couldn't create temp dir: %v", err) } diff --git a/client/allochealth/tracker.go b/client/allochealth/tracker.go index 2bd727180f7..d4e0596ce78 100644 --- a/client/allochealth/tracker.go +++ b/client/allochealth/tracker.go @@ -171,7 +171,7 @@ func (t *Tracker) TaskEvents() map[string]*structs.TaskEvent { // Go through are task information and build the event map for task, state := range t.taskHealth { useChecks := t.tg.Update.HealthCheck == structs.UpdateStrategyHealthCheck_Checks - if e, ok := state.event(deadline, t.tg.Update.MinHealthyTime, useChecks); ok { + if e, ok := state.event(deadline, t.tg.Update.HealthyDeadline, t.tg.Update.MinHealthyTime, useChecks); ok { events[task] = structs.NewTaskEvent(AllocHealthEventSource).SetMessage(e) } } @@ -210,6 +210,10 @@ func (t *Tracker) setTaskHealth(healthy, terminal bool) { // setCheckHealth is used to mark the checks as either healthy or unhealthy. // returns true if health is propagated and no more health monitoring is needed +// +// todo: this is currently being shared by watchConsulEvents and watchNomadEvents, +// and must be split up if/when we support registering services (and thus checks) +// of different providers. func (t *Tracker) setCheckHealth(healthy bool) bool { t.l.Lock() defer t.l.Unlock() @@ -487,7 +491,7 @@ type taskHealthState struct { // event takes the deadline time for the allocation to be healthy and the update // strategy of the group. It returns true if the task has contributed to the // allocation being unhealthy and if so, an event description of why. 
-func (t *taskHealthState) event(deadline time.Time, minHealthyTime time.Duration, useChecks bool) (string, bool) { +func (t *taskHealthState) event(deadline time.Time, healthyDeadline, minHealthyTime time.Duration, useChecks bool) (string, bool) { requireChecks := false desiredChecks := 0 for _, s := range t.task.Services { @@ -505,7 +509,7 @@ func (t *taskHealthState) event(deadline time.Time, minHealthyTime time.Duration switch t.state.State { case structs.TaskStatePending: - return "Task not running by deadline", true + return fmt.Sprintf("Task not running by healthy_deadline of %v", healthyDeadline), true case structs.TaskStateDead: // hook tasks are healthy when dead successfully if t.task.Lifecycle == nil || t.task.Lifecycle.Sidecar { @@ -514,7 +518,7 @@ func (t *taskHealthState) event(deadline time.Time, minHealthyTime time.Duration case structs.TaskStateRunning: // We are running so check if we have been running long enough if t.state.StartedAt.Add(minHealthyTime).After(deadline) { - return fmt.Sprintf("Task not running for min_healthy_time of %v by deadline", minHealthyTime), true + return fmt.Sprintf("Task not running for min_healthy_time of %v by healthy_deadline of %v", minHealthyTime, healthyDeadline), true } } } diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 32b8d4e0246..911dcc63aa4 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -11,6 +11,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/allocrunner/state" + "github.com/hashicorp/nomad/client/allocrunner/tasklifecycle" "github.com/hashicorp/nomad/client/allocrunner/taskrunner" "github.com/hashicorp/nomad/client/allocwatcher" "github.com/hashicorp/nomad/client/config" @@ -26,8 +27,7 @@ import ( cstate "github.com/hashicorp/nomad/client/state" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/client/vaultclient" - agentconsul "github.com/hashicorp/nomad/command/agent/consul" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/device" "github.com/hashicorp/nomad/plugins/drivers" @@ -170,7 +170,9 @@ type allocRunner struct { // restore. serversContactedCh chan struct{} - taskHookCoordinator *taskHookCoordinator + // taskCoordinator is used to controlled when tasks are allowed to run + // depending on their lifecycle configuration. + taskCoordinator *tasklifecycle.Coordinator shutdownDelayCtx context.Context shutdownDelayCancelFn context.CancelFunc @@ -182,6 +184,9 @@ type allocRunner struct { // serviceRegWrapper is the handler wrapper that is used by service hooks // to perform service and check registration and deregistration. serviceRegWrapper *wrapper.HandlerWrapper + + // getter is an interface for retrieving artifacts. + getter cinterfaces.ArtifactGetter } // RPCer is the interface needed by hooks to make RPC calls. 
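Throughout the client diffs above, the per-type helpers (`helper.Int64ToPtr` and friends) are replaced by `pointer.Of(...)` from the new `github.com/hashicorp/nomad/helper/pointer` import. As a minimal sketch, assuming that helper has the same shape as the generic `pointerOf` added to the `api` package earlier in this diff:

```go
// Assumed shape of helper/pointer: a single generic constructor that returns a
// pointer to a copy of its argument, replacing boolToPtr, intToPtr, int64ToPtr, etc.
package pointer

// Of returns a pointer to a copy of the value a.
func Of[A any](a A) *A {
	return &a
}
```

With a generic constructor like this, call sites such as `handleStreamResultError(err, pointer.Of(int64(500)), encoder)` no longer need a separate helper per pointer type.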
@@ -226,6 +231,7 @@ func NewAllocRunner(config *Config) (*allocRunner, error) { serversContactedCh: config.ServersContactedCh, rpcClient: config.RPCClient, serviceRegWrapper: config.ServiceRegWrapper, + getter: config.Getter, } // Create the logger based on the allocation ID @@ -237,7 +243,7 @@ func NewAllocRunner(config *Config) (*allocRunner, error) { // Create alloc dir ar.allocDir = allocdir.NewAllocDir(ar.logger, config.ClientConfig.AllocDir, alloc.ID) - ar.taskHookCoordinator = newTaskHookCoordinator(ar.logger, tg.Tasks) + ar.taskCoordinator = tasklifecycle.NewCoordinator(ar.logger, tg.Tasks, ar.waitCh) shutdownDelayCtx, shutdownDelayCancel := context.WithCancel(context.Background()) ar.shutdownDelayCtx = shutdownDelayCtx @@ -260,26 +266,27 @@ func NewAllocRunner(config *Config) (*allocRunner, error) { func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error { for _, task := range tasks { trConfig := &taskrunner.Config{ - Alloc: ar.alloc, - ClientConfig: ar.clientConfig, - Task: task, - TaskDir: ar.allocDir.NewTaskDir(task.Name), - Logger: ar.logger, - StateDB: ar.stateDB, - StateUpdater: ar, - DynamicRegistry: ar.dynamicRegistry, - Consul: ar.consulClient, - ConsulProxies: ar.consulProxiesClient, - ConsulSI: ar.sidsClient, - Vault: ar.vaultClient, - DeviceStatsReporter: ar.deviceStatsReporter, - CSIManager: ar.csiManager, - DeviceManager: ar.devicemanager, - DriverManager: ar.driverManager, - ServersContactedCh: ar.serversContactedCh, - StartConditionMetCtx: ar.taskHookCoordinator.startConditionForTask(task), - ShutdownDelayCtx: ar.shutdownDelayCtx, - ServiceRegWrapper: ar.serviceRegWrapper, + Alloc: ar.alloc, + ClientConfig: ar.clientConfig, + Task: task, + TaskDir: ar.allocDir.NewTaskDir(task.Name), + Logger: ar.logger, + StateDB: ar.stateDB, + StateUpdater: ar, + DynamicRegistry: ar.dynamicRegistry, + Consul: ar.consulClient, + ConsulProxies: ar.consulProxiesClient, + ConsulSI: ar.sidsClient, + Vault: ar.vaultClient, + DeviceStatsReporter: ar.deviceStatsReporter, + CSIManager: ar.csiManager, + DeviceManager: ar.devicemanager, + DriverManager: ar.driverManager, + ServersContactedCh: ar.serversContactedCh, + StartConditionMetCh: ar.taskCoordinator.StartConditionForTask(task), + ShutdownDelayCtx: ar.shutdownDelayCtx, + ServiceRegWrapper: ar.serviceRegWrapper, + Getter: ar.getter, } if ar.cpusetManager != nil { @@ -377,26 +384,12 @@ func (ar *allocRunner) shouldRun() bool { // runTasks is used to run the task runners and block until they exit. func (ar *allocRunner) runTasks() { - // Start all tasks + // Start and wait for all tasks. 
for _, task := range ar.tasks { go task.Run() } - - // Block on all tasks except poststop tasks for _, task := range ar.tasks { - if !task.IsPoststopTask() { - <-task.WaitCh() - } - } - - // Signal poststop tasks to proceed to main runtime - ar.taskHookCoordinator.StartPoststopTasks() - - // Wait for poststop tasks to finish before proceeding - for _, task := range ar.tasks { - if task.IsPoststopTask() { - <-task.WaitCh() - } + <-task.WaitCh() } } @@ -450,7 +443,7 @@ func (ar *allocRunner) Restore() error { states[tr.Task().Name] = tr.TaskState() } - ar.taskHookCoordinator.taskStateUpdated(states) + ar.taskCoordinator.Restore(states) return nil } @@ -520,21 +513,21 @@ func (ar *allocRunner) handleTaskStateUpdates() { states := make(map[string]*structs.TaskState, trNum) for name, tr := range ar.tasks { - state := tr.TaskState() - states[name] = state + taskState := tr.TaskState() + states[name] = taskState if tr.IsPoststopTask() { continue } // Capture live task runners in case we need to kill them - if state.State != structs.TaskStateDead { + if taskState.State != structs.TaskStateDead { liveRunners = append(liveRunners, tr) continue } // Task is dead, determine if other tasks should be killed - if state.Failed { + if taskState.Failed { // Only set failed event if no event has been // set yet to give dead leaders priority. if killEvent == nil { @@ -547,45 +540,69 @@ func (ar *allocRunner) handleTaskStateUpdates() { } } - // if all live runners are sidecars - kill alloc - if killEvent == nil && hasSidecars && !hasNonSidecarTasks(liveRunners) { - killEvent = structs.NewTaskEvent(structs.TaskMainDead) - } - - // If there's a kill event set and live runners, kill them - if killEvent != nil && len(liveRunners) > 0 { - - // Log kill reason - switch killEvent.Type { - case structs.TaskLeaderDead: - ar.logger.Debug("leader task dead, destroying all tasks", "leader_task", killTask) - case structs.TaskMainDead: - ar.logger.Debug("main tasks dead, destroying all sidecar tasks") - default: - ar.logger.Debug("task failure, destroying all tasks", "failed_task", killTask) + if len(liveRunners) > 0 { + // if all live runners are sidecars - kill alloc + onlySidecarsRemaining := hasSidecars && !hasNonSidecarTasks(liveRunners) + if killEvent == nil && onlySidecarsRemaining { + killEvent = structs.NewTaskEvent(structs.TaskMainDead) } - // Emit kill event for live runners - for _, tr := range liveRunners { - tr.EmitEvent(killEvent) - } + // If there's a kill event set and live runners, kill them + if killEvent != nil { + + // Log kill reason + switch killEvent.Type { + case structs.TaskLeaderDead: + ar.logger.Debug("leader task dead, destroying all tasks", "leader_task", killTask) + case structs.TaskMainDead: + ar.logger.Debug("main tasks dead, destroying all sidecar tasks") + default: + ar.logger.Debug("task failure, destroying all tasks", "failed_task", killTask) + } - // Kill 'em all - states = ar.killTasks() + // Emit kill event for live runners + for _, tr := range liveRunners { + tr.EmitEvent(killEvent) + } + + // Kill 'em all + states = ar.killTasks() + + // Wait for TaskRunners to exit before continuing. This will + // prevent looping before TaskRunners have transitioned to + // Dead. + for _, tr := range liveRunners { + ar.logger.Info("waiting for task to exit", "task", tr.Task().Name) + select { + case <-tr.WaitCh(): + case <-ar.waitCh: + } + } + } + } else { + // If there are no live runners left kill all non-poststop task + // runners to unblock them from the alloc restart loop. 
+ for _, tr := range ar.tasks { + if tr.IsPoststopTask() { + continue + } - // Wait for TaskRunners to exit before continuing to - // prevent looping before TaskRunners have transitioned - // to Dead. - for _, tr := range liveRunners { - ar.logger.Info("killing task", "task", tr.Task().Name) select { case <-tr.WaitCh(): case <-ar.waitCh: + default: + // Kill task runner without setting an event because the + // task is already dead, it's just waiting in the alloc + // restart loop. + err := tr.Kill(context.TODO(), nil) + if err != nil { + ar.logger.Warn("failed to kill task", "task", tr.Task().Name, "error", err) + } } } } - ar.taskHookCoordinator.taskStateUpdated(states) + ar.taskCoordinator.TaskStateUpdated(states) // Get the client allocation calloc := ar.clientAlloc(states) @@ -598,6 +615,28 @@ func (ar *allocRunner) handleTaskStateUpdates() { } } +// hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks +func hasNonSidecarTasks(tasks []*taskrunner.TaskRunner) bool { + for _, tr := range tasks { + if !tr.IsSidecarTask() { + return true + } + } + + return false +} + +// hasSidecarTasks returns true if any of the passed tasks are sidecar tasks +func hasSidecarTasks(tasks map[string]*taskrunner.TaskRunner) bool { + for _, tr := range tasks { + if tr.IsSidecarTask() { + return true + } + } + + return false +} + // killTasks kills all task runners, leader (if there is one) first. Errors are // logged except taskrunner.ErrTaskNotRunning which is ignored. Task states // after Kill has been called are returned. @@ -615,22 +654,22 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { } taskEvent := structs.NewTaskEvent(structs.TaskKilling) - taskEvent.SetKillTimeout(tr.Task().KillTimeout) + taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) err := tr.Kill(context.TODO(), taskEvent) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping leader task", "error", err, "task_name", name) } - state := tr.TaskState() - states[name] = state + taskState := tr.TaskState() + states[name] = taskState break } - // Kill the rest concurrently + // Kill the rest non-sidecar and non-poststop tasks concurrently wg := sync.WaitGroup{} for name, tr := range ar.tasks { - // Filter out poststop tasks so they run after all the other tasks are killed - if tr.IsLeader() || tr.IsPoststopTask() { + // Filter out poststop and sidecar tasks so that they stop after all the other tasks are killed + if tr.IsLeader() || tr.IsPoststopTask() || tr.IsSidecarTask() { continue } @@ -638,15 +677,39 @@ func (ar *allocRunner) killTasks() map[string]*structs.TaskState { go func(name string, tr *taskrunner.TaskRunner) { defer wg.Done() taskEvent := structs.NewTaskEvent(structs.TaskKilling) - taskEvent.SetKillTimeout(tr.Task().KillTimeout) + taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) err := tr.Kill(context.TODO(), taskEvent) if err != nil && err != taskrunner.ErrTaskNotRunning { ar.logger.Warn("error stopping task", "error", err, "task_name", name) } - state := tr.TaskState() + taskState := tr.TaskState() mu.Lock() - states[name] = state + states[name] = taskState + mu.Unlock() + }(name, tr) + } + wg.Wait() + + // Kill the sidecar tasks last. 
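For orientation, `killTasks` now stops the leader first, then the regular (non-sidecar, non-poststop) tasks concurrently, and sidecars only after those have exited; poststop tasks are left for the coordinator to run afterwards. A toy grouping of tasks into those phases follows; the types and helper name are invented for illustration and are not the runner's API.

```go
package main

import "fmt"

// task is a stripped-down stand-in carrying only the flags the kill
// ordering cares about.
type task struct {
	name     string
	leader   bool
	sidecar  bool
	poststop bool
}

// killPhases groups tasks in the order the alloc runner shuts them down:
// leader first, then regular tasks, then sidecars. Poststop tasks are
// excluded because they run after everything else has stopped.
func killPhases(tasks []task) [][]string {
	var leader, regular, sidecars []string
	for _, t := range tasks {
		switch {
		case t.poststop:
			// left alone; started later by the coordinator
		case t.leader:
			leader = append(leader, t.name)
		case t.sidecar:
			sidecars = append(sidecars, t.name)
		default:
			regular = append(regular, t.name)
		}
	}
	return [][]string{leader, regular, sidecars}
}

func main() {
	phases := killPhases([]task{
		{name: "main", leader: true},
		{name: "logger", sidecar: true},
		{name: "worker"},
		{name: "cleanup", poststop: true},
	})
	for i, p := range phases {
		fmt.Printf("phase %d: %v\n", i+1, p)
	}
	// phase 1: [main], phase 2: [worker], phase 3: [logger]
}
```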
+ for name, tr := range ar.tasks { + if !tr.IsSidecarTask() || tr.IsLeader() || tr.IsPoststopTask() { + continue + } + + wg.Add(1) + go func(name string, tr *taskrunner.TaskRunner) { + defer wg.Done() + taskEvent := structs.NewTaskEvent(structs.TaskKilling) + taskEvent.SetKillTimeout(tr.Task().KillTimeout, ar.clientConfig.MaxKillTimeout) + err := tr.Kill(context.TODO(), taskEvent) + if err != nil && err != taskrunner.ErrTaskNotRunning { + ar.logger.Warn("error stopping sidecar task", "error", err, "task_name", name) + } + + taskState := tr.TaskState() + mu.Lock() + states[name] = taskState mu.Unlock() }(name, tr) } @@ -692,7 +755,7 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st if a.ClientStatus == structs.AllocClientStatusFailed && alloc.DeploymentID != "" && !a.DeploymentStatus.HasHealth() { a.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: helper.BoolToPtr(false), + Healthy: pointer.Of(false), } } @@ -1159,19 +1222,37 @@ func (ar *allocRunner) GetTaskEventHandler(taskName string) drivermanager.EventH return nil } -// RestartTask signalls the task runner for the provided task to restart. -func (ar *allocRunner) RestartTask(taskName string, taskEvent *structs.TaskEvent) error { +// Restart satisfies the WorkloadRestarter interface and restarts all tasks +// that are currently running. +func (ar *allocRunner) Restart(ctx context.Context, event *structs.TaskEvent, failure bool) error { + return ar.restartTasks(ctx, event, failure, false) +} + +// RestartTask restarts the provided task. +func (ar *allocRunner) RestartTask(taskName string, event *structs.TaskEvent) error { tr, ok := ar.tasks[taskName] if !ok { return fmt.Errorf("Could not find task runner for task: %s", taskName) } - return tr.Restart(context.TODO(), taskEvent, false) + return tr.Restart(context.TODO(), event, false) } -// Restart satisfies the WorkloadRestarter interface restarts all task runners -// concurrently -func (ar *allocRunner) Restart(ctx context.Context, event *structs.TaskEvent, failure bool) error { +// RestartRunning restarts all tasks that are currently running. +func (ar *allocRunner) RestartRunning(event *structs.TaskEvent) error { + return ar.restartTasks(context.TODO(), event, false, false) +} + +// RestartAll restarts all tasks in the allocation, including dead ones. They +// will restart following their lifecycle order. +func (ar *allocRunner) RestartAll(event *structs.TaskEvent) error { + // Restart the taskCoordinator to allow dead tasks to run again. + ar.taskCoordinator.Restart() + return ar.restartTasks(context.TODO(), event, false, true) +} + +// restartTasks restarts all task runners concurrently. +func (ar *allocRunner) restartTasks(ctx context.Context, event *structs.TaskEvent, failure bool, force bool) error { waitCh := make(chan struct{}) var err *multierror.Error var errMutex sync.Mutex @@ -1184,10 +1265,19 @@ func (ar *allocRunner) Restart(ctx context.Context, event *structs.TaskEvent, fa defer close(waitCh) for tn, tr := range ar.tasks { wg.Add(1) - go func(taskName string, r agentconsul.WorkloadRestarter) { + go func(taskName string, taskRunner *taskrunner.TaskRunner) { defer wg.Done() - e := r.Restart(ctx, event, failure) - if e != nil { + + var e error + if force { + e = taskRunner.ForceRestart(ctx, event.Copy(), failure) + } else { + e = taskRunner.Restart(ctx, event.Copy(), failure) + } + + // Ignore ErrTaskNotRunning errors since tasks that are not + // running are expected to not be restarted. 
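Restart handling is split into `RestartTask`, `RestartRunning`, and `RestartAll`, where only `RestartAll` resets the coordinator so dead tasks may run again and takes the task runner's force path. A loose sketch of that dispatch is below; the interface and fake task are invented for illustration and do not mirror the real task runner.

```go
package main

import (
	"context"
	"errors"
	"fmt"
)

// errTaskNotRunning mimics the sentinel the alloc runner tolerates:
// tasks that are not running are simply not restarted.
var errTaskNotRunning = errors.New("task not running")

type restarter interface {
	Restart(ctx context.Context) error
	ForceRestart(ctx context.Context) error
}

// restartTasks restarts every task, using the force path when requested and
// ignoring the not-running sentinel.
func restartTasks(ctx context.Context, tasks map[string]restarter, force bool) error {
	for name, tr := range tasks {
		var err error
		if force {
			err = tr.ForceRestart(ctx)
		} else {
			err = tr.Restart(ctx)
		}
		if err != nil && !errors.Is(err, errTaskNotRunning) {
			return fmt.Errorf("failed to restart task %s: %v", name, err)
		}
	}
	return nil
}

type fakeTask struct{ running bool }

func (f *fakeTask) Restart(context.Context) error {
	if !f.running {
		return errTaskNotRunning
	}
	return nil
}

func (f *fakeTask) ForceRestart(context.Context) error { return nil }

func main() {
	tasks := map[string]restarter{"main": &fakeTask{running: true}, "poststop": &fakeTask{}}
	fmt.Println(restartTasks(context.Background(), tasks, false)) // <nil>: dead task ignored
	fmt.Println(restartTasks(context.Background(), tasks, true))  // <nil>: force restarts all
}
```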
+ if e != nil && e != taskrunner.ErrTaskNotRunning { errMutex.Lock() defer errMutex.Unlock() err = multierror.Append(err, fmt.Errorf("failed to restart task %s: %v", taskName, e)) @@ -1205,25 +1295,6 @@ func (ar *allocRunner) Restart(ctx context.Context, event *structs.TaskEvent, fa return err.ErrorOrNil() } -// RestartAll signalls all task runners in the allocation to restart and passes -// a copy of the task event to each restart event. -// Returns any errors in a concatenated form. -func (ar *allocRunner) RestartAll(taskEvent *structs.TaskEvent) error { - var err *multierror.Error - - // run alloc task restart hooks - ar.taskRestartHooks() - - for tn := range ar.tasks { - rerr := ar.RestartTask(tn, taskEvent.Copy()) - if rerr != nil { - err = multierror.Append(err, rerr) - } - } - - return err.ErrorOrNil() -} - // Signal sends a signal request to task runners inside an allocation. If the // taskName is empty, then it is sent to all tasks. func (ar *allocRunner) Signal(taskName, signal string) error { diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index 30611b394aa..d42c9993d6b 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -161,7 +161,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { logger: hookLogger, shutdownDelayCtx: ar.shutdownDelayCtx, }), - newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), + newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig, config.Node.Attributes), newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), newCSIHook(alloc, hookLogger, ar.csiManager, ar.rpcClient, ar, hrs, ar.clientConfig.Node.SecretID), } diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 510d19940dd..4b604c6a4da 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -3,15 +3,17 @@ package allocrunner import ( "errors" "fmt" - "io/ioutil" "os" "path/filepath" "testing" "time" "github.com/hashicorp/consul/api" + multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allochealth" + "github.com/hashicorp/nomad/client/allocrunner/tasklifecycle" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner" "github.com/hashicorp/nomad/client/allocwatcher" "github.com/hashicorp/nomad/client/serviceregistration" regMock "github.com/hashicorp/nomad/client/serviceregistration/mock" @@ -126,7 +128,7 @@ func TestAllocRunner_TaskLeader_KillTG(t *testing.T) { expectedKillingMsg := "Sent interrupt. Waiting 10ms before force killing" if killingMsg != expectedKillingMsg { - return false, fmt.Errorf("Unexpected task event message - wanted %q. got %q", killingMsg, expectedKillingMsg) + return false, fmt.Errorf("Unexpected task event message - wanted %q. 
got %q", expectedKillingMsg, killingMsg) } // Task Two should be dead @@ -481,6 +483,464 @@ func TestAllocRunner_Lifecycle_Poststop(t *testing.T) { } +func TestAllocRunner_Lifecycle_Restart(t *testing.T) { + ci.Parallel(t) + + // test cases can use this default or override w/ taskDefs param + alloc := mock.LifecycleAllocFromTasks([]mock.LifecycleTaskDef{ + {Name: "main", RunFor: "100s", ExitCode: 0, Hook: "", IsSidecar: false}, + {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, + {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, + {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, + {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, + {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, + }) + alloc.Job.Type = structs.JobTypeService + rp := &structs.RestartPolicy{ + Attempts: 1, + Interval: 10 * time.Minute, + Delay: 1 * time.Nanosecond, + Mode: structs.RestartPolicyModeFail, + } + + ev := &structs.TaskEvent{Type: structs.TaskRestartSignal} + + testCases := []struct { + name string + taskDefs []mock.LifecycleTaskDef + isBatch bool + hasLeader bool + action func(*allocRunner, *structs.Allocation) error + expectedErr string + expectedAfter map[string]structs.TaskState + }{ + { + name: "restart entire allocation", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartAll(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "restart only running tasks", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartRunning(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "batch job restart entire allocation", + taskDefs: []mock.LifecycleTaskDef{ + {Name: "main", RunFor: "100s", ExitCode: 1, Hook: "", IsSidecar: false}, + {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, + {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, + {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, + {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, + {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, + }, + isBatch: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartAll(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-sidecar": 
structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "batch job restart only running tasks ", + taskDefs: []mock.LifecycleTaskDef{ + {Name: "main", RunFor: "100s", ExitCode: 1, Hook: "", IsSidecar: false}, + {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, + {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, + {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, + {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, + {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, + }, + isBatch: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartRunning(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "restart entire allocation with leader", + hasLeader: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartAll(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "stop from server", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + stopAlloc := alloc.Copy() + stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop + ar.Update(stopAlloc) + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "restart main task", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartTask("main", ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "restart leader main task", + hasLeader: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return 
ar.RestartTask("main", ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "main task fails and restarts once", + taskDefs: []mock.LifecycleTaskDef{ + {Name: "main", RunFor: "2s", ExitCode: 1, Hook: "", IsSidecar: false}, + {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, + {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, + {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, + {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, + {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, + }, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + time.Sleep(3 * time.Second) // make sure main task has exited + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "leader main task fails and restarts once", + taskDefs: []mock.LifecycleTaskDef{ + {Name: "main", RunFor: "2s", ExitCode: 1, Hook: "", IsSidecar: false}, + {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, + {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, + {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, + {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, + {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, + }, + hasLeader: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + time.Sleep(3 * time.Second) // make sure main task has exited + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "main stopped unexpectedly and restarts once", + taskDefs: []mock.LifecycleTaskDef{ + {Name: "main", RunFor: "2s", ExitCode: 0, Hook: "", IsSidecar: false}, + {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, + {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, + {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, + {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, + {Name: "poststop", 
RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, + }, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + time.Sleep(3 * time.Second) // make sure main task has exited + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "leader main stopped unexpectedly and restarts once", + taskDefs: []mock.LifecycleTaskDef{ + {Name: "main", RunFor: "2s", ExitCode: 0, Hook: "", IsSidecar: false}, + {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, + {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, + {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, + {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, + {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, + }, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + time.Sleep(3 * time.Second) // make sure main task has exited + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "failed main task cannot be restarted", + taskDefs: []mock.LifecycleTaskDef{ + {Name: "main", RunFor: "2s", ExitCode: 1, Hook: "", IsSidecar: false}, + {Name: "prestart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "prestart", IsSidecar: false}, + {Name: "prestart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "prestart", IsSidecar: true}, + {Name: "poststart-oneshot", RunFor: "1s", ExitCode: 0, Hook: "poststart", IsSidecar: false}, + {Name: "poststart-sidecar", RunFor: "100s", ExitCode: 0, Hook: "poststart", IsSidecar: true}, + {Name: "poststop", RunFor: "1s", ExitCode: 0, Hook: "poststop", IsSidecar: false}, + }, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + // make sure main task has had a chance to restart once on its + // own and fail again before we try to manually restart it + time.Sleep(5 * time.Second) + return ar.RestartTask("main", ev) + }, + expectedErr: "Task not running", + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "restart prestart-sidecar task", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartTask("prestart-sidecar", ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: 
"running", Restarts: 0}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "restart poststart-sidecar task", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartTask("poststart-sidecar", ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 0}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + ci.Parallel(t) + + alloc := alloc.Copy() + alloc.Job.TaskGroups[0].RestartPolicy = rp + if tc.taskDefs != nil { + alloc = mock.LifecycleAllocFromTasks(tc.taskDefs) + alloc.Job.Type = structs.JobTypeService + } + for _, task := range alloc.Job.TaskGroups[0].Tasks { + task.RestartPolicy = rp // tasks inherit the group policy + } + if tc.hasLeader { + for _, task := range alloc.Job.TaskGroups[0].Tasks { + if task.Name == "main" { + task.Leader = true + } + } + } + if tc.isBatch { + alloc.Job.Type = structs.JobTypeBatch + } + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + ar, err := NewAllocRunner(conf) + require.NoError(t, err) + defer destroy(ar) + go ar.Run() + + upd := conf.StateUpdater.(*MockStateUpdater) + + // assert our "before" states: + // - all one-shot tasks should be dead but not failed + // - all main tasks and sidecars should be running + // - no tasks should have restarted + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("no update") + } + if last.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf( + "expected alloc to be running not %s", last.ClientStatus) + } + var errs *multierror.Error + + expectedBefore := map[string]string{ + "main": "running", + "prestart-oneshot": "dead", + "prestart-sidecar": "running", + "poststart-oneshot": "dead", + "poststart-sidecar": "running", + "poststop": "pending", + } + + for task, expected := range expectedBefore { + got, ok := last.TaskStates[task] + if !ok { + continue + } + if got.State != expected { + errs = multierror.Append(errs, fmt.Errorf( + "expected initial state of task %q to be %q not %q", + task, expected, got.State)) + } + if got.Restarts != 0 { + errs = multierror.Append(errs, fmt.Errorf( + "expected no initial restarts of task %q, not %q", + task, got.Restarts)) + } + if expected == "dead" && got.Failed { + errs = multierror.Append(errs, fmt.Errorf( + "expected ephemeral task %q to be dead but not failed", + task)) + } + + } + if errs.ErrorOrNil() != nil { + return false, errs.ErrorOrNil() + } + return true, nil + }, func(err error) { + require.NoError(t, err, "error waiting for initial state") + }) + + // perform the action + err = tc.action(ar, alloc.Copy()) + if tc.expectedErr != "" { + require.EqualError(t, err, tc.expectedErr) + } else { + require.NoError(t, err) + } + + // assert our "after" states + 
testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("no update") + } + var errs *multierror.Error + for task, expected := range tc.expectedAfter { + got, ok := last.TaskStates[task] + if !ok { + errs = multierror.Append(errs, fmt.Errorf( + "no final state found for task %q", task, + )) + } + if got.State != expected.State { + errs = multierror.Append(errs, fmt.Errorf( + "expected final state of task %q to be %q not %q", + task, expected.State, got.State)) + } + if expected.State == "dead" { + if got.FinishedAt.IsZero() || got.StartedAt.IsZero() { + errs = multierror.Append(errs, fmt.Errorf( + "expected final state of task %q to have start and finish time", task)) + } + if len(got.Events) < 2 { + errs = multierror.Append(errs, fmt.Errorf( + "expected final state of task %q to include at least 2 tasks", task)) + } + } + + if got.Restarts != expected.Restarts { + errs = multierror.Append(errs, fmt.Errorf( + "expected final restarts of task %q to be %v not %v", + task, expected.Restarts, got.Restarts)) + } + } + if errs.ErrorOrNil() != nil { + return false, errs.ErrorOrNil() + } + return true, nil + }, func(err error) { + require.NoError(t, err, "error waiting for final state") + }) + }) + } +} + func TestAllocRunner_TaskGroup_ShutdownDelay(t *testing.T) { ci.Parallel(t) @@ -803,28 +1263,44 @@ func TestAllocRunner_Restore_LifecycleHooks(t *testing.T) { ar, err := NewAllocRunner(conf) require.NoError(t, err) - // We should see all tasks with Prestart hooks are not blocked from running: - // i.e. the "init" and "side" task hook coordinator channels are closed - require.Truef(t, isChannelClosed(ar.taskHookCoordinator.startConditionForTask(ar.tasks["init"].Task())), "init channel was open, should be closed") - require.Truef(t, isChannelClosed(ar.taskHookCoordinator.startConditionForTask(ar.tasks["side"].Task())), "side channel was open, should be closed") + go ar.Run() + defer destroy(ar) + + // Wait for the coordinator to transition from the "init" state. + tasklifecycle.WaitNotInitUntil(ar.taskCoordinator, time.Second, func() { + t.Fatalf("task coordinator didn't transition from init in time") + }) - isChannelClosed(ar.taskHookCoordinator.startConditionForTask(ar.tasks["side"].Task())) + // We should see all tasks with Prestart hooks are not blocked from running. + tasklifecycle.RequireTaskAllowed(t, ar.taskCoordinator, ar.tasks["init"].Task()) + tasklifecycle.RequireTaskAllowed(t, ar.taskCoordinator, ar.tasks["side"].Task()) + tasklifecycle.RequireTaskBlocked(t, ar.taskCoordinator, ar.tasks["web"].Task()) + tasklifecycle.RequireTaskBlocked(t, ar.taskCoordinator, ar.tasks["poststart"].Task()) - // Mimic client dies while init task running, and client restarts after init task finished + // Mimic client dies while init task running, and client restarts after + // init task finished and web is running. ar.tasks["init"].UpdateState(structs.TaskStateDead, structs.NewTaskEvent(structs.TaskTerminated)) ar.tasks["side"].UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted)) + ar.tasks["web"].UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted)) - // Create a new AllocRunner to test RestoreState and Run + // Create a new AllocRunner to test Restore and Run. 
ar2, err := NewAllocRunner(conf) require.NoError(t, err) + require.NoError(t, ar2.Restore()) - if err := ar2.Restore(); err != nil { - t.Fatalf("error restoring state: %v", err) - } + go ar2.Run() + defer destroy(ar2) + + // Wait for the coordinator to transition from the "init" state. + tasklifecycle.WaitNotInitUntil(ar.taskCoordinator, time.Second, func() { + t.Fatalf("task coordinator didn't transition from init in time") + }) - // We want to see Restore resume execution with correct hook ordering: - // i.e. we should see the "web" main task hook coordinator channel is closed - require.Truef(t, isChannelClosed(ar2.taskHookCoordinator.startConditionForTask(ar.tasks["web"].Task())), "web channel was open, should be closed") + // Restore resumes execution with correct lifecycle ordering. + tasklifecycle.RequireTaskBlocked(t, ar2.taskCoordinator, ar2.tasks["init"].Task()) + tasklifecycle.RequireTaskAllowed(t, ar2.taskCoordinator, ar2.tasks["side"].Task()) + tasklifecycle.RequireTaskAllowed(t, ar2.taskCoordinator, ar2.tasks["web"].Task()) + tasklifecycle.RequireTaskAllowed(t, ar2.taskCoordinator, ar2.tasks["poststart"].Task()) } func TestAllocRunner_Update_Semantics(t *testing.T) { @@ -1081,7 +1557,7 @@ func TestAllocRunner_DeploymentHealth_Unhealthy_Checks(t *testing.T) { require.NotEmpty(t, state.Events) last := state.Events[len(state.Events)-1] require.Equal(t, allochealth.AllocHealthEventSource, last.Type) - require.Contains(t, last.Message, "by deadline") + require.Contains(t, last.Message, "by healthy_deadline") } // TestAllocRunner_Destroy asserts that Destroy kills and cleans up a running @@ -1195,15 +1671,15 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) { ar.Run() defer destroy(ar) - require.Equal(t, structs.AllocClientStatusComplete, ar.AllocState().ClientStatus) + WaitForClientState(t, ar, structs.AllocClientStatusComplete) // Step 2. Modify its directory task := alloc.Job.TaskGroups[0].Tasks[0] dataFile := filepath.Join(ar.allocDir.SharedDir, "data", "data_file") - ioutil.WriteFile(dataFile, []byte("hello world"), os.ModePerm) + os.WriteFile(dataFile, []byte("hello world"), os.ModePerm) taskDir := ar.allocDir.TaskDirs[task.Name] taskLocalFile := filepath.Join(taskDir.LocalDir, "local_file") - ioutil.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm) + os.WriteFile(taskLocalFile, []byte("good bye world"), os.ModePerm) // Step 3. Start a new alloc alloc2 := mock.BatchAlloc() @@ -1223,7 +1699,7 @@ func TestAllocRunner_MoveAllocDir(t *testing.T) { ar2.Run() defer destroy(ar2) - require.Equal(t, structs.AllocClientStatusComplete, ar2.AllocState().ClientStatus) + WaitForClientState(t, ar, structs.AllocClientStatusComplete) // Ensure that data from ar was moved to ar2 dataFile = filepath.Join(ar2.allocDir.SharedDir, "data", "data_file") @@ -1648,7 +2124,6 @@ func TestAllocRunner_Reconnect(t *testing.T) { require.Equal(t, tc.clientStatus, ar.AllocState().ClientStatus) - // Make sure the runner's alloc indexes match the update. require.Equal(t, update.AllocModifyIndex, ar.Alloc().AllocModifyIndex) require.Equal(t, update.ModifyIndex, ar.Alloc().ModifyIndex) @@ -1683,3 +2158,242 @@ func TestAllocRunner_Reconnect(t *testing.T) { }) } } + +// TestAllocRunner_Lifecycle_Shutdown_Order asserts that a service job with 3 +// lifecycle hooks (1 sidecar, 1 ephemeral, 1 poststop) starts all 4 tasks, and shuts down +// the sidecar after main, but before poststop. 
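The shutdown-order test that follows pins the expected sequence: the ephemeral task finishes before main, main before the poststart sidecar, and the sidecar before poststop. As a rough illustration of what those `FinishedAt` assertions boil down to, here is a hypothetical helper that is not part of the diff:

```go
package main

import (
	"fmt"
	"time"
)

// finishedInOrder reports whether each task finished no later than the next
// one in the expected shutdown sequence.
func finishedInOrder(finishedAt map[string]time.Time, order []string) bool {
	for i := 0; i < len(order)-1; i++ {
		if finishedAt[order[i]].After(finishedAt[order[i+1]]) {
			return false
		}
	}
	return true
}

func main() {
	now := time.Now()
	finished := map[string]time.Time{
		"ephemeral": now,
		"main":      now.Add(1 * time.Second),
		"sidecar":   now.Add(2 * time.Second),
		"poststop":  now.Add(3 * time.Second),
	}
	// The order the Shutdown_Order test expects.
	fmt.Println(finishedInOrder(finished, []string{"ephemeral", "main", "sidecar", "poststop"}))
}
```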
+func TestAllocRunner_Lifecycle_Shutdown_Order(t *testing.T) { + alloc := mock.LifecycleAllocWithPoststopDeploy() + + alloc.Job.Type = structs.JobTypeService + + mainTask := alloc.Job.TaskGroups[0].Tasks[0] + mainTask.Config["run_for"] = "100s" + + sidecarTask := alloc.Job.TaskGroups[0].Tasks[1] + sidecarTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart + sidecarTask.Config["run_for"] = "100s" + + poststopTask := alloc.Job.TaskGroups[0].Tasks[2] + ephemeralTask := alloc.Job.TaskGroups[0].Tasks[3] + + alloc.Job.TaskGroups[0].Tasks = []*structs.Task{mainTask, ephemeralTask, sidecarTask, poststopTask} + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + ar, err := NewAllocRunner(conf) + require.NoError(t, err) + defer destroy(ar) + go ar.Run() + + upd := conf.StateUpdater.(*MockStateUpdater) + + // Wait for main and sidecar tasks to be running, and that the + // ephemeral task ran and exited. + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") + } + + if last.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus) + } + + if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning { + return false, fmt.Errorf("expected main task to be running not %s", s) + } + + if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateRunning { + return false, fmt.Errorf("expected sidecar task to be running not %s", s) + } + + if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead { + return false, fmt.Errorf("expected ephemeral task to be dead not %s", s) + } + + if last.TaskStates[ephemeralTask.Name].Failed { + return false, fmt.Errorf("expected ephemeral task to be successful not failed") + } + + return true, nil + }, func(err error) { + t.Fatalf("error waiting for initial state:\n%v", err) + }) + + // Tell the alloc to stop + stopAlloc := alloc.Copy() + stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop + ar.Update(stopAlloc) + + // Wait for tasks to stop. 
+ testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + + if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead { + return false, fmt.Errorf("expected ephemeral task to be dead not %s", s) + } + + if last.TaskStates[ephemeralTask.Name].Failed { + return false, fmt.Errorf("expected ephemeral task to be successful not failed") + } + + if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead { + return false, fmt.Errorf("expected main task to be dead not %s", s) + } + + if last.TaskStates[mainTask.Name].Failed { + return false, fmt.Errorf("expected main task to be successful not failed") + } + + if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateDead { + return false, fmt.Errorf("expected sidecar task to be dead not %s", s) + } + + if last.TaskStates[sidecarTask.Name].Failed { + return false, fmt.Errorf("expected sidecar task to be successful not failed") + } + + if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStateRunning { + return false, fmt.Errorf("expected poststop task to be running not %s", s) + } + + return true, nil + }, func(err error) { + t.Fatalf("error waiting for kill state:\n%v", err) + }) + + last := upd.Last() + require.Less(t, last.TaskStates[ephemeralTask.Name].FinishedAt, last.TaskStates[mainTask.Name].FinishedAt) + require.Less(t, last.TaskStates[mainTask.Name].FinishedAt, last.TaskStates[sidecarTask.Name].FinishedAt) + + // Wait for poststop task to stop. + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + + if s := last.TaskStates[poststopTask.Name].State; s != structs.TaskStateDead { + return false, fmt.Errorf("expected poststop task to be dead not %s", s) + } + + if last.TaskStates[poststopTask.Name].Failed { + return false, fmt.Errorf("expected poststop task to be successful not failed") + } + + return true, nil + }, func(err error) { + t.Fatalf("error waiting for poststop state:\n%v", err) + }) + + last = upd.Last() + require.Less(t, last.TaskStates[sidecarTask.Name].FinishedAt, last.TaskStates[poststopTask.Name].FinishedAt) +} + +func TestHasSidecarTasks(t *testing.T) { + ci.Parallel(t) + + testCases := []struct { + name string + lifecycle []*structs.TaskLifecycleConfig + hasSidecars bool + hasNonsidecars bool + }{ + { + name: "all sidecar - one", + lifecycle: []*structs.TaskLifecycleConfig{ + { + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: true, + }, + }, + hasSidecars: true, + hasNonsidecars: false, + }, + { + name: "all sidecar - multiple", + lifecycle: []*structs.TaskLifecycleConfig{ + { + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: true, + }, + { + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: true, + }, + { + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: true, + }, + }, + hasSidecars: true, + hasNonsidecars: false, + }, + { + name: "some sidecars, some others", + lifecycle: []*structs.TaskLifecycleConfig{ + nil, + { + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: false, + }, + { + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: true, + }, + }, + hasSidecars: true, + hasNonsidecars: true, + }, + { + name: "no sidecars", + lifecycle: []*structs.TaskLifecycleConfig{ + nil, + { + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: false, + }, + nil, + }, + hasSidecars: false, + hasNonsidecars: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create alloc with the given task lifecycle configurations. 
+ alloc := mock.BatchAlloc() + + tasks := []*structs.Task{} + resources := map[string]*structs.AllocatedTaskResources{} + + tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name] + + for i, lifecycle := range tc.lifecycle { + task := alloc.Job.TaskGroups[0].Tasks[0].Copy() + task.Name = fmt.Sprintf("task%d", i) + task.Lifecycle = lifecycle + tasks = append(tasks, task) + resources[task.Name] = tr + } + + alloc.Job.TaskGroups[0].Tasks = tasks + alloc.AllocatedResources.Tasks = resources + + // Create alloc runner. + arConf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + + ar, err := NewAllocRunner(arConf) + require.NoError(t, err) + + require.Equal(t, tc.hasSidecars, hasSidecarTasks(ar.tasks), "sidecars") + + runners := []*taskrunner.TaskRunner{} + for _, r := range ar.tasks { + runners = append(runners, r) + } + require.Equal(t, tc.hasNonsidecars, hasNonSidecarTasks(runners), "non-sidecars") + + }) + } +} diff --git a/client/allocrunner/alloc_runner_unix_test.go b/client/allocrunner/alloc_runner_unix_test.go index ab3c777dd20..0859569101e 100644 --- a/client/allocrunner/alloc_runner_unix_test.go +++ b/client/allocrunner/alloc_runner_unix_test.go @@ -207,18 +207,18 @@ func TestAllocRunner_Restore_CompletedBatch(t *testing.T) { go ar2.Run() defer destroy(ar2) - // AR waitCh must be closed even when task doesn't run again + // AR waitCh must be open as the task waits for a possible alloc restart. select { case <-ar2.WaitCh(): - case <-time.After(10 * time.Second): - require.Fail(t, "alloc.waitCh wasn't closed") + require.Fail(t, "alloc.waitCh was closed") + default: } - // TR waitCh must be closed too! + // TR waitCh must be open too! select { case <-ar2.tasks[task.Name].WaitCh(): - case <-time.After(10 * time.Second): - require.Fail(t, "tr.waitCh wasn't closed") + require.Fail(t, "tr.waitCh was closed") + default: } // Assert that events are unmodified, which they would if task re-run diff --git a/client/allocrunner/config.go b/client/allocrunner/config.go index 0ec3ba51c3a..99d0490ef13 100644 --- a/client/allocrunner/config.go +++ b/client/allocrunner/config.go @@ -86,4 +86,7 @@ type Config struct { // ServiceRegWrapper is the handler wrapper that is used by service hooks // to perform service and check registration and deregistration. ServiceRegWrapper *wrapper.HandlerWrapper + + // Getter is an interface for retrieving artifacts. + Getter interfaces.ArtifactGetter } diff --git a/client/allocrunner/consul_grpc_sock_hook.go b/client/allocrunner/consul_grpc_sock_hook.go index 4a7cf4e34c6..2e6c60f7cd5 100644 --- a/client/allocrunner/consul_grpc_sock_hook.go +++ b/client/allocrunner/consul_grpc_sock_hook.go @@ -25,6 +25,11 @@ const ( // socketProxyStopWaitTime is the amount of time to wait for a socket proxy // to stop before assuming something went awry and return a timeout error. socketProxyStopWaitTime = 3 * time.Second + + // consulGRPCFallbackPort is the last resort fallback port to use in + // combination with the Consul HTTP config address when creating the + // socket. + consulGRPCFallbackPort = "8502" ) var ( @@ -34,7 +39,7 @@ var ( // consulGRPCSocketHook creates Unix sockets to allow communication from inside a // netns to Consul gRPC endpoint. // -// Noop for allocations without a group Connect stanza using bridge networking. +// Noop for allocations without a group Connect block using bridge networking. 
type consulGRPCSocketHook struct { logger hclog.Logger @@ -45,10 +50,20 @@ type consulGRPCSocketHook struct { proxy *grpcSocketProxy } -func newConsulGRPCSocketHook(logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *consulGRPCSocketHook { +func newConsulGRPCSocketHook( + logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, + config *config.ConsulConfig, nodeAttrs map[string]string) *consulGRPCSocketHook { + + // Attempt to find the gRPC port via the node attributes, otherwise use the + // default fallback. + consulGRPCPort, ok := nodeAttrs["consul.grpc"] + if !ok { + consulGRPCPort = consulGRPCFallbackPort + } + return &consulGRPCSocketHook{ alloc: alloc, - proxy: newGRPCSocketProxy(logger, allocDir, config), + proxy: newGRPCSocketProxy(logger, allocDir, config, consulGRPCPort), logger: logger.Named(consulGRPCSockHookName), } } @@ -119,21 +134,29 @@ type grpcSocketProxy struct { allocDir *allocdir.AllocDir config *config.ConsulConfig + // consulGRPCFallbackPort is the port to use if the operator did not + // specify a gRPC config address. + consulGRPCFallbackPort string + ctx context.Context cancel func() doneCh chan struct{} runOnce bool } -func newGRPCSocketProxy(logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *grpcSocketProxy { +func newGRPCSocketProxy( + logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig, + consulGRPCFallbackPort string) *grpcSocketProxy { + ctx, cancel := context.WithCancel(context.Background()) return &grpcSocketProxy{ - allocDir: allocDir, - config: config, - ctx: ctx, - cancel: cancel, - doneCh: make(chan struct{}), - logger: logger, + allocDir: allocDir, + config: config, + consulGRPCFallbackPort: consulGRPCFallbackPort, + ctx: ctx, + cancel: cancel, + doneCh: make(chan struct{}), + logger: logger, } } @@ -172,8 +195,7 @@ func (p *grpcSocketProxy) run(alloc *structs.Allocation) error { return fmt.Errorf("error parsing Consul address %q: %v", p.config.Addr, err) } - - destAddr = net.JoinHostPort(host, "8502") + destAddr = net.JoinHostPort(host, p.consulGRPCFallbackPort) } hostGRPCSocketPath := filepath.Join(p.allocDir.AllocDir, allocdir.AllocGRPCSocket) diff --git a/client/allocrunner/consul_grpc_sock_hook_test.go b/client/allocrunner/consul_grpc_sock_hook_test.go index d7e961db584..6e8e1ed069f 100644 --- a/client/allocrunner/consul_grpc_sock_hook_test.go +++ b/client/allocrunner/consul_grpc_sock_hook_test.go @@ -4,9 +4,7 @@ import ( "bytes" "context" "fmt" - "io/ioutil" "net" - "os" "path/filepath" "sync" "testing" @@ -44,7 +42,7 @@ func TestConsulGRPCSocketHook_PrerunPostrun_Ok(t *testing.T) { defer cleanup() // Start the unix socket proxy - h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) gRPCSock := filepath.Join(allocDir.AllocDir, allocdir.AllocGRPCSocket) @@ -118,7 +116,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // An alloc without a Connect proxy sidecar should not return // an error. 
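The hook now prefers the gRPC port fingerprinted into the node attributes (`consul.grpc`) and only falls back to 8502 when the attribute is absent. A minimal sketch of that address resolution, assuming a plain host:port Consul HTTP address; the real proxy also handles unix socket addresses and other edge cases:

```go
package main

import (
	"fmt"
	"net"
)

// consulGRPCAddr derives the gRPC destination from the Consul HTTP address,
// preferring the port found in the node attributes and falling back to 8502.
func consulGRPCAddr(httpAddr string, nodeAttrs map[string]string) (string, error) {
	port, ok := nodeAttrs["consul.grpc"]
	if !ok || port == "" {
		port = "8502" // consulGRPCFallbackPort
	}
	host, _, err := net.SplitHostPort(httpAddr)
	if err != nil {
		return "", fmt.Errorf("error parsing Consul address %q: %v", httpAddr, err)
	}
	return net.JoinHostPort(host, port), nil
}

func main() {
	addr, err := consulGRPCAddr("127.0.0.1:8500", map[string]string{"consul.grpc": "8503"})
	fmt.Println(addr, err) // 127.0.0.1:8503 <nil>

	addr, err = consulGRPCAddr("127.0.0.1:8500", nil)
	fmt.Println(addr, err) // 127.0.0.1:8502 <nil>
}
```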
- h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) // Postrun should be a noop @@ -128,7 +126,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // An alloc *with* a Connect proxy sidecar *should* return an error // when Consul is not configured. - h := newConsulGRPCSocketHook(logger, connectAlloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, connectAlloc, allocDir, consulConfig, map[string]string{}) require.EqualError(t, h.Prerun(), "consul address must be set on nomad client") // Postrun should be a noop @@ -138,7 +136,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // Updating an alloc without a sidecar to have a sidecar should // error when the sidecar is added. - h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) req := &interfaces.RunnerUpdateRequest{ @@ -156,11 +154,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { func TestConsulGRPCSocketHook_proxy_Unix(t *testing.T) { ci.Parallel(t) - dir, err := ioutil.TempDir("", "nomadtest_proxy_Unix") - require.NoError(t, err) - defer func() { - require.NoError(t, os.RemoveAll(dir)) - }() + dir := t.TempDir() // Setup fake listener that would be inside the netns (normally a unix // socket, but it doesn't matter for this test). diff --git a/client/allocrunner/csi_hook.go b/client/allocrunner/csi_hook.go index 2a2fe2963d1..0a09b469595 100644 --- a/client/allocrunner/csi_hook.go +++ b/client/allocrunner/csi_hook.go @@ -184,24 +184,29 @@ type volumeAndRequest struct { func (c *csiHook) claimVolumesFromAlloc() (map[string]*volumeAndRequest, error) { result := make(map[string]*volumeAndRequest) tg := c.alloc.Job.LookupTaskGroup(c.alloc.TaskGroup) + supportsVolumes := false - // Initially, populate the result map with all of the requests - for alias, volumeRequest := range tg.Volumes { + for _, task := range tg.Tasks { + caps, err := c.taskCapabilityGetter.GetTaskDriverCapabilities(task.Name) + if err != nil { + return nil, fmt.Errorf("could not validate task driver capabilities: %v", err) + } - if volumeRequest.Type == structs.VolumeTypeCSI { + if caps.MountConfigs == drivers.MountConfigSupportNone { + continue + } - for _, task := range tg.Tasks { - caps, err := c.taskCapabilityGetter.GetTaskDriverCapabilities(task.Name) - if err != nil { - return nil, fmt.Errorf("could not validate task driver capabilities: %v", err) - } + supportsVolumes = true + break + } - if caps.MountConfigs == drivers.MountConfigSupportNone { - return nil, fmt.Errorf( - "task driver %q for %q does not support CSI", task.Driver, task.Name) - } - } + if !supportsVolumes { + return nil, fmt.Errorf("no task supports CSI") + } + // Initially, populate the result map with all of the requests + for alias, volumeRequest := range tg.Volumes { + if volumeRequest.Type == structs.VolumeTypeCSI { result[alias] = &volumeAndRequest{request: volumeRequest} } } diff --git a/client/allocrunner/csi_hook_test.go b/client/allocrunner/csi_hook_test.go index 1d3b04ed36a..bb5362b9537 100644 --- a/client/allocrunner/csi_hook_test.go +++ b/client/allocrunner/csi_hook_test.go @@ -13,7 +13,7 @@ import ( "github.com/hashicorp/nomad/client/pluginmanager" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" cstructs 
"github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" @@ -193,8 +193,8 @@ func TestCSIHook(t *testing.T) { rpcer := mockRPCer{ alloc: alloc, callCounts: callCounts, - hasExistingClaim: helper.BoolToPtr(tc.startsWithClaims), - schedulable: helper.BoolToPtr(!tc.startsUnschedulable), + hasExistingClaim: pointer.Of(tc.startsWithClaims), + schedulable: pointer.Of(!tc.startsUnschedulable), } ar := mockAllocRunner{ res: &cstructs.AllocHookResources{}, @@ -232,6 +232,99 @@ func TestCSIHook(t *testing.T) { } +// TestCSIHook_claimVolumesFromAlloc_Validation tests that the validation of task +// capabilities in claimVolumesFromAlloc ensures at least one task supports CSI. +func TestCSIHook_claimVolumesFromAlloc_Validation(t *testing.T) { + ci.Parallel(t) + + alloc := mock.Alloc() + logger := testlog.HCLogger(t) + volumeRequests := map[string]*structs.VolumeRequest{ + "vol0": { + Name: "vol0", + Type: structs.VolumeTypeCSI, + Source: "testvolume0", + ReadOnly: true, + AccessMode: structs.CSIVolumeAccessModeSingleNodeReader, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + MountOptions: &structs.CSIMountOptions{}, + PerAlloc: false, + }, + } + + type testCase struct { + name string + caps *drivers.Capabilities + capFunc func() (*drivers.Capabilities, error) + expectedClaimErr error + } + + testcases := []testCase{ + { + name: "invalid - driver does not support CSI", + caps: &drivers.Capabilities{ + MountConfigs: drivers.MountConfigSupportNone, + }, + capFunc: nil, + expectedClaimErr: errors.New("claim volumes: no task supports CSI"), + }, + + { + name: "invalid - driver error", + caps: &drivers.Capabilities{}, + capFunc: func() (*drivers.Capabilities, error) { + return nil, errors.New("error thrown by driver") + }, + expectedClaimErr: errors.New("claim volumes: could not validate task driver capabilities: error thrown by driver"), + }, + + { + name: "valid - driver supports CSI", + caps: &drivers.Capabilities{ + MountConfigs: drivers.MountConfigSupportAll, + }, + capFunc: nil, + expectedClaimErr: nil, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + alloc.Job.TaskGroups[0].Volumes = volumeRequests + + callCounts := map[string]int{} + mgr := mockPluginManager{mounter: mockVolumeMounter{callCounts: callCounts}} + + rpcer := mockRPCer{ + alloc: alloc, + callCounts: callCounts, + hasExistingClaim: pointer.Of(false), + schedulable: pointer.Of(true), + } + + ar := mockAllocRunner{ + res: &cstructs.AllocHookResources{}, + caps: tc.caps, + capFunc: tc.capFunc, + } + + hook := newCSIHook(alloc, logger, mgr, rpcer, ar, ar, "secret") + require.NotNil(t, hook) + + if tc.expectedClaimErr != nil { + require.EqualError(t, hook.Prerun(), tc.expectedClaimErr.Error()) + mounts := ar.GetAllocHookResources().GetCSIMounts() + require.Nil(t, mounts) + } else { + require.NoError(t, hook.Prerun()) + mounts := ar.GetAllocHookResources().GetCSIMounts() + require.NotNil(t, mounts) + require.NoError(t, hook.Postrun()) + } + }) + } +} + // HELPERS AND MOCKS type mockRPCer struct { @@ -333,8 +426,9 @@ func (mgr mockPluginManager) PluginManager() pluginmanager.PluginManager { retur func (mgr mockPluginManager) Shutdown() {} type mockAllocRunner struct { - res *cstructs.AllocHookResources - caps *drivers.Capabilities + res *cstructs.AllocHookResources + caps *drivers.Capabilities + 
capFunc func() (*drivers.Capabilities, error) } func (ar mockAllocRunner) GetAllocHookResources() *cstructs.AllocHookResources { @@ -346,5 +440,8 @@ func (ar mockAllocRunner) SetAllocHookResources(res *cstructs.AllocHookResources } func (ar mockAllocRunner) GetTaskDriverCapabilities(taskName string) (*drivers.Capabilities, error) { + if ar.capFunc != nil { + return ar.capFunc() + } return ar.caps, nil } diff --git a/client/allocrunner/groupservice_hook.go b/client/allocrunner/groupservice_hook.go index 3e567537bb6..99b61015aa4 100644 --- a/client/allocrunner/groupservice_hook.go +++ b/client/allocrunner/groupservice_hook.go @@ -5,12 +5,13 @@ import ( "sync" "time" - log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/allocrunner/interfaces" "github.com/hashicorp/nomad/client/serviceregistration" "github.com/hashicorp/nomad/client/serviceregistration/wrapper" "github.com/hashicorp/nomad/client/taskenv" agentconsul "github.com/hashicorp/nomad/command/agent/consul" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" ) @@ -42,7 +43,7 @@ type groupServiceHook struct { // and check registration and deregistration. serviceRegWrapper *wrapper.HandlerWrapper - logger log.Logger + logger hclog.Logger // The following fields may be updated canary bool @@ -59,11 +60,11 @@ type groupServiceHook struct { type groupServiceHookConfig struct { alloc *structs.Allocation - restarter agentconsul.WorkloadRestarter + restarter serviceregistration.WorkloadRestarter taskEnvBuilder *taskenv.Builder networkStatusGetter networkStatusGetter shutdownDelayCtx context.Context - logger log.Logger + logger hclog.Logger // namespace is the Nomad or Consul namespace in which service // registrations will be made. @@ -120,23 +121,26 @@ func (h *groupServiceHook) Prerun() error { h.prerun = true h.mu.Unlock() }() - return h.prerunLocked() + return h.preRunLocked() } -func (h *groupServiceHook) prerunLocked() error { +// caller must hold h.lock +func (h *groupServiceHook) preRunLocked() error { if len(h.services) == 0 { return nil } - services := h.getWorkloadServices() + services := h.getWorkloadServicesLocked() return h.serviceRegWrapper.RegisterWorkload(services) } +// Update is run when a job submitter modifies service(s) (but not much else - +// otherwise a full alloc replacement would occur). func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error { h.mu.Lock() defer h.mu.Unlock() - oldWorkloadServices := h.getWorkloadServices() + oldWorkloadServices := h.getWorkloadServicesLocked() // Store new updated values out of request canary := false @@ -168,7 +172,7 @@ func (h *groupServiceHook) Update(req *interfaces.RunnerUpdateRequest) error { h.namespace = req.Alloc.ServiceProviderNamespace() // Create new task services struct with those new values - newWorkloadServices := h.getWorkloadServices() + newWorkloadServices := h.getWorkloadServicesLocked() if !h.prerun { // Update called before Prerun. 
Update alloc and exit to allow @@ -188,21 +192,20 @@ func (h *groupServiceHook) PreTaskRestart() error { }() h.preKillLocked() - return h.prerunLocked() + return h.preRunLocked() } func (h *groupServiceHook) PreKill() { - h.mu.Lock() - defer h.mu.Unlock() - h.preKillLocked() + helper.WithLock(&h.mu, h.preKillLocked) } -// implements the PreKill hook but requires the caller hold the lock +// implements the PreKill hook +// +// caller must hold h.lock func (h *groupServiceHook) preKillLocked() { // If we have a shutdown delay deregister group services and then wait // before continuing to kill tasks. - h.deregister() - h.deregistered = true + h.deregisterLocked() if h.delay == 0 { return @@ -219,24 +222,31 @@ func (h *groupServiceHook) preKillLocked() { } func (h *groupServiceHook) Postrun() error { - h.mu.Lock() - defer h.mu.Unlock() - - if !h.deregistered { - h.deregister() - } + helper.WithLock(&h.mu, h.deregisterLocked) return nil } -// deregister services from Consul. -func (h *groupServiceHook) deregister() { +// deregisterLocked will deregister services from Consul/Nomad service provider. +// +// caller must hold h.lock +func (h *groupServiceHook) deregisterLocked() { + if h.deregistered { + return + } + if len(h.services) > 0 { - workloadServices := h.getWorkloadServices() + workloadServices := h.getWorkloadServicesLocked() h.serviceRegWrapper.RemoveWorkload(workloadServices) } + + h.deregistered = true } -func (h *groupServiceHook) getWorkloadServices() *serviceregistration.WorkloadServices { +// getWorkloadServicesLocked returns the set of workload services currently +// on the hook. +// +// caller must hold h.lock +func (h *groupServiceHook) getWorkloadServicesLocked() *serviceregistration.WorkloadServices { // Interpolate with the task's environment interpolatedServices := taskenv.InterpolateServices(h.taskEnvBuilder.Build(), h.services) diff --git a/client/allocrunner/groupservice_hook_test.go b/client/allocrunner/groupservice_hook_test.go index 822ae04d72f..606dab8f37a 100644 --- a/client/allocrunner/groupservice_hook_test.go +++ b/client/allocrunner/groupservice_hook_test.go @@ -10,11 +10,11 @@ import ( "github.com/hashicorp/nomad/client/serviceregistration/wrapper" "github.com/hashicorp/nomad/client/taskenv" agentconsul "github.com/hashicorp/nomad/command/agent/consul" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) var _ interfaces.RunnerPrerunHook = (*groupServiceHook)(nil) @@ -50,22 +50,21 @@ func TestGroupServiceHook_NoGroupServices(t *testing.T) { taskEnvBuilder: taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region), logger: logger, }) - require.NoError(t, h.Prerun()) + must.NoError(t, h.Prerun()) req := &interfaces.RunnerUpdateRequest{Alloc: alloc} - require.NoError(t, h.Update(req)) + must.NoError(t, h.Update(req)) - require.NoError(t, h.Postrun()) + must.NoError(t, h.Postrun()) - require.NoError(t, h.PreTaskRestart()) + must.NoError(t, h.PreTaskRestart()) ops := consulMockClient.GetOps() - require.Len(t, ops, 5) - require.Equal(t, "add", ops[0].Op) // Prerun - require.Equal(t, "update", ops[1].Op) // Update - require.Equal(t, "remove", ops[2].Op) // Postrun - require.Equal(t, "remove", ops[3].Op) // Restart -> preKill - require.Equal(t, "add", ops[4].Op) // Restart -> preRun + must.Len(t, 4, ops) + must.Eq(t, "add", ops[0].Op) 
// Prerun + must.Eq(t, "update", ops[1].Op) // Update + must.Eq(t, "remove", ops[2].Op) // Postrun + must.Eq(t, "add", ops[3].Op) // Restart -> preRun } // TestGroupServiceHook_ShutdownDelayUpdate asserts calling group service hooks @@ -74,7 +73,7 @@ func TestGroupServiceHook_ShutdownDelayUpdate(t *testing.T) { ci.Parallel(t) alloc := mock.Alloc() - alloc.Job.TaskGroups[0].ShutdownDelay = helper.TimeToPtr(10 * time.Second) + alloc.Job.TaskGroups[0].ShutdownDelay = pointer.Of(10 * time.Second) logger := testlog.HCLogger(t) consulMockClient := regMock.NewServiceRegistrationHandler(logger) @@ -92,23 +91,23 @@ func TestGroupServiceHook_ShutdownDelayUpdate(t *testing.T) { taskEnvBuilder: taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region), logger: logger, }) - require.NoError(t, h.Prerun()) + must.NoError(t, h.Prerun()) // Incease shutdown Delay - alloc.Job.TaskGroups[0].ShutdownDelay = helper.TimeToPtr(15 * time.Second) + alloc.Job.TaskGroups[0].ShutdownDelay = pointer.Of(15 * time.Second) req := &interfaces.RunnerUpdateRequest{Alloc: alloc} - require.NoError(t, h.Update(req)) + must.NoError(t, h.Update(req)) // Assert that update updated the delay value - require.Equal(t, h.delay, 15*time.Second) + must.Eq(t, h.delay, 15*time.Second) // Remove shutdown delay alloc.Job.TaskGroups[0].ShutdownDelay = nil req = &interfaces.RunnerUpdateRequest{Alloc: alloc} - require.NoError(t, h.Update(req)) + must.NoError(t, h.Update(req)) // Assert that update updated the delay value - require.Equal(t, h.delay, 0*time.Second) + must.Eq(t, h.delay, 0*time.Second) } // TestGroupServiceHook_GroupServices asserts group service hooks with group @@ -133,22 +132,21 @@ func TestGroupServiceHook_GroupServices(t *testing.T) { taskEnvBuilder: taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region), logger: logger, }) - require.NoError(t, h.Prerun()) + must.NoError(t, h.Prerun()) req := &interfaces.RunnerUpdateRequest{Alloc: alloc} - require.NoError(t, h.Update(req)) + must.NoError(t, h.Update(req)) - require.NoError(t, h.Postrun()) + must.NoError(t, h.Postrun()) - require.NoError(t, h.PreTaskRestart()) + must.NoError(t, h.PreTaskRestart()) ops := consulMockClient.GetOps() - require.Len(t, ops, 5) - require.Equal(t, "add", ops[0].Op) // Prerun - require.Equal(t, "update", ops[1].Op) // Update - require.Equal(t, "remove", ops[2].Op) // Postrun - require.Equal(t, "remove", ops[3].Op) // Restart -> preKill - require.Equal(t, "add", ops[4].Op) // Restart -> preRun + must.Len(t, 4, ops) + must.Eq(t, "add", ops[0].Op) // Prerun + must.Eq(t, "update", ops[1].Op) // Update + must.Eq(t, "remove", ops[2].Op) // Postrun + must.Eq(t, "add", ops[3].Op) // Restart -> preRun } // TestGroupServiceHook_GroupServices_Nomad asserts group service hooks with @@ -179,25 +177,24 @@ func TestGroupServiceHook_GroupServices_Nomad(t *testing.T) { taskEnvBuilder: taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region), logger: logger, }) - require.NoError(t, h.Prerun()) + must.NoError(t, h.Prerun()) // Trigger our hook requests. req := &interfaces.RunnerUpdateRequest{Alloc: alloc} - require.NoError(t, h.Update(req)) - require.NoError(t, h.Postrun()) - require.NoError(t, h.PreTaskRestart()) + must.NoError(t, h.Update(req)) + must.NoError(t, h.Postrun()) + must.NoError(t, h.PreTaskRestart()) // Ensure the Nomad mock provider has the expected operations. 
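
The group service hook now funnels its locked helpers through `helper.WithLock` and guards deregistration with a `deregistered` flag, which is why the updated tests in this file expect four recorded operations instead of five: the deregister performed at Postrun makes the later restart-path deregister a no-op. A minimal sketch of that pattern, assuming a `WithLock(sync.Locker, func())` helper shaped like the one referenced in the diff (names here are illustrative, not the real hook):

```go
package main

import (
	"fmt"
	"sync"
)

// withLock runs fn while holding lock; signature assumed for illustration,
// mirroring the helper.WithLock calls in groupservice_hook.go.
func withLock(lock sync.Locker, fn func()) {
	lock.Lock()
	defer lock.Unlock()
	fn()
}

// hook is a toy stand-in for groupServiceHook: deregistration is guarded by
// a flag so a second call (e.g. Postrun after PreKill) becomes a no-op.
type hook struct {
	mu           sync.Mutex
	deregistered bool
}

// deregisterLocked must be called with h.mu held.
func (h *hook) deregisterLocked() {
	if h.deregistered {
		return // already removed, nothing to do
	}
	fmt.Println("remove workload services")
	h.deregistered = true
}

func (h *hook) PreKill() { withLock(&h.mu, h.deregisterLocked) }
func (h *hook) Postrun() { withLock(&h.mu, h.deregisterLocked) }

func main() {
	h := &hook{}
	h.PreKill() // records one "remove"
	h.Postrun() // no-op, so the mock registration handler sees one fewer op
}
```
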
ops := nomadMockClient.GetOps() - require.Len(t, ops, 5) - require.Equal(t, "add", ops[0].Op) // Prerun - require.Equal(t, "update", ops[1].Op) // Update - require.Equal(t, "remove", ops[2].Op) // Postrun - require.Equal(t, "remove", ops[3].Op) // Restart -> preKill - require.Equal(t, "add", ops[4].Op) // Restart -> preRun + must.Len(t, 4, ops) + must.Eq(t, "add", ops[0].Op) // Prerun + must.Eq(t, "update", ops[1].Op) // Update + must.Eq(t, "remove", ops[2].Op) // Postrun + must.Eq(t, "add", ops[3].Op) // Restart -> preRun // Ensure the Consul mock provider has zero operations. - require.Len(t, consulMockClient.GetOps(), 0) + must.SliceEmpty(t, consulMockClient.GetOps()) } // TestGroupServiceHook_Error asserts group service hooks with group @@ -234,22 +231,21 @@ func TestGroupServiceHook_NoNetwork(t *testing.T) { taskEnvBuilder: taskenv.NewBuilder(mock.Node(), alloc, nil, alloc.Job.Region), logger: logger, }) - require.NoError(t, h.Prerun()) + must.NoError(t, h.Prerun()) req := &interfaces.RunnerUpdateRequest{Alloc: alloc} - require.NoError(t, h.Update(req)) + must.NoError(t, h.Update(req)) - require.NoError(t, h.Postrun()) + must.NoError(t, h.Postrun()) - require.NoError(t, h.PreTaskRestart()) + must.NoError(t, h.PreTaskRestart()) ops := consulMockClient.GetOps() - require.Len(t, ops, 5) - require.Equal(t, "add", ops[0].Op) // Prerun - require.Equal(t, "update", ops[1].Op) // Update - require.Equal(t, "remove", ops[2].Op) // Postrun - require.Equal(t, "remove", ops[3].Op) // Restart -> preKill - require.Equal(t, "add", ops[4].Op) // Restart -> preRun + must.Len(t, 4, ops) + must.Eq(t, "add", ops[0].Op) // Prerun + must.Eq(t, "update", ops[1].Op) // Update + must.Eq(t, "remove", ops[2].Op) // Postrun + must.Eq(t, "add", ops[3].Op) // Restart -> preRun } func TestGroupServiceHook_getWorkloadServices(t *testing.T) { @@ -284,6 +280,6 @@ func TestGroupServiceHook_getWorkloadServices(t *testing.T) { logger: logger, }) - services := h.getWorkloadServices() - require.Len(t, services.Services, 1) + services := h.getWorkloadServicesLocked() + must.Len(t, 1, services.Services) } diff --git a/client/allocrunner/network_manager_linux.go b/client/allocrunner/network_manager_linux.go index a4a08ce29ce..3186f7c74c8 100644 --- a/client/allocrunner/network_manager_linux.go +++ b/client/allocrunner/network_manager_linux.go @@ -122,7 +122,18 @@ func (*defaultNetworkManager) CreateNetwork(allocID string, _ *drivers.NetworkCr nsPath := path.Join(nsutil.NetNSRunDir, allocID) _, err := os.Stat(nsPath) if err == nil { - return nil, false, nil + // Let's return a spec that points to the tested nspath, but indicate + // that we didn't make the namespace. That will stop the network_hook + // from calling its networkConfigurator.Setup function in the reconnect + // case, but provide the spec value necessary for the network_hook's + // Postrun function to not fast exit. 
+ spec := &drivers.NetworkIsolationSpec{ + Mode: drivers.NetIsolationModeGroup, + Path: nsPath, + Labels: make(map[string]string), + } + + return spec, false, nil } } return nil, false, err @@ -160,7 +171,7 @@ func netModeToIsolationMode(netMode string) drivers.NetIsolationMode { func newNetworkConfigurator(log hclog.Logger, alloc *structs.Allocation, config *clientconfig.Config) (NetworkConfigurator, error) { tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) - // Check if network stanza is given + // Check if network block is given if len(tg.Networks) == 0 { return &hostNetworkConfigurator{}, nil } diff --git a/client/allocrunner/networking_bridge_linux.go b/client/allocrunner/networking_bridge_linux.go index 1c331bddd22..732dd85a783 100644 --- a/client/allocrunner/networking_bridge_linux.go +++ b/client/allocrunner/networking_bridge_linux.go @@ -142,6 +142,9 @@ const nomadCNIConfigTemplate = `{ "cniVersion": "0.4.0", "name": "nomad", "plugins": [ + { + "type": "loopback" + }, { "type": "bridge", "bridge": "%s", diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 0ca806fb8f3..638e07eeb37 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -12,12 +12,14 @@ import ( "math/rand" "os" "path/filepath" + "regexp" "sort" "strings" "time" cni "github.com/containerd/go-cni" cnilibrary "github.com/containernetworking/cni/libcni" + "github.com/coreos/go-iptables/iptables" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" @@ -226,7 +228,101 @@ func (c *cniNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Al return err } - return c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))) + if err := c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil { + // create a real handle to iptables + ipt, iptErr := iptables.New() + if iptErr != nil { + return fmt.Errorf("failed to detect iptables: %w", iptErr) + } + // most likely the pause container was removed from underneath nomad + return c.forceCleanup(ipt, alloc.ID) + } + + return nil +} + +// IPTables is a subset of iptables.IPTables +type IPTables interface { + List(table, chain string) ([]string, error) + Delete(table, chain string, rule ...string) error + ClearAndDeleteChain(table, chain string) error +} + +var ( + // ipRuleRe is used to parse a postrouting iptables rule created by nomad, e.g. + // -A POSTROUTING -s 172.26.64.191/32 -m comment --comment "name: \"nomad\" id: \"6b235529-8111-4bbe-520b-d639b1d2a94e\"" -j CNI-50e58ea77dc52e0c731e3799 + ipRuleRe = regexp.MustCompile(`-A POSTROUTING -s (\S+) -m comment --comment "name: \\"nomad\\" id: \\"([[:xdigit:]-]+)\\"" -j (CNI-[[:xdigit:]]+)`) +) + +// forceCleanup is the backup plan for removing the iptables rule and chain associated with +// an allocation that was using bridge networking. The cni library refuses to handle a +// dirty state - e.g. the pause container is removed out of band, and so we must cleanup +// iptables ourselves to avoid leaking rules. 
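
The force-cleanup path below has to recover the iptables tokens from the textual rule via `ipRuleRe`. As a quick, standalone illustration of what that pattern extracts from a POSTROUTING rule, using the same example values as the `ipRuleRe` comment above:

```go
package main

import (
	"fmt"
	"regexp"
)

// Same pattern as ipRuleRe in networking_cni.go: capture the source CIDR,
// the allocation ID embedded in the CNI comment, and the per-alloc chain.
var ipRuleRe = regexp.MustCompile(`-A POSTROUTING -s (\S+) -m comment --comment "name: \\"nomad\\" id: \\"([[:xdigit:]-]+)\\"" -j (CNI-[[:xdigit:]]+)`)

func main() {
	rule := `-A POSTROUTING -s 172.26.64.191/32 -m comment --comment "name: \"nomad\" id: \"6b235529-8111-4bbe-520b-d639b1d2a94e\"" -j CNI-50e58ea77dc52e0c731e3799`

	subs := ipRuleRe.FindStringSubmatch(rule)
	if len(subs) != 4 {
		panic("rule did not match")
	}
	fmt.Println("cidr: ", subs[1]) // 172.26.64.191/32
	fmt.Println("alloc:", subs[2]) // 6b235529-8111-4bbe-520b-d639b1d2a94e
	fmt.Println("chain:", subs[3]) // CNI-50e58ea77dc52e0c731e3799
}
```
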
+func (c *cniNetworkConfigurator) forceCleanup(ipt IPTables, allocID string) error { + const ( + natTable = "nat" + postRoutingChain = "POSTROUTING" + commentFmt = `--comment "name: \"nomad\" id: \"%s\""` + ) + + // list the rules on the POSTROUTING chain of the nat table + rules, err := ipt.List(natTable, postRoutingChain) + if err != nil { + return fmt.Errorf("failed to list iptables rules: %w", err) + } + + // find the POSTROUTING rule associated with our allocation + matcher := fmt.Sprintf(commentFmt, allocID) + var ruleToPurge string + for _, rule := range rules { + if strings.Contains(rule, matcher) { + ruleToPurge = rule + break + } + } + + // no rule found for our allocation, just give up + if ruleToPurge == "" { + return fmt.Errorf("failed to find postrouting rule for alloc %s", allocID) + } + + // re-create the rule we need to delete, as tokens + subs := ipRuleRe.FindStringSubmatch(ruleToPurge) + if len(subs) != 4 { + return fmt.Errorf("failed to parse postrouting rule for alloc %s", allocID) + } + cidr := subs[1] + id := subs[2] + chainID := subs[3] + toDel := []string{ + `-s`, + cidr, + `-m`, + `comment`, + `--comment`, + `name: "nomad" id: "` + id + `"`, + `-j`, + chainID, + } + + // remove the jump rule + ok := true + if err = ipt.Delete(natTable, postRoutingChain, toDel...); err != nil { + c.logger.Warn("failed to remove iptables nat.POSTROUTING rule", "alloc_id", allocID, "chain", chainID, "error", err) + ok = false + } + + // remote the associated chain + if err = ipt.ClearAndDeleteChain(natTable, chainID); err != nil { + c.logger.Warn("failed to remove iptables nat chain", "chain", chainID, "error", err) + ok = false + } + + if !ok { + return fmt.Errorf("failed to cleanup iptables rules for alloc %s", allocID) + } + + return nil } func (c *cniNetworkConfigurator) ensureCNIInitialized() error { @@ -240,7 +336,7 @@ func (c *cniNetworkConfigurator) ensureCNIInitialized() error { // getPortMapping builds a list of portMapping structs that are used as the // portmapping capability arguments for the portmap CNI plugin func getPortMapping(alloc *structs.Allocation, ignoreHostIP bool) []cni.PortMapping { - ports := []cni.PortMapping{} + var ports []cni.PortMapping if len(alloc.AllocatedResources.Shared.Ports) == 0 && len(alloc.AllocatedResources.Shared.Networks) > 0 { for _, network := range alloc.AllocatedResources.Shared.Networks { diff --git a/client/allocrunner/networking_cni_test.go b/client/allocrunner/networking_cni_test.go index bc759272f50..3bc8f859908 100644 --- a/client/allocrunner/networking_cni_test.go +++ b/client/allocrunner/networking_cni_test.go @@ -1,19 +1,129 @@ //go:build linux -// +build linux package allocrunner import ( + "errors" "net" "testing" - cni "github.com/containerd/go-cni" + "github.com/containerd/go-cni" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +type mockIPTables struct { + listCall [2]string + listRules []string + listErr error + + deleteCall [2]string + deleteErr error + + clearCall [2]string + clearErr error +} + +func (ipt *mockIPTables) List(table, chain string) ([]string, error) { + ipt.listCall[0], ipt.listCall[1] = table, chain + return ipt.listRules, ipt.listErr +} + +func (ipt *mockIPTables) Delete(table, chain string, rule ...string) error { + ipt.deleteCall[0], ipt.deleteCall[1] = table, chain + return ipt.deleteErr +} + +func (ipt *mockIPTables) ClearAndDeleteChain(table, chain string) error 
{ + ipt.clearCall[0], ipt.clearCall[1] = table, chain + return ipt.clearErr +} + +func (ipt *mockIPTables) assert(t *testing.T, jumpChain string) { + // List assertions + must.Eq(t, "nat", ipt.listCall[0]) + must.Eq(t, "POSTROUTING", ipt.listCall[1]) + + // Delete assertions + must.Eq(t, "nat", ipt.deleteCall[0]) + must.Eq(t, "POSTROUTING", ipt.deleteCall[1]) + + // Clear assertions + must.Eq(t, "nat", ipt.clearCall[0]) + must.Eq(t, jumpChain, ipt.clearCall[1]) +} + +func TestCNI_forceCleanup(t *testing.T) { + t.Run("ok", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + listRules: []string{ + `-A POSTROUTING -m comment --comment "CNI portfwd requiring masquerade" -j CNI-HOSTPORT-MASQ`, + `-A POSTROUTING -s 172.17.0.0/16 ! -o docker0 -j MASQUERADE`, + `-A POSTROUTING -s 172.26.64.216/32 -m comment --comment "name: \"nomad\" id: \"79e8bf2e-a9c8-70ac-8d4e-fa5c4da99fbf\"" -j CNI-f2338c31d4de44472fe99c43`, + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + `-A POSTROUTING -s 172.26.64.218/32 -m comment --comment "name: \"nomad\" id: \"5ff6deb7-9bc1-1491-f20c-e87b15de501d\"" -j CNI-2fe7686eac2fe43714a7b850`, + `-A POSTROUTING -m mark --mark 0x2000/0x2000 -j MASQUERADE`, + `-A POSTROUTING -m comment --comment "CNI portfwd masquerade mark" -j MARK --set-xmark 0x2000/0x2000`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.NoError(t, err) + ipt.assert(t, "CNI-5d36f286cfbb35c5776509ec") + }) + + t.Run("missing allocation", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + listRules: []string{ + `-A POSTROUTING -m comment --comment "CNI portfwd requiring masquerade" -j CNI-HOSTPORT-MASQ`, + `-A POSTROUTING -s 172.17.0.0/16 ! 
-o docker0 -j MASQUERADE`, + `-A POSTROUTING -s 172.26.64.216/32 -m comment --comment "name: \"nomad\" id: \"79e8bf2e-a9c8-70ac-8d4e-fa5c4da99fbf\"" -j CNI-f2338c31d4de44472fe99c43`, + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"262d57a7-8f85-f3a4-9c3b-120c00ccbff1\"" -j CNI-5d36f286cfbb35c5776509ec`, + `-A POSTROUTING -s 172.26.64.218/32 -m comment --comment "name: \"nomad\" id: \"5ff6deb7-9bc1-1491-f20c-e87b15de501d\"" -j CNI-2fe7686eac2fe43714a7b850`, + `-A POSTROUTING -m mark --mark 0x2000/0x2000 -j MASQUERADE`, + `-A POSTROUTING -m comment --comment "CNI portfwd masquerade mark" -j MARK --set-xmark 0x2000/0x2000`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to find postrouting rule for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) + + t.Run("list error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{listErr: errors.New("list error")} + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to list iptables rules: list error") + }) + + t.Run("delete error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + deleteErr: errors.New("delete error"), + listRules: []string{ + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to cleanup iptables rules for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) + + t.Run("clear error", func(t *testing.T) { + c := cniNetworkConfigurator{logger: testlog.HCLogger(t)} + ipt := &mockIPTables{ + clearErr: errors.New("clear error"), + listRules: []string{ + `-A POSTROUTING -s 172.26.64.217/32 -m comment --comment "name: \"nomad\" id: \"2dd71cac-2b1e-ff08-167c-735f7f9f4964\"" -j CNI-5d36f286cfbb35c5776509ec`, + }, + } + err := c.forceCleanup(ipt, "2dd71cac-2b1e-ff08-167c-735f7f9f4964") + must.EqError(t, err, "failed to cleanup iptables rules for alloc 2dd71cac-2b1e-ff08-167c-735f7f9f4964") + }) +} + // TestCNI_cniToAllocNet_Fallback asserts if a CNI plugin result lacks an IP on // its sandbox interface, the first IP found is used. func TestCNI_cniToAllocNet_Fallback(t *testing.T) { diff --git a/client/allocrunner/task_hook_coordinator.go b/client/allocrunner/task_hook_coordinator.go deleted file mode 100644 index 09f90d22d67..00000000000 --- a/client/allocrunner/task_hook_coordinator.go +++ /dev/null @@ -1,201 +0,0 @@ -package allocrunner - -import ( - "context" - - "github.com/hashicorp/go-hclog" - "github.com/hashicorp/nomad/client/allocrunner/taskrunner" - "github.com/hashicorp/nomad/nomad/structs" -) - -// TaskHookCoordinator helps coordinate when mainTasks start tasks can launch -// namely after all Prestart Tasks have run, and after all BlockUntilCompleted have completed -type taskHookCoordinator struct { - logger hclog.Logger - - // constant for quickly starting all prestart tasks - closedCh chan struct{} - - // Each context is used to gate task runners launching the tasks. A task - // runner waits until the context associated its lifecycle context is - // done/cancelled. 
- mainTaskCtx context.Context - mainTaskCtxCancel func() - - poststartTaskCtx context.Context - poststartTaskCtxCancel func() - poststopTaskCtx context.Context - poststopTaskCtxCancel context.CancelFunc - - prestartSidecar map[string]struct{} - prestartEphemeral map[string]struct{} - mainTasksRunning map[string]struct{} // poststop: main tasks running -> finished - mainTasksPending map[string]struct{} // poststart: main tasks pending -> running -} - -func newTaskHookCoordinator(logger hclog.Logger, tasks []*structs.Task) *taskHookCoordinator { - closedCh := make(chan struct{}) - close(closedCh) - - mainTaskCtx, mainCancelFn := context.WithCancel(context.Background()) - poststartTaskCtx, poststartCancelFn := context.WithCancel(context.Background()) - poststopTaskCtx, poststopTaskCancelFn := context.WithCancel(context.Background()) - - c := &taskHookCoordinator{ - logger: logger, - closedCh: closedCh, - mainTaskCtx: mainTaskCtx, - mainTaskCtxCancel: mainCancelFn, - prestartSidecar: map[string]struct{}{}, - prestartEphemeral: map[string]struct{}{}, - mainTasksRunning: map[string]struct{}{}, - mainTasksPending: map[string]struct{}{}, - poststartTaskCtx: poststartTaskCtx, - poststartTaskCtxCancel: poststartCancelFn, - poststopTaskCtx: poststopTaskCtx, - poststopTaskCtxCancel: poststopTaskCancelFn, - } - c.setTasks(tasks) - return c -} - -func (c *taskHookCoordinator) setTasks(tasks []*structs.Task) { - for _, task := range tasks { - - if task.Lifecycle == nil { - c.mainTasksPending[task.Name] = struct{}{} - c.mainTasksRunning[task.Name] = struct{}{} - continue - } - - switch task.Lifecycle.Hook { - case structs.TaskLifecycleHookPrestart: - if task.Lifecycle.Sidecar { - c.prestartSidecar[task.Name] = struct{}{} - } else { - c.prestartEphemeral[task.Name] = struct{}{} - } - case structs.TaskLifecycleHookPoststart: - // Poststart hooks don't need to be tracked. - case structs.TaskLifecycleHookPoststop: - // Poststop hooks don't need to be tracked. - default: - c.logger.Error("invalid lifecycle hook", "task", task.Name, "hook", task.Lifecycle.Hook) - } - } - - if !c.hasPrestartTasks() { - c.mainTaskCtxCancel() - } -} - -func (c *taskHookCoordinator) hasPrestartTasks() bool { - return len(c.prestartSidecar)+len(c.prestartEphemeral) > 0 -} - -func (c *taskHookCoordinator) hasRunningMainTasks() bool { - return len(c.mainTasksRunning) > 0 -} - -func (c *taskHookCoordinator) hasPendingMainTasks() bool { - return len(c.mainTasksPending) > 0 -} - -func (c *taskHookCoordinator) startConditionForTask(task *structs.Task) <-chan struct{} { - if task.Lifecycle == nil { - return c.mainTaskCtx.Done() - } - - switch task.Lifecycle.Hook { - case structs.TaskLifecycleHookPrestart: - // Prestart tasks start without checking status of other tasks - return c.closedCh - case structs.TaskLifecycleHookPoststart: - return c.poststartTaskCtx.Done() - case structs.TaskLifecycleHookPoststop: - return c.poststopTaskCtx.Done() - default: - // it should never have a lifecycle stanza w/o a hook, so report an error but allow the task to start normally - c.logger.Error("invalid lifecycle hook", "task", task.Name, "hook", task.Lifecycle.Hook) - return c.mainTaskCtx.Done() - } -} - -// This is not thread safe! This must only be called from one thread per alloc runner. 
-func (c *taskHookCoordinator) taskStateUpdated(states map[string]*structs.TaskState) { - for task := range c.prestartSidecar { - st := states[task] - if st == nil || st.StartedAt.IsZero() { - continue - } - - delete(c.prestartSidecar, task) - } - - for task := range c.prestartEphemeral { - st := states[task] - if st == nil || !st.Successful() { - continue - } - - delete(c.prestartEphemeral, task) - } - - for task := range c.mainTasksRunning { - st := states[task] - - if st == nil || st.State != structs.TaskStateDead { - continue - } - - delete(c.mainTasksRunning, task) - } - - for task := range c.mainTasksPending { - st := states[task] - if st == nil || st.StartedAt.IsZero() { - continue - } - - delete(c.mainTasksPending, task) - } - - if !c.hasPrestartTasks() { - c.mainTaskCtxCancel() - } - - if !c.hasPendingMainTasks() { - c.poststartTaskCtxCancel() - } - if !c.hasRunningMainTasks() { - c.poststopTaskCtxCancel() - } -} - -func (c *taskHookCoordinator) StartPoststopTasks() { - c.poststopTaskCtxCancel() -} - -// hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks -func hasNonSidecarTasks(tasks []*taskrunner.TaskRunner) bool { - for _, tr := range tasks { - lc := tr.Task().Lifecycle - if lc == nil || !lc.Sidecar { - return true - } - } - - return false -} - -// hasSidecarTasks returns true if all the passed tasks are sidecar tasks -func hasSidecarTasks(tasks map[string]*taskrunner.TaskRunner) bool { - for _, tr := range tasks { - lc := tr.Task().Lifecycle - if lc != nil && lc.Sidecar { - return true - } - } - - return false -} diff --git a/client/allocrunner/task_hook_coordinator_test.go b/client/allocrunner/task_hook_coordinator_test.go deleted file mode 100644 index 7399acdabb8..00000000000 --- a/client/allocrunner/task_hook_coordinator_test.go +++ /dev/null @@ -1,381 +0,0 @@ -package allocrunner - -import ( - "fmt" - "testing" - "time" - - "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/client/allocrunner/taskrunner" - "github.com/hashicorp/nomad/helper/testlog" - "github.com/hashicorp/nomad/nomad/mock" - "github.com/hashicorp/nomad/nomad/structs" - "github.com/stretchr/testify/require" -) - -func TestTaskHookCoordinator_OnlyMainApp(t *testing.T) { - ci.Parallel(t) - - alloc := mock.Alloc() - tasks := alloc.Job.TaskGroups[0].Tasks - task := tasks[0] - logger := testlog.HCLogger(t) - - coord := newTaskHookCoordinator(logger, tasks) - - ch := coord.startConditionForTask(task) - - require.Truef(t, isChannelClosed(ch), "%s channel was open, should be closed", task.Name) -} - -func TestTaskHookCoordinator_PrestartRunsBeforeMain(t *testing.T) { - ci.Parallel(t) - - logger := testlog.HCLogger(t) - - alloc := mock.LifecycleAlloc() - tasks := alloc.Job.TaskGroups[0].Tasks - - mainTask := tasks[0] - sideTask := tasks[1] - initTask := tasks[2] - - coord := newTaskHookCoordinator(logger, tasks) - initCh := coord.startConditionForTask(initTask) - sideCh := coord.startConditionForTask(sideTask) - mainCh := coord.startConditionForTask(mainTask) - - require.Truef(t, isChannelClosed(initCh), "%s channel was open, should be closed", initTask.Name) - require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) -} - -func TestTaskHookCoordinator_MainRunsAfterPrestart(t *testing.T) { - ci.Parallel(t) - - logger := testlog.HCLogger(t) - - alloc := mock.LifecycleAlloc() - tasks := alloc.Job.TaskGroups[0].Tasks - - mainTask := tasks[0] - 
sideTask := tasks[1] - initTask := tasks[2] - - coord := newTaskHookCoordinator(logger, tasks) - initCh := coord.startConditionForTask(initTask) - sideCh := coord.startConditionForTask(sideTask) - mainCh := coord.startConditionForTask(mainTask) - - require.Truef(t, isChannelClosed(initCh), "%s channel was open, should be closed", initTask.Name) - require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) - - states := map[string]*structs.TaskState{ - mainTask.Name: { - State: structs.TaskStatePending, - Failed: false, - }, - initTask.Name: { - State: structs.TaskStateDead, - Failed: false, - StartedAt: time.Now(), - FinishedAt: time.Now(), - }, - sideTask.Name: { - State: structs.TaskStateRunning, - Failed: false, - StartedAt: time.Now(), - }, - } - - coord.taskStateUpdated(states) - - require.Truef(t, isChannelClosed(initCh), "%s channel was open, should be closed", initTask.Name) - require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) - require.Truef(t, isChannelClosed(mainCh), "%s channel was open, should be closed", mainTask.Name) -} - -func TestTaskHookCoordinator_MainRunsAfterManyInitTasks(t *testing.T) { - ci.Parallel(t) - - logger := testlog.HCLogger(t) - - alloc := mock.LifecycleAlloc() - alloc.Job = mock.VariableLifecycleJob(structs.Resources{CPU: 100, MemoryMB: 256}, 1, 2, 0) - tasks := alloc.Job.TaskGroups[0].Tasks - - mainTask := tasks[0] - init1Task := tasks[1] - init2Task := tasks[2] - - coord := newTaskHookCoordinator(logger, tasks) - mainCh := coord.startConditionForTask(mainTask) - init1Ch := coord.startConditionForTask(init1Task) - init2Ch := coord.startConditionForTask(init2Task) - - require.Truef(t, isChannelClosed(init1Ch), "%s channel was open, should be closed", init1Task.Name) - require.Truef(t, isChannelClosed(init2Ch), "%s channel was open, should be closed", init2Task.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) - - states := map[string]*structs.TaskState{ - mainTask.Name: { - State: structs.TaskStatePending, - Failed: false, - }, - init1Task.Name: { - State: structs.TaskStateDead, - Failed: false, - StartedAt: time.Now(), - FinishedAt: time.Now(), - }, - init2Task.Name: { - State: structs.TaskStateDead, - Failed: false, - StartedAt: time.Now(), - }, - } - - coord.taskStateUpdated(states) - - require.Truef(t, isChannelClosed(init1Ch), "%s channel was open, should be closed", init1Task.Name) - require.Truef(t, isChannelClosed(init2Ch), "%s channel was open, should be closed", init2Task.Name) - require.Truef(t, isChannelClosed(mainCh), "%s channel was open, should be closed", mainTask.Name) -} - -func TestTaskHookCoordinator_FailedInitTask(t *testing.T) { - ci.Parallel(t) - - logger := testlog.HCLogger(t) - - alloc := mock.LifecycleAlloc() - alloc.Job = mock.VariableLifecycleJob(structs.Resources{CPU: 100, MemoryMB: 256}, 1, 2, 0) - tasks := alloc.Job.TaskGroups[0].Tasks - - mainTask := tasks[0] - init1Task := tasks[1] - init2Task := tasks[2] - - coord := newTaskHookCoordinator(logger, tasks) - mainCh := coord.startConditionForTask(mainTask) - init1Ch := coord.startConditionForTask(init1Task) - init2Ch := coord.startConditionForTask(init2Task) - - require.Truef(t, isChannelClosed(init1Ch), "%s channel was open, should be closed", init1Task.Name) - require.Truef(t, isChannelClosed(init2Ch), "%s channel was open, should be 
closed", init2Task.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) - - states := map[string]*structs.TaskState{ - mainTask.Name: { - State: structs.TaskStatePending, - Failed: false, - }, - init1Task.Name: { - State: structs.TaskStateDead, - Failed: false, - StartedAt: time.Now(), - FinishedAt: time.Now(), - }, - init2Task.Name: { - State: structs.TaskStateDead, - Failed: true, - StartedAt: time.Now(), - }, - } - - coord.taskStateUpdated(states) - - require.Truef(t, isChannelClosed(init1Ch), "%s channel was open, should be closed", init1Task.Name) - require.Truef(t, isChannelClosed(init2Ch), "%s channel was open, should be closed", init2Task.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) -} - -func TestTaskHookCoordinator_SidecarNeverStarts(t *testing.T) { - ci.Parallel(t) - - logger := testlog.HCLogger(t) - - alloc := mock.LifecycleAlloc() - tasks := alloc.Job.TaskGroups[0].Tasks - - mainTask := tasks[0] - sideTask := tasks[1] - initTask := tasks[2] - - coord := newTaskHookCoordinator(logger, tasks) - initCh := coord.startConditionForTask(initTask) - sideCh := coord.startConditionForTask(sideTask) - mainCh := coord.startConditionForTask(mainTask) - - require.Truef(t, isChannelClosed(initCh), "%s channel was open, should be closed", initTask.Name) - require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) - - states := map[string]*structs.TaskState{ - mainTask.Name: { - State: structs.TaskStatePending, - Failed: false, - }, - initTask.Name: { - State: structs.TaskStateDead, - Failed: false, - StartedAt: time.Now(), - FinishedAt: time.Now(), - }, - sideTask.Name: { - State: structs.TaskStatePending, - Failed: false, - }, - } - - coord.taskStateUpdated(states) - - require.Truef(t, isChannelClosed(initCh), "%s channel was open, should be closed", initTask.Name) - require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) -} - -func TestTaskHookCoordinator_PoststartStartsAfterMain(t *testing.T) { - ci.Parallel(t) - - logger := testlog.HCLogger(t) - - alloc := mock.LifecycleAlloc() - tasks := alloc.Job.TaskGroups[0].Tasks - - mainTask := tasks[0] - sideTask := tasks[1] - postTask := tasks[2] - - // Make the the third task a poststart hook - postTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart - - coord := newTaskHookCoordinator(logger, tasks) - postCh := coord.startConditionForTask(postTask) - sideCh := coord.startConditionForTask(sideTask) - mainCh := coord.startConditionForTask(mainTask) - - require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) - require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", postTask.Name) - - states := map[string]*structs.TaskState{ - postTask.Name: { - State: structs.TaskStatePending, - Failed: false, - }, - mainTask.Name: { - State: structs.TaskStateRunning, - Failed: false, - StartedAt: time.Now(), - }, - sideTask.Name: { - State: structs.TaskStateRunning, - Failed: false, - StartedAt: time.Now(), - }, - } - - coord.taskStateUpdated(states) - - require.Truef(t, isChannelClosed(postCh), "%s 
channel was open, should be closed", postTask.Name) - require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) - require.Truef(t, isChannelClosed(mainCh), "%s channel was open, should be closed", mainTask.Name) -} - -func isChannelClosed(ch <-chan struct{}) bool { - select { - case <-ch: - return true - default: - return false - } -} - -func TestHasSidecarTasks(t *testing.T) { - ci.Parallel(t) - - falseV, trueV := false, true - - cases := []struct { - name string - // nil if main task, false if non-sidecar hook, true if sidecar hook - indicators []*bool - - hasSidecars bool - hasNonsidecars bool - }{ - { - name: "all sidecar - one", - indicators: []*bool{&trueV}, - hasSidecars: true, - hasNonsidecars: false, - }, - { - name: "all sidecar - multiple", - indicators: []*bool{&trueV, &trueV, &trueV}, - hasSidecars: true, - hasNonsidecars: false, - }, - { - name: "some sidecars, some others", - indicators: []*bool{nil, &falseV, &trueV}, - hasSidecars: true, - hasNonsidecars: true, - }, - { - name: "no sidecars", - indicators: []*bool{nil, &falseV, nil}, - hasSidecars: false, - hasNonsidecars: true, - }, - } - - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - alloc := allocWithSidecarIndicators(c.indicators) - arConf, cleanup := testAllocRunnerConfig(t, alloc) - defer cleanup() - - ar, err := NewAllocRunner(arConf) - require.NoError(t, err) - - require.Equal(t, c.hasSidecars, hasSidecarTasks(ar.tasks), "sidecars") - - runners := []*taskrunner.TaskRunner{} - for _, r := range ar.tasks { - runners = append(runners, r) - } - require.Equal(t, c.hasNonsidecars, hasNonSidecarTasks(runners), "non-sidecars") - - }) - } -} - -func allocWithSidecarIndicators(indicators []*bool) *structs.Allocation { - alloc := mock.BatchAlloc() - - tasks := []*structs.Task{} - resources := map[string]*structs.AllocatedTaskResources{} - - tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name] - - for i, indicator := range indicators { - task := alloc.Job.TaskGroups[0].Tasks[0].Copy() - task.Name = fmt.Sprintf("task%d", i) - if indicator != nil { - task.Lifecycle = &structs.TaskLifecycleConfig{ - Hook: structs.TaskLifecycleHookPrestart, - Sidecar: *indicator, - } - } - tasks = append(tasks, task) - resources[task.Name] = tr - } - - alloc.Job.TaskGroups[0].Tasks = tasks - - alloc.AllocatedResources.Tasks = resources - return alloc - -} diff --git a/client/allocrunner/tasklifecycle/coordinator.go b/client/allocrunner/tasklifecycle/coordinator.go new file mode 100644 index 00000000000..90a8e7fc097 --- /dev/null +++ b/client/allocrunner/tasklifecycle/coordinator.go @@ -0,0 +1,427 @@ +package tasklifecycle + +import ( + "fmt" + "sync" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/nomad/structs" +) + +// coordinatorState represents a state of the task lifecycle Coordinator FSM. 
+type coordinatorState uint8 + +const ( + coordinatorStateInit coordinatorState = iota + coordinatorStatePrestart + coordinatorStateMain + coordinatorStatePoststart + coordinatorStateWaitAlloc + coordinatorStatePoststop +) + +func (s coordinatorState) String() string { + switch s { + case coordinatorStateInit: + return "init" + case coordinatorStatePrestart: + return "prestart" + case coordinatorStateMain: + return "main" + case coordinatorStatePoststart: + return "poststart" + case coordinatorStateWaitAlloc: + return "wait_alloc" + case coordinatorStatePoststop: + return "poststart" + } + panic(fmt.Sprintf("Unexpected task coordinator state %d", s)) +} + +// lifecycleStage represents a lifecycle configuration used for task +// coordination. +// +// Not all possible combinations of hook X sidecar are defined, only the ones +// that are relevant for coordinating task initialization order. For example, a +// main task with sidecar set to `true` starts at the same time as a +// non-sidecar main task, so there is no need to treat them differently. +type lifecycleStage uint8 + +const ( + // lifecycleStagePrestartEphemeral are tasks with the "prestart" hook and + // sidecar set to "false". + lifecycleStagePrestartEphemeral lifecycleStage = iota + + // lifecycleStagePrestartSidecar are tasks with the "prestart" hook and + // sidecar set to "true". + lifecycleStagePrestartSidecar + + // lifecycleStageMain are tasks without a lifecycle or a lifecycle with an + // empty hook value. + lifecycleStageMain + + // lifecycleStagePoststartEphemeral are tasks with the "poststart" hook and + // sidecar set to "false" + lifecycleStagePoststartEphemeral + + // lifecycleStagePoststartSidecar are tasks with the "poststart" hook and + // sidecar set to "true". + lifecycleStagePoststartSidecar + + // lifecycleStagePoststop are tasks with the "poststop" hook. + lifecycleStagePoststop +) + +// Coordinator controls when tasks with a given lifecycle configuration are +// allowed to start and run. +// +// It behaves like a finite state machine where each state transition blocks or +// allows some task lifecycle types to run. +type Coordinator struct { + logger hclog.Logger + + // tasksByLifecycle is an index used to group and quickly access tasks by + // their lifecycle stage. + tasksByLifecycle map[lifecycleStage][]string + + // currentState is the current state of the FSM. It must only be accessed + // while holding the lock. + currentState coordinatorState + currentStateLock sync.RWMutex + + // gates store the gates that control each task lifecycle stage. + gates map[lifecycleStage]*Gate +} + +// NewCoordinator returns a new Coordinator with all tasks initially blocked. +func NewCoordinator(logger hclog.Logger, tasks []*structs.Task, shutdownCh <-chan struct{}) *Coordinator { + c := &Coordinator{ + logger: logger.Named("task_coordinator"), + tasksByLifecycle: indexTasksByLifecycle(tasks), + gates: make(map[lifecycleStage]*Gate), + } + + for lifecycle := range c.tasksByLifecycle { + c.gates[lifecycle] = NewGate(shutdownCh) + } + + c.enterStateLocked(coordinatorStateInit) + return c +} + +// Restart sets the Coordinator state back to "init" and is used to coordinate +// a full alloc restart. Since all tasks will run again they need to be pending +// before they are allowed to proceed. 
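
`NewCoordinator` above creates one `Gate` per lifecycle stage; the `Gate` type lives elsewhere in the `tasklifecycle` package and is not part of this hunk. For reading the FSM code, the contract that matters is: `WaitCh` blocks while the gate is closed and unblocks once `Open` is called. A minimal channel-based sketch of that contract (an illustration only, not the package's actual implementation):

```go
package main

import (
	"fmt"
	"sync"
)

// gate is a simplified stand-in for tasklifecycle.Gate: Open unblocks every
// current and future WaitCh reader, Close blocks them again.
type gate struct {
	mu sync.Mutex
	ch chan struct{}
}

func newGate() *gate {
	return &gate{ch: make(chan struct{})} // starts closed
}

func (g *gate) Open() {
	g.mu.Lock()
	defer g.mu.Unlock()
	select {
	case <-g.ch: // already open
	default:
		close(g.ch)
	}
}

func (g *gate) Close() {
	g.mu.Lock()
	defer g.mu.Unlock()
	select {
	case <-g.ch:
		g.ch = make(chan struct{}) // was open, re-arm the gate
	default: // already closed
	}
}

// WaitCh returns a channel that is closed while the gate is open.
func (g *gate) WaitCh() <-chan struct{} {
	g.mu.Lock()
	defer g.mu.Unlock()
	return g.ch
}

func main() {
	g := newGate()
	select {
	case <-g.WaitCh():
		fmt.Println("unexpected: gate started open")
	default:
		fmt.Println("blocked: task may not start yet")
	}
	g.Open()
	<-g.WaitCh()
	fmt.Println("unblocked: task may start")
}
```
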
+func (c *Coordinator) Restart() { + c.currentStateLock.Lock() + defer c.currentStateLock.Unlock() + c.enterStateLocked(coordinatorStateInit) +} + +// Restore is used to set the Coordinator FSM to the correct state when an +// alloc is restored. Must be called before the allocrunner is running. +func (c *Coordinator) Restore(states map[string]*structs.TaskState) { + // Skip the "init" state when restoring since the tasks were likely already + // running, causing the Coordinator to be stuck waiting for them to be + // "pending". + c.enterStateLocked(coordinatorStatePrestart) + c.TaskStateUpdated(states) +} + +// StartConditionForTask returns a channel that is unblocked when the task is +// allowed to run. +func (c *Coordinator) StartConditionForTask(task *structs.Task) <-chan struct{} { + lifecycle := taskLifecycleStage(task) + return c.gates[lifecycle].WaitCh() +} + +// TaskStateUpdated notifies that a task state has changed. This may cause the +// Coordinator to transition to another state. +func (c *Coordinator) TaskStateUpdated(states map[string]*structs.TaskState) { + c.currentStateLock.Lock() + defer c.currentStateLock.Unlock() + + // We may be able to move directly through some states (for example, when + // an alloc doesn't have any prestart task we can skip the prestart state), + // so loop until we stabilize. + // This is also important when restoring an alloc since we need to find the + // state where FSM was last positioned. + for { + nextState := c.nextStateLocked(states) + if nextState == c.currentState { + return + } + + c.enterStateLocked(nextState) + } +} + +// nextStateLocked returns the state the FSM should transition to given its +// current internal state and the received states of the tasks. +// The currentStateLock must be held before calling this method. +func (c *Coordinator) nextStateLocked(states map[string]*structs.TaskState) coordinatorState { + + // coordinatorStatePoststop is the terminal state of the FSM, and can be + // reached at any time. + if c.isAllocDone(states) { + return coordinatorStatePoststop + } + + switch c.currentState { + case coordinatorStateInit: + if !c.isInitDone(states) { + return coordinatorStateInit + } + return coordinatorStatePrestart + + case coordinatorStatePrestart: + if !c.isPrestartDone(states) { + return coordinatorStatePrestart + } + return coordinatorStateMain + + case coordinatorStateMain: + if !c.isMainDone(states) { + return coordinatorStateMain + } + return coordinatorStatePoststart + + case coordinatorStatePoststart: + if !c.isPoststartDone(states) { + return coordinatorStatePoststart + } + return coordinatorStateWaitAlloc + + case coordinatorStateWaitAlloc: + if !c.isAllocDone(states) { + return coordinatorStateWaitAlloc + } + return coordinatorStatePoststop + + case coordinatorStatePoststop: + return coordinatorStatePoststop + } + + // If the code reaches here it's a programming error, since the switch + // statement should cover all possible states and return the next state. + panic(fmt.Sprintf("unexpected state %s", c.currentState)) +} + +// enterStateLocked updates the current state of the Coordinator FSM and +// executes any action necessary for the state transition. +// The currentStateLock must be held before calling this method. 
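
The "loop until we stabilize" pattern in `TaskStateUpdated` above lets one notification advance the FSM through several states at once, for example an alloc with no prestart tasks moves straight from "init" through "prestart" to "main". A self-contained toy version of that driver loop, with a deliberately trivial transition function:

```go
package main

import "fmt"

// state is a toy stand-in for coordinatorState.
type state int

const (
	stInit state = iota
	stPrestart
	stMain
)

// next is a trivial stand-in for nextStateLocked: it advances while the
// current stage has nothing left to wait for.
func next(cur state, hasPrestart bool) state {
	switch cur {
	case stInit:
		return stPrestart
	case stPrestart:
		if hasPrestart {
			return stPrestart // still waiting on prestart tasks
		}
		return stMain
	default:
		return cur
	}
}

func main() {
	cur := stInit
	for {
		n := next(cur, false) // no prestart tasks in this toy alloc
		if n == cur {
			break // stabilized
		}
		fmt.Printf("transition %d -> %d\n", cur, n)
		cur = n
	}
}
```
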
+func (c *Coordinator) enterStateLocked(state coordinatorState) { + c.logger.Trace("state transition", "from", c.currentState, "to", state) + + switch state { + case coordinatorStateInit: + c.block(lifecycleStagePrestartEphemeral) + c.block(lifecycleStagePrestartSidecar) + c.block(lifecycleStageMain) + c.block(lifecycleStagePoststartEphemeral) + c.block(lifecycleStagePoststartSidecar) + c.block(lifecycleStagePoststop) + + case coordinatorStatePrestart: + c.block(lifecycleStageMain) + c.block(lifecycleStagePoststartEphemeral) + c.block(lifecycleStagePoststartSidecar) + c.block(lifecycleStagePoststop) + + c.allow(lifecycleStagePrestartEphemeral) + c.allow(lifecycleStagePrestartSidecar) + + case coordinatorStateMain: + c.block(lifecycleStagePrestartEphemeral) + c.block(lifecycleStagePoststartEphemeral) + c.block(lifecycleStagePoststartSidecar) + c.block(lifecycleStagePoststop) + + c.allow(lifecycleStagePrestartSidecar) + c.allow(lifecycleStageMain) + + case coordinatorStatePoststart: + c.block(lifecycleStagePrestartEphemeral) + c.block(lifecycleStagePoststop) + + c.allow(lifecycleStagePrestartSidecar) + c.allow(lifecycleStageMain) + c.allow(lifecycleStagePoststartEphemeral) + c.allow(lifecycleStagePoststartSidecar) + + case coordinatorStateWaitAlloc: + c.block(lifecycleStagePrestartEphemeral) + c.block(lifecycleStagePoststartEphemeral) + c.block(lifecycleStagePoststop) + + c.allow(lifecycleStagePrestartSidecar) + c.allow(lifecycleStageMain) + c.allow(lifecycleStagePoststartSidecar) + + case coordinatorStatePoststop: + c.block(lifecycleStagePrestartEphemeral) + c.block(lifecycleStagePrestartSidecar) + c.block(lifecycleStageMain) + c.block(lifecycleStagePoststartEphemeral) + c.block(lifecycleStagePoststartSidecar) + + c.allow(lifecycleStagePoststop) + } + + c.currentState = state +} + +// isInitDone returns true when the following conditions are met: +// - all tasks are in the "pending" state. +func (c *Coordinator) isInitDone(states map[string]*structs.TaskState) bool { + for _, task := range states { + if task.State != structs.TaskStatePending { + return false + } + } + return true +} + +// isPrestartDone returns true when the following conditions are met: +// - there is at least one prestart task +// - all ephemeral prestart tasks are successful. +// - no ephemeral prestart task has failed. +// - all prestart sidecar tasks are running. +func (c *Coordinator) isPrestartDone(states map[string]*structs.TaskState) bool { + if !c.hasPrestart() { + return true + } + + for _, task := range c.tasksByLifecycle[lifecycleStagePrestartEphemeral] { + if !states[task].Successful() { + return false + } + } + for _, task := range c.tasksByLifecycle[lifecycleStagePrestartSidecar] { + if states[task].State != structs.TaskStateRunning { + return false + } + } + return true +} + +// isMainDone returns true when the following conditions are met: +// - there is at least one main task. +// - all main tasks are no longer "pending". +func (c *Coordinator) isMainDone(states map[string]*structs.TaskState) bool { + if !c.hasMain() { + return true + } + + for _, task := range c.tasksByLifecycle[lifecycleStageMain] { + if states[task].State == structs.TaskStatePending { + return false + } + } + return true +} + +// isPoststartDone returns true when the following conditions are met: +// - there is at least one poststart task. +// - all ephemeral poststart tasks are in the "dead" state. 
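
`enterStateLocked` above encodes which lifecycle stages may run in each FSM state. Restated as data for readability (transcribed directly from the switch; every stage not listed for a state is blocked):

```go
package main

import "fmt"

// Allowed lifecycle stages per coordinator state, per enterStateLocked.
var allowedStages = map[string][]string{
	"init":       {},
	"prestart":   {"prestart_ephemeral", "prestart_sidecar"},
	"main":       {"prestart_sidecar", "main"},
	"poststart":  {"prestart_sidecar", "main", "poststart_ephemeral", "poststart_sidecar"},
	"wait_alloc": {"prestart_sidecar", "main", "poststart_sidecar"},
	"poststop":   {"poststop"},
}

func main() {
	for _, s := range []string{"init", "prestart", "main", "poststart", "wait_alloc", "poststop"} {
		fmt.Printf("%-10s allows %v\n", s, allowedStages[s])
	}
}
```
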
+func (c *Coordinator) isPoststartDone(states map[string]*structs.TaskState) bool { + if !c.hasPoststart() { + return true + } + + for _, task := range c.tasksByLifecycle[lifecycleStagePoststartEphemeral] { + if states[task].State != structs.TaskStateDead { + return false + } + } + return true +} + +// isAllocDone returns true when the following conditions are met: +// - all non-poststop tasks are in the "dead" state. +func (c *Coordinator) isAllocDone(states map[string]*structs.TaskState) bool { + for lifecycle, tasks := range c.tasksByLifecycle { + if lifecycle == lifecycleStagePoststop { + continue + } + + for _, task := range tasks { + if states[task].State != structs.TaskStateDead { + return false + } + } + } + return true +} + +func (c *Coordinator) hasPrestart() bool { + return len(c.tasksByLifecycle[lifecycleStagePrestartEphemeral])+ + len(c.tasksByLifecycle[lifecycleStagePrestartSidecar]) > 0 +} + +func (c *Coordinator) hasMain() bool { + return len(c.tasksByLifecycle[lifecycleStageMain]) > 0 +} + +func (c *Coordinator) hasPoststart() bool { + return len(c.tasksByLifecycle[lifecycleStagePoststartEphemeral])+ + len(c.tasksByLifecycle[lifecycleStagePoststartSidecar]) > 0 +} + +func (c *Coordinator) hasPoststop() bool { + return len(c.tasksByLifecycle[lifecycleStagePoststop]) > 0 +} + +// block is used to block the execution of tasks in the given lifecycle stage. +func (c *Coordinator) block(lifecycle lifecycleStage) { + gate := c.gates[lifecycle] + if gate != nil { + gate.Close() + } +} + +// allows is used to allow the execution of tasks in the given lifecycle stage. +func (c *Coordinator) allow(lifecycle lifecycleStage) { + gate := c.gates[lifecycle] + if gate != nil { + gate.Open() + } +} + +// indexTasksByLifecycle generates a map that groups tasks by their lifecycle +// configuration. This makes it easier to retrieve tasks by these groups or to +// determine if a task has a certain lifecycle configuration. +func indexTasksByLifecycle(tasks []*structs.Task) map[lifecycleStage][]string { + index := make(map[lifecycleStage][]string) + + for _, task := range tasks { + lifecycle := taskLifecycleStage(task) + + if _, ok := index[lifecycle]; !ok { + index[lifecycle] = []string{} + } + index[lifecycle] = append(index[lifecycle], task.Name) + } + + return index +} + +// taskLifecycleStage returns the relevant lifecycle stage for a given task. +func taskLifecycleStage(task *structs.Task) lifecycleStage { + if task.IsPrestart() { + if task.Lifecycle.Sidecar { + return lifecycleStagePrestartSidecar + } + return lifecycleStagePrestartEphemeral + } else if task.IsPoststart() { + if task.Lifecycle.Sidecar { + return lifecycleStagePoststartSidecar + } + return lifecycleStagePoststartEphemeral + } else if task.IsPoststop() { + return lifecycleStagePoststop + } + + // Assume task is "main" by default. 
+ return lifecycleStageMain +} diff --git a/client/allocrunner/tasklifecycle/coordinator_test.go b/client/allocrunner/tasklifecycle/coordinator_test.go new file mode 100644 index 00000000000..3f86dcc99f3 --- /dev/null +++ b/client/allocrunner/tasklifecycle/coordinator_test.go @@ -0,0 +1,560 @@ +package tasklifecycle + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" +) + +func TestCoordinator_OnlyMainApp(t *testing.T) { + ci.Parallel(t) + + alloc := mock.Alloc() + tasks := alloc.Job.TaskGroups[0].Tasks + task := tasks[0] + logger := testlog.HCLogger(t) + + shutdownCh := make(chan struct{}) + defer close(shutdownCh) + coord := NewCoordinator(logger, tasks, shutdownCh) + + // Tasks starts blocked. + RequireTaskBlocked(t, coord, task) + + // When main is pending it's allowed to run. + states := map[string]*structs.TaskState{ + task.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, task) + + // After main is running, main tasks are still allowed to run. + states = map[string]*structs.TaskState{ + task.Name: { + State: structs.TaskStateRunning, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, task) +} + +func TestCoordinator_PrestartRunsBeforeMain(t *testing.T) { + ci.Parallel(t) + + logger := testlog.HCLogger(t) + + alloc := mock.LifecycleAlloc() + tasks := alloc.Job.TaskGroups[0].Tasks + + mainTask := tasks[0] + sideTask := tasks[1] + initTask := tasks[2] + + // Only use the tasks that we care about. + tasks = []*structs.Task{mainTask, sideTask, initTask} + + shutdownCh := make(chan struct{}) + defer close(shutdownCh) + coord := NewCoordinator(logger, tasks, shutdownCh) + + // All tasks start blocked. + RequireTaskBlocked(t, coord, initTask) + RequireTaskBlocked(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) + + // Set initial state, prestart tasks are allowed to run. + states := map[string]*structs.TaskState{ + initTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + sideTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, initTask) + RequireTaskAllowed(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) + + // Sidecar task is running, main is blocked. + states = map[string]*structs.TaskState{ + initTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + sideTask.Name: { + State: structs.TaskStateRunning, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, initTask) + RequireTaskAllowed(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) + + // Init task is running, main is blocked. + states = map[string]*structs.TaskState{ + initTask.Name: { + State: structs.TaskStateRunning, + Failed: false, + }, + sideTask.Name: { + State: structs.TaskStateRunning, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, initTask) + RequireTaskAllowed(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) + + // Init task is done, main is now allowed to run. 
+ states = map[string]*structs.TaskState{ + initTask.Name: { + State: structs.TaskStateDead, + Failed: false, + }, + sideTask.Name: { + State: structs.TaskStateRunning, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskBlocked(t, coord, initTask) + RequireTaskAllowed(t, coord, sideTask) + RequireTaskAllowed(t, coord, mainTask) +} + +func TestCoordinator_MainRunsAfterManyInitTasks(t *testing.T) { + ci.Parallel(t) + + logger := testlog.HCLogger(t) + + alloc := mock.LifecycleAlloc() + alloc.Job = mock.VariableLifecycleJob(structs.Resources{CPU: 100, MemoryMB: 256}, 1, 2, 0) + tasks := alloc.Job.TaskGroups[0].Tasks + + mainTask := tasks[0] + init1Task := tasks[1] + init2Task := tasks[2] + + // Only use the tasks that we care about. + tasks = []*structs.Task{mainTask, init1Task, init2Task} + + shutdownCh := make(chan struct{}) + defer close(shutdownCh) + coord := NewCoordinator(logger, tasks, shutdownCh) + + // All tasks start blocked. + RequireTaskBlocked(t, coord, init1Task) + RequireTaskBlocked(t, coord, init2Task) + RequireTaskBlocked(t, coord, mainTask) + + // Set initial state, prestart tasks are allowed to run, main is blocked. + states := map[string]*structs.TaskState{ + init1Task.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + init2Task.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, init1Task) + RequireTaskAllowed(t, coord, init2Task) + RequireTaskBlocked(t, coord, mainTask) + + // Init tasks complete, main is allowed to run. + states = map[string]*structs.TaskState{ + init1Task.Name: { + State: structs.TaskStateDead, + Failed: false, + StartedAt: time.Now(), + FinishedAt: time.Now(), + }, + init2Task.Name: { + State: structs.TaskStateDead, + Failed: false, + StartedAt: time.Now(), + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskBlocked(t, coord, init1Task) + RequireTaskBlocked(t, coord, init2Task) + RequireTaskAllowed(t, coord, mainTask) +} + +func TestCoordinator_FailedInitTask(t *testing.T) { + ci.Parallel(t) + + logger := testlog.HCLogger(t) + + alloc := mock.LifecycleAlloc() + alloc.Job = mock.VariableLifecycleJob(structs.Resources{CPU: 100, MemoryMB: 256}, 1, 2, 0) + tasks := alloc.Job.TaskGroups[0].Tasks + + mainTask := tasks[0] + init1Task := tasks[1] + init2Task := tasks[2] + + // Only use the tasks that we care about. + tasks = []*structs.Task{mainTask, init1Task, init2Task} + + shutdownCh := make(chan struct{}) + defer close(shutdownCh) + coord := NewCoordinator(logger, tasks, shutdownCh) + + // All tasks start blocked. + RequireTaskBlocked(t, coord, init1Task) + RequireTaskBlocked(t, coord, init2Task) + RequireTaskBlocked(t, coord, mainTask) + + // Set initial state, prestart tasks are allowed to run, main is blocked. + states := map[string]*structs.TaskState{ + init1Task.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + init2Task.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, init1Task) + RequireTaskAllowed(t, coord, init2Task) + RequireTaskBlocked(t, coord, mainTask) + + // Init task dies, main is still blocked. 
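+ // With one init task failed, the prestart stage is not considered complete:
+ // both init tasks remain allowed while main stays blocked, as asserted below.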
+ states = map[string]*structs.TaskState{ + init1Task.Name: { + State: structs.TaskStateDead, + Failed: false, + StartedAt: time.Now(), + FinishedAt: time.Now(), + }, + init2Task.Name: { + State: structs.TaskStateDead, + Failed: true, + StartedAt: time.Now(), + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, init1Task) + RequireTaskAllowed(t, coord, init2Task) + RequireTaskBlocked(t, coord, mainTask) +} + +func TestCoordinator_SidecarNeverStarts(t *testing.T) { + ci.Parallel(t) + + logger := testlog.HCLogger(t) + + alloc := mock.LifecycleAlloc() + tasks := alloc.Job.TaskGroups[0].Tasks + + mainTask := tasks[0] + sideTask := tasks[1] + initTask := tasks[2] + + // Only use the tasks that we care about. + tasks = []*structs.Task{mainTask, sideTask, initTask} + + shutdownCh := make(chan struct{}) + defer close(shutdownCh) + coord := NewCoordinator(logger, tasks, shutdownCh) + + // All tasks start blocked. + RequireTaskBlocked(t, coord, initTask) + RequireTaskBlocked(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) + + // Set initial state, prestart tasks are allowed to run, main is blocked. + states := map[string]*structs.TaskState{ + initTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + sideTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, initTask) + RequireTaskAllowed(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) + + // Init completes, but sidecar not yet. + states = map[string]*structs.TaskState{ + initTask.Name: { + State: structs.TaskStateDead, + Failed: false, + StartedAt: time.Now(), + FinishedAt: time.Now(), + }, + sideTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, initTask) + RequireTaskAllowed(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) +} + +func TestCoordinator_PoststartStartsAfterMain(t *testing.T) { + ci.Parallel(t) + + logger := testlog.HCLogger(t) + + alloc := mock.LifecycleAlloc() + tasks := alloc.Job.TaskGroups[0].Tasks + + mainTask := tasks[0] + sideTask := tasks[1] + postTask := tasks[2] + + // Only use the tasks that we care about. + tasks = []*structs.Task{mainTask, sideTask, postTask} + + // Make the the third task is a poststart hook + postTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart + + shutdownCh := make(chan struct{}) + defer close(shutdownCh) + coord := NewCoordinator(logger, tasks, shutdownCh) + + // All tasks start blocked. + RequireTaskBlocked(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) + RequireTaskBlocked(t, coord, postTask) + + // Set initial state, prestart tasks are allowed to run, main and poststart + // are blocked. + states := map[string]*structs.TaskState{ + sideTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + postTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, sideTask) + RequireTaskBlocked(t, coord, mainTask) + RequireTaskBlocked(t, coord, postTask) + + // Sidecar and main running, poststart allowed to run. 
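+ // Poststart is gated on the main tasks reaching the running state rather
+ // than on them completing, as the next update demonstrates.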
+ states = map[string]*structs.TaskState{ + sideTask.Name: { + State: structs.TaskStateRunning, + Failed: false, + StartedAt: time.Now(), + }, + mainTask.Name: { + State: structs.TaskStateRunning, + Failed: false, + StartedAt: time.Now(), + }, + postTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + } + coord.TaskStateUpdated(states) + RequireTaskAllowed(t, coord, sideTask) + RequireTaskAllowed(t, coord, mainTask) + RequireTaskAllowed(t, coord, postTask) +} + +func TestCoordinator_Restore(t *testing.T) { + ci.Parallel(t) + + task := mock.Job().TaskGroups[0].Tasks[0] + + preEphemeral := task.Copy() + preEphemeral.Name = "pre_ephemeral" + preEphemeral.Lifecycle = &structs.TaskLifecycleConfig{ + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: false, + } + + preSide := task.Copy() + preSide.Name = "pre_side" + preSide.Lifecycle = &structs.TaskLifecycleConfig{ + Hook: structs.TaskLifecycleHookPrestart, + Sidecar: true, + } + + main := task.Copy() + main.Name = "main" + main.Lifecycle = nil + + postEphemeral := task.Copy() + postEphemeral.Name = "post_ephemeral" + postEphemeral.Lifecycle = &structs.TaskLifecycleConfig{ + Hook: structs.TaskLifecycleHookPoststart, + Sidecar: false, + } + + postSide := task.Copy() + postSide.Name = "post_side" + postSide.Lifecycle = &structs.TaskLifecycleConfig{ + Hook: structs.TaskLifecycleHookPoststart, + Sidecar: true, + } + + poststop := task.Copy() + poststop.Name = "poststop" + poststop.Lifecycle = &structs.TaskLifecycleConfig{ + Hook: structs.TaskLifecycleHookPoststop, + Sidecar: false, + } + + testCases := []struct { + name string + tasks []*structs.Task + tasksState map[string]*structs.TaskState + testFn func(*testing.T, *Coordinator) + }{ + { + name: "prestart ephemeral running", + tasks: []*structs.Task{preEphemeral, preSide, main}, + tasksState: map[string]*structs.TaskState{ + preEphemeral.Name: {State: structs.TaskStateRunning}, + preSide.Name: {State: structs.TaskStateRunning}, + main.Name: {State: structs.TaskStatePending}, + }, + testFn: func(t *testing.T, c *Coordinator) { + RequireTaskBlocked(t, c, main) + + RequireTaskAllowed(t, c, preEphemeral) + RequireTaskAllowed(t, c, preSide) + }, + }, + { + name: "prestart ephemeral complete", + tasks: []*structs.Task{preEphemeral, preSide, main}, + tasksState: map[string]*structs.TaskState{ + preEphemeral.Name: {State: structs.TaskStateDead}, + preSide.Name: {State: structs.TaskStateRunning}, + main.Name: {State: structs.TaskStatePending}, + }, + testFn: func(t *testing.T, c *Coordinator) { + RequireTaskBlocked(t, c, preEphemeral) + + RequireTaskAllowed(t, c, preSide) + RequireTaskAllowed(t, c, main) + }, + }, + { + name: "main running", + tasks: []*structs.Task{main}, + tasksState: map[string]*structs.TaskState{ + main.Name: {State: structs.TaskStateRunning}, + }, + testFn: func(t *testing.T, c *Coordinator) { + RequireTaskAllowed(t, c, main) + }, + }, + { + name: "poststart with sidecar", + tasks: []*structs.Task{main, postEphemeral, postSide}, + tasksState: map[string]*structs.TaskState{ + main.Name: {State: structs.TaskStateRunning}, + postEphemeral.Name: {State: structs.TaskStateDead}, + postSide.Name: {State: structs.TaskStateRunning}, + }, + testFn: func(t *testing.T, c *Coordinator) { + RequireTaskBlocked(t, c, postEphemeral) + + RequireTaskAllowed(t, c, main) + RequireTaskAllowed(t, c, postSide) + }, + }, + { + name: "poststop running", + tasks: []*structs.Task{main, poststop}, + tasksState: map[string]*structs.TaskState{ + main.Name: {State: structs.TaskStateDead}, + 
poststop.Name: {State: structs.TaskStateRunning}, + }, + testFn: func(t *testing.T, c *Coordinator) { + RequireTaskBlocked(t, c, main) + + RequireTaskAllowed(t, c, poststop) + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + shutdownCh := make(chan struct{}) + defer close(shutdownCh) + + c := NewCoordinator(testlog.HCLogger(t), tc.tasks, shutdownCh) + c.Restore(tc.tasksState) + tc.testFn(t, c) + }) + } +} diff --git a/client/allocrunner/tasklifecycle/doc.go b/client/allocrunner/tasklifecycle/doc.go new file mode 100644 index 00000000000..95d806fe7ff --- /dev/null +++ b/client/allocrunner/tasklifecycle/doc.go @@ -0,0 +1,92 @@ +/* +Package tasklifecycle manages the execution order of tasks based on their +lifecycle configuration. Its main structs are the Coordinator and the Gate. + +The Coordinator is used by an allocRunner to signal if a taskRunner is allowed +to start or not. It does so using a set of Gates, each for a given task +lifecycle configuration. + +The Gate provides a channel that can be used to block its listener on demand. +This is done by calling the Open() and Close() methods in the Gate which will +cause activate or deactivate a producer at the other end of the channel. + +The allocRunner feeds task state updates to the Coordinator that then uses this +information to determine which Gates it should open or close. Each Gate is +connected to a taskRunner with a matching lifecycle configuration. + +In the diagrams below, a solid line from a Gate indicates that it's open +(active), while a dashed line indicates that it's closed (inactive). A +taskRunner connected to an open Gate is allowed to run, while one that is +connected to a closed Gate is blocked. + +The Open/Close control line represents the Coordinator calling the Open() and +Close() methods of the Gates. + +In this state, the Coordinator is allowing prestart tasks to run, while +blocking the main tasks. + + ┌────────┐ + │ ALLOC │ + │ RUNNER │ + └───┬────┘ + │ + Task state + │ + ┌────────────▼────────────┐ + │Current state: │ + │Prestart │ ┌─────────────┐ + │ │ │ TASK RUNNER │ + │ ┌───────────────────┼─────────┤ (Prestart) │ + │ │ │ └─────────────┘ + │ │ │ + │ │ │ ┌─────────────┐ + │ │ COORDINATOR │ │ TASK RUNNER │ + │ │ ┌─ ─ ─┼─ ─ ─ ─┬╶┤ (Main) │ + │ │ ╷ │ ╷ └─────────────┘ + │ │ ╷ │ ╷ + │ │ ╷ │ ╷ ┌─────────────┐ + │ Prestart Main │ ╷ │ TASK RUNNER │ + └─────┬─┬───────────┬─┬───┘ └╶┤ (Main) │ + │ │Open/ ╷ │Open/ └─────────────┘ + │ │Close ╷ │Close + ┌──┴─▼─┐ ┌──┴─▼─┐ + │ GATE │ │ GATE │ + └──────┘ └──────┘ + +When the prestart task completes, the allocRunner will send a new batch of task +states to the Coordinator that will cause it to transition to a state where it +will close the Gate for prestart tasks, blocking their execution, and will open +the Gate for main tasks, allowing them to start. 
+ + ┌────────┐ + │ ALLOC │ + │ RUNNER │ + └───┬────┘ + │ + Task state + │ + ┌────────────▼────────────┐ + │Current state: │ + │Main │ ┌─────────────┐ + │ │ │ TASK RUNNER │ + │ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┼─ ─ ─ ─ ─┤ (Prestart) │ + │ ╷ │ └─────────────┘ + │ ╷ │ + │ ╷ │ ┌─────────────┐ + │ ╷ COORDINATOR │ │ TASK RUNNER │ + │ ╷ ┌─────┼───────┬─┤ (Main) │ + │ ╷ │ │ │ └─────────────┘ + │ ╷ │ │ │ + │ ╷ │ │ │ ┌─────────────┐ + │ Prestart Main │ │ │ TASK RUNNER │ + └─────┼─┬───────────┬─┬───┘ └─┤ (Main) │ + ╷ │Open/ │ │Open/ └─────────────┘ + ╷ │Close │ │Close + ┌──┴─▼─┐ ┌──┴─▼─┐ + │ GATE │ │ GATE │ + └──────┘ └──────┘ + +Diagram source: +https://asciiflow.com/#/share/eJyrVspLzE1VssorzcnRUcpJrEwtUrJSqo5RqohRsjI0MDTViVGqBDKNLA2ArJLUihIgJ0ZJAQYeTdmDB8XE5CGrVHD08fF3BjPRZYJC%2Ffxcg7DIEGk6VDWyUEhicbZCcUliSSp2hfgNR6BpxCmDmelcWlSUmlcCsdkKm62%2BiZmo7kEOCOK8jtVmrGZiMVchxDHYGzXEYSpIspVUpKAREOQaHOIYFKKpgGkvjcIDp8kk2t7zaEoDcWgCmsnO%2Fv5BLp5%2BjiH%2BQVhNbkKLjyY8LtNFAyDdCgoavo6efppQ0%2FDorkETrQGypxDtrxmkmEyiK8iJ24CiVGAeKyqBGgPNVWjmYk%2FrVE7X8LhBiwtEcQRSBcT%2B%2Bs4KyK5D4pOewlFMRglfuDy6vmkoLoaL1yDLwXUquDuGuCogq4aLYDd9CnbT0V2uVKtUCwCqNQgp) +*/ +package tasklifecycle diff --git a/client/allocrunner/tasklifecycle/gate.go b/client/allocrunner/tasklifecycle/gate.go new file mode 100644 index 00000000000..02d96b29763 --- /dev/null +++ b/client/allocrunner/tasklifecycle/gate.go @@ -0,0 +1,87 @@ +package tasklifecycle + +const ( + gateClosed = false + gateOpened = true +) + +// Gate is used by the Coordinator to block or allow tasks from running. +// +// It provides a channel that taskRunners listens on to determine when they are +// allowed to run. The Gate has an infinite loop that is either feeding this +// channel (therefore allowing listeners to proceed) or not doing anything +// (causing listeners to block an wait). +// +// The Coordinator uses the Gate Open() and Close() methods to control this +// producer loop. +type Gate struct { + sendCh chan struct{} + updateCh chan bool + shutdownCh <-chan struct{} +} + +// NewGate returns a new Gate that is initially closed. The Gate should not be +// used after the shutdownCh is closed. +func NewGate(shutdownCh <-chan struct{}) *Gate { + g := &Gate{ + sendCh: make(chan struct{}), + updateCh: make(chan bool), + shutdownCh: shutdownCh, + } + go g.run(gateClosed) + + return g +} + +// WaitCh returns a channel that the listener must block on before starting its +// task. +// +// Callers must also check the state of the shutdownCh used to create the Gate +// to avoid blocking indefinitely. +func (g *Gate) WaitCh() <-chan struct{} { + return g.sendCh +} + +// Open is used to allow listeners to proceed. +// If the gate shutdownCh channel is closed, this method is a no-op so callers +// should check its state. +func (g *Gate) Open() { + select { + case <-g.shutdownCh: + case g.updateCh <- gateOpened: + } +} + +// Close is used to block listeners from proceeding. +// if the gate shutdownch channel is closed, this method is a no-op so callers +// should check its state. +func (g *Gate) Close() { + select { + case <-g.shutdownCh: + case g.updateCh <- gateClosed: + } +} + +// run starts the infinite loop that feeds the channel if the Gate is opened. +func (g *Gate) run(initState bool) { + isOpen := initState + for { + if isOpen { + select { + // Feed channel if the gate is open. 
+ case g.sendCh <- struct{}{}: + case <-g.shutdownCh: + return + case isOpen = <-g.updateCh: + continue + } + } else { + select { + case <-g.shutdownCh: + return + case isOpen = <-g.updateCh: + continue + } + } + } +} diff --git a/client/allocrunner/tasklifecycle/gate_test.go b/client/allocrunner/tasklifecycle/gate_test.go new file mode 100644 index 00000000000..4ff3a2f141e --- /dev/null +++ b/client/allocrunner/tasklifecycle/gate_test.go @@ -0,0 +1,118 @@ +package tasklifecycle + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" +) + +func TestGate(t *testing.T) { + ci.Parallel(t) + + testCases := []struct { + name string + test func(*testing.T, *Gate) + }{ + { + name: "starts blocked", + test: func(t *testing.T, g *Gate) { + requireChannelBlocking(t, g.WaitCh(), "wait") + }, + }, + { + name: "block", + test: func(t *testing.T, g *Gate) { + g.Close() + requireChannelBlocking(t, g.WaitCh(), "wait") + }, + }, + { + name: "allow", + test: func(t *testing.T, g *Gate) { + g.Open() + requireChannelPassing(t, g.WaitCh(), "wait") + }, + }, + { + name: "block twice", + test: func(t *testing.T, g *Gate) { + g.Close() + g.Close() + requireChannelBlocking(t, g.WaitCh(), "wait") + }, + }, + { + name: "allow twice", + test: func(t *testing.T, g *Gate) { + g.Open() + g.Open() + requireChannelPassing(t, g.WaitCh(), "wait") + }, + }, + { + name: "allow block allow", + test: func(t *testing.T, g *Gate) { + g.Open() + requireChannelPassing(t, g.WaitCh(), "first allow") + g.Close() + requireChannelBlocking(t, g.WaitCh(), "block") + g.Open() + requireChannelPassing(t, g.WaitCh(), "second allow") + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + shutdownCh := make(chan struct{}) + defer close(shutdownCh) + + g := NewGate(shutdownCh) + tc.test(t, g) + }) + } +} + +// TestGate_shutdown tests a gate with a closed shutdown channel. +func TestGate_shutdown(t *testing.T) { + ci.Parallel(t) + + // Create a Gate with a closed shutdownCh. + shutdownCh := make(chan struct{}) + close(shutdownCh) + + g := NewGate(shutdownCh) + + // Test that Open() and Close() doesn't block forever. + openCh := make(chan struct{}) + closeCh := make(chan struct{}) + + go func() { + g.Open() + close(openCh) + }() + go func() { + g.Close() + close(closeCh) + }() + + timer, stop := helper.NewSafeTimer(time.Second) + defer stop() + + select { + case <-openCh: + case <-timer.C: + t.Fatalf("timeout waiting for gate operations") + } + + select { + case <-closeCh: + case <-timer.C: + t.Fatalf("timeout waiting for gate operations") + } + + // A Gate with a shutdownCh should be closed. 
+ requireChannelBlocking(t, g.WaitCh(), "gate should be closed") +} diff --git a/client/allocrunner/tasklifecycle/testing.go b/client/allocrunner/tasklifecycle/testing.go new file mode 100644 index 00000000000..7e7ee0eb18f --- /dev/null +++ b/client/allocrunner/tasklifecycle/testing.go @@ -0,0 +1,56 @@ +package tasklifecycle + +import ( + "time" + + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + testing "github.com/mitchellh/go-testing-interface" +) + +func RequireTaskBlocked(t testing.T, c *Coordinator, task *structs.Task) { + ch := c.StartConditionForTask(task) + requireChannelBlocking(t, ch, task.Name) +} + +func RequireTaskAllowed(t testing.T, c *Coordinator, task *structs.Task) { + ch := c.StartConditionForTask(task) + requireChannelPassing(t, ch, task.Name) +} + +func WaitNotInitUntil(c *Coordinator, until time.Duration, errorFunc func()) { + testutil.WaitForResultUntil(until, + func() (bool, error) { + c.currentStateLock.RLock() + defer c.currentStateLock.RUnlock() + return c.currentState != coordinatorStateInit, nil + }, + func(_ error) { + errorFunc() + }) +} + +func requireChannelPassing(t testing.T, ch <-chan struct{}, name string) { + testutil.WaitForResult(func() (bool, error) { + return !isChannelBlocking(ch), nil + }, func(_ error) { + t.Fatalf("%s channel was blocking, should be passing", name) + }) +} + +func requireChannelBlocking(t testing.T, ch <-chan struct{}, name string) { + testutil.WaitForResult(func() (bool, error) { + return isChannelBlocking(ch), nil + }, func(_ error) { + t.Fatalf("%s channel was passing, should be blocking", name) + }) +} + +func isChannelBlocking(ch <-chan struct{}) bool { + select { + case <-ch: + return false + default: + return true + } +} diff --git a/client/allocrunner/taskrunner/artifact_hook.go b/client/allocrunner/taskrunner/artifact_hook.go index 627ee6e4274..dae238ecf33 100644 --- a/client/allocrunner/taskrunner/artifact_hook.go +++ b/client/allocrunner/taskrunner/artifact_hook.go @@ -7,8 +7,8 @@ import ( log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/allocrunner/interfaces" - "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" ti "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces" + ci "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/nomad/structs" ) @@ -16,11 +16,13 @@ import ( type artifactHook struct { eventEmitter ti.EventEmitter logger log.Logger + getter ci.ArtifactGetter } -func newArtifactHook(e ti.EventEmitter, logger log.Logger) *artifactHook { +func newArtifactHook(e ti.EventEmitter, getter ci.ArtifactGetter, logger log.Logger) *artifactHook { h := &artifactHook{ eventEmitter: e, + getter: getter, } h.logger = logger.Named(h.Name()) return h @@ -40,7 +42,7 @@ func (h *artifactHook) doWork(req *interfaces.TaskPrestartRequest, resp *interfa h.logger.Debug("downloading artifact", "artifact", artifact.GetterSource, "aid", aid) //XXX add ctx to GetArtifact to allow cancelling long downloads - if err := getter.GetArtifact(req.TaskEnv, artifact); err != nil { + if err := h.getter.GetArtifact(req.TaskEnv, artifact); err != nil { wrapped := structs.NewRecoverableError( fmt.Errorf("failed to download artifact %q: %v", artifact.GetterSource, err), diff --git a/client/allocrunner/taskrunner/artifact_hook_test.go b/client/allocrunner/taskrunner/artifact_hook_test.go index c135b5cb414..1571b031338 100644 --- a/client/allocrunner/taskrunner/artifact_hook_test.go +++ 
b/client/allocrunner/taskrunner/artifact_hook_test.go @@ -3,7 +3,6 @@ package taskrunner import ( "context" "fmt" - "io/ioutil" "net/http" "net/http/httptest" "os" @@ -14,11 +13,12 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/structs" "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" ) // Statically assert the artifact hook implements the expected interface @@ -38,7 +38,7 @@ func TestTaskRunner_ArtifactHook_Recoverable(t *testing.T) { ci.Parallel(t) me := &mockEmitter{} - artifactHook := newArtifactHook(me, testlog.HCLogger(t)) + artifactHook := newArtifactHook(me, getter.TestDefaultGetter(t), testlog.HCLogger(t)) req := &interfaces.TaskPrestartRequest{ TaskEnv: taskenv.NewEmptyTaskEnv(), @@ -71,25 +71,21 @@ func TestTaskRunner_ArtifactHook_PartialDone(t *testing.T) { ci.Parallel(t) me := &mockEmitter{} - artifactHook := newArtifactHook(me, testlog.HCLogger(t)) + artifactHook := newArtifactHook(me, getter.TestDefaultGetter(t), testlog.HCLogger(t)) // Create a source directory with 1 of the 2 artifacts srcdir := t.TempDir() // Only create one of the 2 artifacts to cause an error on first run. file1 := filepath.Join(srcdir, "foo.txt") - require.NoError(t, ioutil.WriteFile(file1, []byte{'1'}, 0644)) + require.NoError(t, os.WriteFile(file1, []byte{'1'}, 0644)) // Test server to serve the artifacts ts := httptest.NewServer(http.FileServer(http.Dir(srcdir))) defer ts.Close() // Create the target directory. - destdir, err := ioutil.TempDir("", "nomadtest-dest") - require.NoError(t, err) - defer func() { - require.NoError(t, os.RemoveAll(destdir)) - }() + destdir := t.TempDir() req := &interfaces.TaskPrestartRequest{ TaskEnv: taskenv.NewTaskEnv(nil, nil, nil, nil, destdir, ""), @@ -112,7 +108,7 @@ func TestTaskRunner_ArtifactHook_PartialDone(t *testing.T) { // On first run file1 (foo) should download but file2 (bar) should // fail. - err = artifactHook.Prestart(context.Background(), req, &resp) + err := artifactHook.Prestart(context.Background(), req, &resp) require.NotNil(t, err) require.True(t, structs.IsRecoverable(err)) @@ -126,10 +122,10 @@ func TestTaskRunner_ArtifactHook_PartialDone(t *testing.T) { // Write file2 so artifacts can download successfully file2 := filepath.Join(srcdir, "bar.txt") - require.NoError(t, ioutil.WriteFile(file2, []byte{'1'}, 0644)) + require.NoError(t, os.WriteFile(file2, []byte{'1'}, 0644)) // Mock TaskRunner by copying state from resp to req and reset resp. 
- req.PreviousState = helper.CopyMapStringString(resp.State) + req.PreviousState = maps.Clone(resp.State) resp = interfaces.TaskPrestartResponse{} @@ -149,7 +145,7 @@ func TestTaskRunner_ArtifactHook_PartialDone(t *testing.T) { // Stop the test server entirely and assert that re-running works ts.Close() - req.PreviousState = helper.CopyMapStringString(resp.State) + req.PreviousState = maps.Clone(resp.State) resp = interfaces.TaskPrestartResponse{} err = artifactHook.Prestart(context.Background(), req, &resp) require.NoError(t, err) @@ -163,7 +159,7 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadSuccess(t *testing.T) { t.Parallel() me := &mockEmitter{} - artifactHook := newArtifactHook(me, testlog.HCLogger(t)) + artifactHook := newArtifactHook(me, getter.TestDefaultGetter(t), testlog.HCLogger(t)) // Create a source directory all 7 artifacts srcdir := t.TempDir() @@ -171,7 +167,7 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadSuccess(t *testing.T) { numOfFiles := 7 for i := 0; i < numOfFiles; i++ { file := filepath.Join(srcdir, fmt.Sprintf("file%d.txt", i)) - require.NoError(t, ioutil.WriteFile(file, []byte{byte(i)}, 0644)) + require.NoError(t, os.WriteFile(file, []byte{byte(i)}, 0644)) } // Test server to serve the artifacts @@ -179,11 +175,7 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadSuccess(t *testing.T) { defer ts.Close() // Create the target directory. - destdir, err := ioutil.TempDir("", "nomadtest-dest") - require.NoError(t, err) - defer func() { - require.NoError(t, os.RemoveAll(destdir)) - }() + destdir := t.TempDir() req := &interfaces.TaskPrestartRequest{ TaskEnv: taskenv.NewTaskEnv(nil, nil, nil, nil, destdir, ""), @@ -225,7 +217,7 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadSuccess(t *testing.T) { resp := interfaces.TaskPrestartResponse{} // start the hook - err = artifactHook.Prestart(context.Background(), req, &resp) + err := artifactHook.Prestart(context.Background(), req, &resp) require.NoError(t, err) require.True(t, resp.Done) @@ -254,30 +246,26 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadFailure(t *testing.T) { t.Parallel() me := &mockEmitter{} - artifactHook := newArtifactHook(me, testlog.HCLogger(t)) + artifactHook := newArtifactHook(me, getter.TestDefaultGetter(t), testlog.HCLogger(t)) // Create a source directory with 3 of the 4 artifacts srcdir := t.TempDir() file1 := filepath.Join(srcdir, "file1.txt") - require.NoError(t, ioutil.WriteFile(file1, []byte{'1'}, 0644)) + require.NoError(t, os.WriteFile(file1, []byte{'1'}, 0644)) file2 := filepath.Join(srcdir, "file2.txt") - require.NoError(t, ioutil.WriteFile(file2, []byte{'2'}, 0644)) + require.NoError(t, os.WriteFile(file2, []byte{'2'}, 0644)) file3 := filepath.Join(srcdir, "file3.txt") - require.NoError(t, ioutil.WriteFile(file3, []byte{'3'}, 0644)) + require.NoError(t, os.WriteFile(file3, []byte{'3'}, 0644)) // Test server to serve the artifacts ts := httptest.NewServer(http.FileServer(http.Dir(srcdir))) defer ts.Close() // Create the target directory. 
- destdir, err := ioutil.TempDir("", "nomadtest-dest") - require.NoError(t, err) - defer func() { - require.NoError(t, os.RemoveAll(destdir)) - }() + destdir := t.TempDir() req := &interfaces.TaskPrestartRequest{ TaskEnv: taskenv.NewTaskEnv(nil, nil, nil, nil, destdir, ""), @@ -307,7 +295,7 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadFailure(t *testing.T) { resp := interfaces.TaskPrestartResponse{} // On first run all files will be downloaded except file0.txt - err = artifactHook.Prestart(context.Background(), req, &resp) + err := artifactHook.Prestart(context.Background(), req, &resp) require.Error(t, err) require.True(t, structs.IsRecoverable(err)) @@ -323,10 +311,10 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadFailure(t *testing.T) { // create the missing file file0 := filepath.Join(srcdir, "file0.txt") - require.NoError(t, ioutil.WriteFile(file0, []byte{'0'}, 0644)) + require.NoError(t, os.WriteFile(file0, []byte{'0'}, 0644)) // Mock TaskRunner by copying state from resp to req and reset resp. - req.PreviousState = helper.CopyMapStringString(resp.State) + req.PreviousState = maps.Clone(resp.State) resp = interfaces.TaskPrestartResponse{} @@ -346,19 +334,19 @@ func TestTaskRunner_ArtifactHook_ConcurrentDownloadFailure(t *testing.T) { require.Contains(t, files[3], "file3.txt") // verify the file contents too, since files will also be created for failed downloads - data0, err := ioutil.ReadFile(files[0]) + data0, err := os.ReadFile(files[0]) require.NoError(t, err) require.Equal(t, data0, []byte{'0'}) - data1, err := ioutil.ReadFile(files[1]) + data1, err := os.ReadFile(files[1]) require.NoError(t, err) require.Equal(t, data1, []byte{'1'}) - data2, err := ioutil.ReadFile(files[2]) + data2, err := os.ReadFile(files[2]) require.NoError(t, err) require.Equal(t, data2, []byte{'2'}) - data3, err := ioutil.ReadFile(files[3]) + data3, err := os.ReadFile(files[3]) require.NoError(t, err) require.Equal(t, data3, []byte{'3'}) diff --git a/client/allocrunner/taskrunner/connect_native_hook.go b/client/allocrunner/taskrunner/connect_native_hook.go index 9e51b335886..628e87d614c 100644 --- a/client/allocrunner/taskrunner/connect_native_hook.go +++ b/client/allocrunner/taskrunner/connect_native_hook.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "os" "path/filepath" @@ -265,12 +264,12 @@ func (h *connectNativeHook) hostEnv(env map[string]string) map[string]string { func (h *connectNativeHook) maybeSetSITokenEnv(dir, task string, env map[string]string) error { if _, exists := env["CONSUL_HTTP_TOKEN"]; exists { // Consul token was already set - typically by using the Vault integration - // and a template stanza to set the environment. Ignore the SI token as + // and a template block to set the environment. Ignore the SI token as // the configured token takes precedence. 
return nil } - token, err := ioutil.ReadFile(filepath.Join(dir, sidsTokenFile)) + token, err := os.ReadFile(filepath.Join(dir, sidsTokenFile)) if err != nil { if !os.IsNotExist(err) { return fmt.Errorf("failed to load SI token for native task %s: %w", task, err) diff --git a/client/allocrunner/taskrunner/connect_native_hook_test.go b/client/allocrunner/taskrunner/connect_native_hook_test.go index 5684e77ed7a..c96ec4d7481 100644 --- a/client/allocrunner/taskrunner/connect_native_hook_test.go +++ b/client/allocrunner/taskrunner/connect_native_hook_test.go @@ -2,7 +2,7 @@ package taskrunner import ( "context" - "io/ioutil" + "io" "os" "path/filepath" "testing" @@ -15,7 +15,7 @@ import ( "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/client/testutil" agentconsul "github.com/hashicorp/nomad/command/agent/consul" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -27,8 +27,8 @@ import ( func getTestConsul(t *testing.T) *consultest.TestServer { testConsul, err := consultest.NewTestServerConfigT(t, func(c *consultest.TestServerConfig) { if !testing.Verbose() { // disable consul logging if -v not set - c.Stdout = ioutil.Discard - c.Stderr = ioutil.Discard + c.Stdout = io.Discard + c.Stderr = io.Discard } }) require.NoError(t, err, "failed to start test consul server") @@ -42,30 +42,20 @@ func TestConnectNativeHook_Name(t *testing.T) { } func setupCertDirs(t *testing.T) (string, string) { - fd, err := ioutil.TempFile("", "connect_native_testcert") + fd, err := os.CreateTemp(t.TempDir(), "connect_native_testcert") require.NoError(t, err) _, err = fd.WriteString("ABCDEF") require.NoError(t, err) err = fd.Close() require.NoError(t, err) - d, err := ioutil.TempDir("", "connect_native_testsecrets") - require.NoError(t, err) - return fd.Name(), d -} - -func cleanupCertDirs(t *testing.T, original, secrets string) { - err := os.Remove(original) - require.NoError(t, err) - err = os.RemoveAll(secrets) - require.NoError(t, err) + return fd.Name(), t.TempDir() } func TestConnectNativeHook_copyCertificate(t *testing.T) { ci.Parallel(t) f, d := setupCertDirs(t) - defer cleanupCertDirs(t, f, d) t.Run("no source", func(t *testing.T) { err := new(connectNativeHook).copyCertificate("", d, "out.pem") @@ -75,7 +65,7 @@ func TestConnectNativeHook_copyCertificate(t *testing.T) { t.Run("normal", func(t *testing.T) { err := new(connectNativeHook).copyCertificate(f, d, "out.pem") require.NoError(t, err) - b, err := ioutil.ReadFile(filepath.Join(d, "out.pem")) + b, err := os.ReadFile(filepath.Join(d, "out.pem")) require.NoError(t, err) require.Equal(t, "ABCDEF", string(b)) }) @@ -85,7 +75,6 @@ func TestConnectNativeHook_copyCertificates(t *testing.T) { ci.Parallel(t) f, d := setupCertDirs(t) - defer cleanupCertDirs(t, f, d) t.Run("normal", func(t *testing.T) { err := new(connectNativeHook).copyCertificates(consulTransportConfig{ @@ -94,7 +83,7 @@ func TestConnectNativeHook_copyCertificates(t *testing.T) { KeyFile: f, }, d) require.NoError(t, err) - ls, err := ioutil.ReadDir(d) + ls, err := os.ReadDir(d) require.NoError(t, err) require.Equal(t, 3, len(ls)) }) @@ -125,7 +114,7 @@ func TestConnectNativeHook_tlsEnv(t *testing.T) { }, } - // existing config from task env stanza + // existing config from task env block taskEnv := map[string]string{ "CONSUL_CACERT": "fakeCA.pem", "CONSUL_CLIENT_CERT": "fakeCert.pem", @@ -422,7 +411,7 @@ func 
TestTaskRunner_ConnectNativeHook_with_SI_token(t *testing.T) { // Insert service identity token in the secrets directory token := uuid.Generate() siTokenFile := filepath.Join(request.TaskDir.SecretsDir, sidsTokenFile) - err = ioutil.WriteFile(siTokenFile, []byte(token), 0440) + err = os.WriteFile(siTokenFile, []byte(token), 0440) require.NoError(t, err) response := new(interfaces.TaskPrestartResponse) @@ -450,8 +439,7 @@ func TestTaskRunner_ConnectNativeHook_shareTLS(t *testing.T) { testutil.RequireConsul(t) try := func(t *testing.T, shareSSL *bool) { - fakeCert, fakeCertDir := setupCertDirs(t) - defer cleanupCertDirs(t, fakeCert, fakeCertDir) + fakeCert, _ := setupCertDirs(t) testConsul := getTestConsul(t) defer testConsul.Stop() @@ -491,8 +479,8 @@ func TestTaskRunner_ConnectNativeHook_shareTLS(t *testing.T) { // TLS config consumed by native application ShareSSL: shareSSL, - EnableSSL: helper.BoolToPtr(true), - VerifySSL: helper.BoolToPtr(true), + EnableSSL: pointer.Of(true), + VerifySSL: pointer.Of(true), CAFile: fakeCert, CertFile: fakeCert, KeyFile: fakeCert, @@ -502,7 +490,7 @@ func TestTaskRunner_ConnectNativeHook_shareTLS(t *testing.T) { request := &interfaces.TaskPrestartRequest{ Task: tg.Tasks[0], TaskDir: allocDir.NewTaskDir(tg.Tasks[0].Name), - TaskEnv: taskenv.NewEmptyTaskEnv(), // nothing set in env stanza + TaskEnv: taskenv.NewEmptyTaskEnv(), // nothing set in env block } require.NoError(t, request.TaskDir.Build(false, nil)) @@ -541,7 +529,7 @@ func TestTaskRunner_ConnectNativeHook_shareTLS(t *testing.T) { // so make sure an unset value turns the feature on. t.Run("share_ssl is true", func(t *testing.T) { - try(t, helper.BoolToPtr(true)) + try(t, pointer.Of(true)) }) t.Run("share_ssl is nil", func(t *testing.T) { @@ -550,7 +538,7 @@ func TestTaskRunner_ConnectNativeHook_shareTLS(t *testing.T) { } func checkFilesInDir(t *testing.T, dir string, includes, excludes []string) { - ls, err := ioutil.ReadDir(dir) + ls, err := os.ReadDir(dir) require.NoError(t, err) var present []string @@ -570,8 +558,7 @@ func TestTaskRunner_ConnectNativeHook_shareTLS_override(t *testing.T) { ci.Parallel(t) testutil.RequireConsul(t) - fakeCert, fakeCertDir := setupCertDirs(t) - defer cleanupCertDirs(t, fakeCert, fakeCertDir) + fakeCert, _ := setupCertDirs(t) testConsul := getTestConsul(t) defer testConsul.Stop() @@ -610,9 +597,9 @@ func TestTaskRunner_ConnectNativeHook_shareTLS_override(t *testing.T) { Addr: consulConfig.Address, // TLS config consumed by native application - ShareSSL: helper.BoolToPtr(true), - EnableSSL: helper.BoolToPtr(true), - VerifySSL: helper.BoolToPtr(true), + ShareSSL: pointer.Of(true), + EnableSSL: pointer.Of(true), + VerifySSL: pointer.Of(true), CAFile: fakeCert, CertFile: fakeCert, KeyFile: fakeCert, @@ -633,7 +620,7 @@ func TestTaskRunner_ConnectNativeHook_shareTLS_override(t *testing.T) { request := &interfaces.TaskPrestartRequest{ Task: tg.Tasks[0], TaskDir: allocDir.NewTaskDir(tg.Tasks[0].Name), - TaskEnv: taskEnv, // env stanza is configured w/ non-default tls configs + TaskEnv: taskEnv, // env block is configured w/ non-default tls configs } require.NoError(t, request.TaskDir.Build(false, nil)) @@ -647,7 +634,7 @@ func TestTaskRunner_ConnectNativeHook_shareTLS_override(t *testing.T) { require.True(t, response.Done) // Assert environment variable for CONSUL_HTTP_SSL is set, because it was - // the only one not overridden by task env stanza config + // the only one not overridden by task env block config require.NotEmpty(t, response.Env) require.Equal(t, 
map[string]string{ "CONSUL_HTTP_SSL": "true", diff --git a/client/allocrunner/taskrunner/dispatch_hook.go b/client/allocrunner/taskrunner/dispatch_hook.go index 2564f8046fd..35dc9141dcf 100644 --- a/client/allocrunner/taskrunner/dispatch_hook.go +++ b/client/allocrunner/taskrunner/dispatch_hook.go @@ -2,7 +2,6 @@ package taskrunner import ( "context" - "io/ioutil" "os" "path/filepath" @@ -69,5 +68,5 @@ func writeDispatchPayload(base, filename string, payload []byte) error { return err } - return ioutil.WriteFile(renderTo, decoded, 0777) + return os.WriteFile(renderTo, decoded, 0777) } diff --git a/client/allocrunner/taskrunner/dispatch_hook_test.go b/client/allocrunner/taskrunner/dispatch_hook_test.go index 6d757761279..92adf1338c2 100644 --- a/client/allocrunner/taskrunner/dispatch_hook_test.go +++ b/client/allocrunner/taskrunner/dispatch_hook_test.go @@ -2,7 +2,7 @@ package taskrunner import ( "context" - "io/ioutil" + "os" "path/filepath" "testing" @@ -50,7 +50,7 @@ func TestTaskRunner_DispatchHook_NoPayload(t *testing.T) { require.True(resp.Done) // Assert payload directory is empty - files, err := ioutil.ReadDir(req.TaskDir.LocalDir) + files, err := os.ReadDir(req.TaskDir.LocalDir) require.NoError(err) require.Empty(files) } @@ -94,7 +94,7 @@ func TestTaskRunner_DispatchHook_Ok(t *testing.T) { require.True(resp.Done) filename := filepath.Join(req.TaskDir.LocalDir, task.DispatchPayload.File) - result, err := ioutil.ReadFile(filename) + result, err := os.ReadFile(filename) require.NoError(err) require.Equal(expected, result) } @@ -141,7 +141,7 @@ func TestTaskRunner_DispatchHook_Error(t *testing.T) { require.False(resp.Done) // Assert payload directory is empty - files, err := ioutil.ReadDir(req.TaskDir.LocalDir) + files, err := os.ReadDir(req.TaskDir.LocalDir) require.NoError(err) require.Empty(files) } diff --git a/client/allocrunner/taskrunner/driver_handle.go b/client/allocrunner/taskrunner/driver_handle.go index 36427f6f245..ef211f5ede7 100644 --- a/client/allocrunner/taskrunner/driver_handle.go +++ b/client/allocrunner/taskrunner/driver_handle.go @@ -11,13 +11,24 @@ import ( ) // NewDriverHandle returns a handle for task operations on a specific task -func NewDriverHandle(driver drivers.DriverPlugin, taskID string, task *structs.Task, net *drivers.DriverNetwork) *DriverHandle { +func NewDriverHandle( + driver drivers.DriverPlugin, + taskID string, + task *structs.Task, + maxKillTimeout time.Duration, + net *drivers.DriverNetwork) *DriverHandle { + + timeout := task.KillTimeout + if maxKillTimeout < timeout { + timeout = maxKillTimeout + } + return &DriverHandle{ driver: driver, net: net, taskID: taskID, killSignal: task.KillSignal, - killTimeout: task.KillTimeout, + killTimeout: timeout, } } diff --git a/client/allocrunner/taskrunner/envoy_bootstrap_hook.go b/client/allocrunner/taskrunner/envoy_bootstrap_hook.go index 144a4a1607e..5849e1603d9 100644 --- a/client/allocrunner/taskrunner/envoy_bootstrap_hook.go +++ b/client/allocrunner/taskrunner/envoy_bootstrap_hook.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io" - "io/ioutil" "net" "os" "os/exec" @@ -44,27 +43,29 @@ const ( ) type consulTransportConfig struct { - HTTPAddr string // required - Auth string // optional, env CONSUL_HTTP_AUTH - SSL string // optional, env CONSUL_HTTP_SSL - VerifySSL string // optional, env CONSUL_HTTP_SSL_VERIFY - CAFile string // optional, arg -ca-file - CertFile string // optional, arg -client-cert - KeyFile string // optional, arg -client-key - Namespace string // optional, only consul Enterprise, 
env CONSUL_NAMESPACE + HTTPAddr string // required + Auth string // optional, env CONSUL_HTTP_AUTH + SSL string // optional, env CONSUL_HTTP_SSL + VerifySSL string // optional, env CONSUL_HTTP_SSL_VERIFY + GRPCCAFile string // optional, arg -grpc-ca-file + CAFile string // optional, arg -ca-file + CertFile string // optional, arg -client-cert + KeyFile string // optional, arg -client-key + Namespace string // optional, only consul Enterprise, env CONSUL_NAMESPACE // CAPath (dir) not supported by Nomad's config object } -func newConsulTransportConfig(consul *config.ConsulConfig) consulTransportConfig { +func newConsulTransportConfig(cc *config.ConsulConfig) consulTransportConfig { return consulTransportConfig{ - HTTPAddr: consul.Addr, - Auth: consul.Auth, - SSL: decodeTriState(consul.EnableSSL), - VerifySSL: decodeTriState(consul.VerifySSL), - CAFile: consul.CAFile, - CertFile: consul.CertFile, - KeyFile: consul.KeyFile, - Namespace: consul.Namespace, + HTTPAddr: cc.Addr, + Auth: cc.Auth, + SSL: decodeTriState(cc.EnableSSL), + VerifySSL: decodeTriState(cc.VerifySSL), + GRPCCAFile: cc.GRPCCAFile, + CAFile: cc.CAFile, + CertFile: cc.CertFile, + KeyFile: cc.KeyFile, + Namespace: cc.Namespace, } } @@ -125,7 +126,7 @@ type envoyBootstrapHook struct { // envoyBootstrapWaitTime is the total amount of time hook will wait for Consul envoyBootstrapWaitTime time.Duration - // envoyBootstrapInitialGap is the initial wait gap when retyring + // envoyBootstrapInitialGap is the initial wait gap when retrying envoyBoostrapInitialGap time.Duration // envoyBootstrapMaxJitter is the maximum amount of jitter applied to retries @@ -152,8 +153,8 @@ func newEnvoyBootstrapHook(c *envoyBootstrapHookConfig) *envoyBootstrapHook { } // getConsulNamespace will resolve the Consul namespace, choosing between -// - agent config (low precedence) -// - task group config (high precedence) +// - agent config (low precedence) +// - task group config (high precedence) func (h *envoyBootstrapHook) getConsulNamespace() string { var namespace string if h.consulConfig.Namespace != "" { @@ -424,7 +425,7 @@ func buildEnvoyBind(alloc *structs.Allocation, ifce, service, task string, taskE } func (h *envoyBootstrapHook) writeConfig(filename, config string) error { - if err := ioutil.WriteFile(filename, []byte(config), 0440); err != nil { + if err := os.WriteFile(filename, []byte(config), 0440); err != nil { _ = os.Remove(filename) return err } @@ -527,29 +528,19 @@ func (e envoyBootstrapArgs) args() []string { "-bootstrap", } - if v := e.gateway; v != "" { - arguments = append(arguments, "-gateway", v) - } - - if v := e.siToken; v != "" { - arguments = append(arguments, "-token", v) - } - - if v := e.consulConfig.CAFile; v != "" { - arguments = append(arguments, "-ca-file", v) - } - - if v := e.consulConfig.CertFile; v != "" { - arguments = append(arguments, "-client-cert", v) - } - - if v := e.consulConfig.KeyFile; v != "" { - arguments = append(arguments, "-client-key", v) + appendIfSet := func(param, value string) { + if value != "" { + arguments = append(arguments, param, value) + } } - if v := e.namespace; v != "" { - arguments = append(arguments, "-namespace", v) - } + appendIfSet("-gateway", e.gateway) + appendIfSet("-token", e.siToken) + appendIfSet("-grpc-ca-file", e.consulConfig.GRPCCAFile) + appendIfSet("-ca-file", e.consulConfig.CAFile) + appendIfSet("-client-cert", e.consulConfig.CertFile) + appendIfSet("-client-key", e.consulConfig.KeyFile) + appendIfSet("-namespace", e.namespace) return arguments } @@ -582,7 +573,7 @@ 
func (e envoyBootstrapArgs) env(env []string) []string { // Consul ACLs are enabled), it will be in place by the time we try to read it. func (h *envoyBootstrapHook) maybeLoadSIToken(task, dir string) (string, error) { tokenPath := filepath.Join(dir, sidsTokenFile) - token, err := ioutil.ReadFile(tokenPath) + token, err := os.ReadFile(tokenPath) if err != nil { if !os.IsNotExist(err) { h.logger.Error("failed to load SI token", "task", task, "error", err) diff --git a/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go b/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go index b1337c2886d..58279df2903 100644 --- a/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go +++ b/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go @@ -10,7 +10,6 @@ import ( "context" "encoding/json" "fmt" - "io/ioutil" "os" "path/filepath" "testing" @@ -23,8 +22,8 @@ import ( "github.com/hashicorp/nomad/client/taskenv" "github.com/hashicorp/nomad/client/testutil" agentconsul "github.com/hashicorp/nomad/command/agent/consul" - "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/args" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -43,11 +42,10 @@ const ( ) func writeTmp(t *testing.T, s string, fm os.FileMode) string { - dir, err := ioutil.TempDir("", "envoy-") - require.NoError(t, err) + dir := t.TempDir() fPath := filepath.Join(dir, sidsTokenFile) - err = ioutil.WriteFile(fPath, []byte(s), fm) + err := os.WriteFile(fPath, []byte(s), fm) require.NoError(t, err) return dir @@ -73,7 +71,6 @@ func TestEnvoyBootstrapHook_maybeLoadSIToken(t *testing.T) { t.Run("load token from file", func(t *testing.T) { token := uuid.Generate() f := writeTmp(t, token, 0440) - defer cleanupDir(t, f) h := newEnvoyBootstrapHook(&envoyBootstrapHookConfig{logger: testlog.HCLogger(t)}) cfg, err := h.maybeLoadSIToken("task1", f) @@ -84,7 +81,6 @@ func TestEnvoyBootstrapHook_maybeLoadSIToken(t *testing.T) { t.Run("file is unreadable", func(t *testing.T) { token := uuid.Generate() f := writeTmp(t, token, 0200) - defer cleanupDir(t, f) h := newEnvoyBootstrapHook(&envoyBootstrapHookConfig{logger: testlog.HCLogger(t)}) cfg, err := h.maybeLoadSIToken("task1", f) @@ -98,8 +94,8 @@ func TestEnvoyBootstrapHook_decodeTriState(t *testing.T) { ci.Parallel(t) require.Equal(t, "", decodeTriState(nil)) - require.Equal(t, "true", decodeTriState(helper.BoolToPtr(true))) - require.Equal(t, "false", decodeTriState(helper.BoolToPtr(false))) + require.Equal(t, "true", decodeTriState(pointer.Of(true))) + require.Equal(t, "false", decodeTriState(pointer.Of(false))) } var ( @@ -108,13 +104,14 @@ var ( } consulTLSConfig = consulTransportConfig{ - HTTPAddr: "2.2.2.2", // arg - Auth: "user:password", // env - SSL: "true", // env - VerifySSL: "true", // env - CAFile: "/etc/tls/ca-file", // arg - CertFile: "/etc/tls/cert-file", // arg - KeyFile: "/etc/tls/key-file", // arg + HTTPAddr: "2.2.2.2", // arg + Auth: "user:password", // env + SSL: "true", // env + VerifySSL: "true", // env + GRPCCAFile: "/etc/tls/grpc-ca-file", // arg + CAFile: "/etc/tls/ca-file", // arg + CertFile: "/etc/tls/cert-file", // arg + KeyFile: "/etc/tls/key-file", // arg } ) @@ -178,6 +175,7 @@ func TestEnvoyBootstrapHook_envoyBootstrapArgs(t *testing.T) { "-address", "127.0.0.1:19100", "-proxy-id", "s1-sidecar-proxy", "-bootstrap", + "-grpc-ca-file", "/etc/tls/grpc-ca-file", "-ca-file", "/etc/tls/ca-file", "-client-cert", 
"/etc/tls/cert-file", "-client-key", "/etc/tls/key-file", @@ -359,7 +357,7 @@ func TestEnvoyBootstrapHook_with_SI_token(t *testing.T) { // Insert service identity token in the secrets directory token := uuid.Generate() siTokenFile := filepath.Join(req.TaskDir.SecretsDir, sidsTokenFile) - err = ioutil.WriteFile(siTokenFile, []byte(token), 0440) + err = os.WriteFile(siTokenFile, []byte(token), 0440) require.NoError(t, err) resp := &interfaces.TaskPrestartResponse{} diff --git a/client/allocrunner/taskrunner/envoy_version_hook.go b/client/allocrunner/taskrunner/envoy_version_hook.go index 3501e27252a..ef958736d96 100644 --- a/client/allocrunner/taskrunner/envoy_version_hook.go +++ b/client/allocrunner/taskrunner/envoy_version_hook.go @@ -181,9 +181,9 @@ func (h *envoyVersionHook) tweakImage(configured string, supported map[string][] // semver sanitizes the envoy version string coming from Consul into the format // used by the Envoy project when publishing images (i.e. proper semver). This // resulting string value does NOT contain the 'v' prefix for 2 reasons: -// 1) the version library does not include the 'v' -// 2) its plausible unofficial images use the 3 numbers without the prefix for -// tagging their own images +// 1. the version library does not include the 'v' +// 2. its plausible unofficial images use the 3 numbers without the prefix for +// tagging their own images func semver(chosen string) (string, error) { v, err := version.NewVersion(chosen) if err != nil { diff --git a/client/allocrunner/taskrunner/getter/getter.go b/client/allocrunner/taskrunner/getter/getter.go index 4bbf7674cd0..5b8a9b8341f 100644 --- a/client/allocrunner/taskrunner/getter/getter.go +++ b/client/allocrunner/taskrunner/getter/getter.go @@ -5,68 +5,158 @@ import ( "fmt" "net/http" "net/url" + "runtime/debug" "strings" "github.com/hashicorp/go-cleanhttp" gg "github.com/hashicorp/go-getter" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/nomad/structs" ) -// httpClient is a shared HTTP client for use across all http/https Getter -// instantiations. The HTTP client is designed to be thread-safe, and using a pooled -// transport will help reduce excessive connections when clients are downloading lots -// of artifacts. -var httpClient = &http.Client{ - Transport: cleanhttp.DefaultPooledTransport(), -} - const ( // gitSSHPrefix is the prefix for downloading via git using ssh gitSSHPrefix = "git@github.com:" ) -// EnvReplacer is an interface which can interpolate environment variables and -// is usually satisfied by taskenv.TaskEnv. -type EnvReplacer interface { - ReplaceEnv(string) string - ClientPath(string, bool) (string, bool) +// Getter wraps go-getter calls in an artifact configuration. +type Getter struct { + logger hclog.Logger + + // httpClient is a shared HTTP client for use across all http/https + // Getter instantiations. The HTTP client is designed to be + // thread-safe, and using a pooled transport will help reduce excessive + // connections when clients are downloading lots of artifacts. + httpClient *http.Client + config *config.ArtifactConfig +} + +// NewGetter returns a new Getter instance. This function is called once per +// client and shared across alloc and task runners. 
+func NewGetter(logger hclog.Logger, config *config.ArtifactConfig) *Getter { + return &Getter{ + logger: logger, + httpClient: &http.Client{ + Transport: cleanhttp.DefaultPooledTransport(), + }, + config: config, + } +} + +// GetArtifact downloads an artifact into the specified task directory. +func (g *Getter) GetArtifact(taskEnv interfaces.EnvReplacer, artifact *structs.TaskArtifact) (returnErr error) { + // Recover from panics to avoid crashing the entire Nomad client due to + // artifact download failures, such as bugs in go-getter. + defer func() { + if r := recover(); r != nil { + g.logger.Error("panic while downloading artifact", + "artifact", artifact.GetterSource, + "error", r, + "stack", string(debug.Stack())) + returnErr = fmt.Errorf("getter panic: %v", r) + } + }() + + ggURL, err := getGetterUrl(taskEnv, artifact) + if err != nil { + return newGetError(artifact.GetterSource, err, false) + } + + dest, escapes := taskEnv.ClientPath(artifact.RelativeDest, true) + // Verify the destination is still in the task sandbox after interpolation + if escapes { + return newGetError(artifact.RelativeDest, + errors.New("artifact destination path escapes the alloc directory"), + false) + } + + // Convert from string getter mode to go-getter const + mode := gg.ClientModeAny + switch artifact.GetterMode { + case structs.GetterModeFile: + mode = gg.ClientModeFile + case structs.GetterModeDir: + mode = gg.ClientModeDir + } + + headers := getHeaders(taskEnv, artifact.GetterHeaders) + + if err := g.getClient(ggURL, headers, mode, dest).Get(); err != nil { + return newGetError(ggURL, err, true) + } + + return nil } // getClient returns a client that is suitable for Nomad downloading artifacts. -func getClient(src string, headers http.Header, mode gg.ClientMode, dst string) *gg.Client { +func (g *Getter) getClient(src string, headers http.Header, mode gg.ClientMode, dst string) *gg.Client { return &gg.Client{ Src: src, Dst: dst, Mode: mode, Umask: 060000000, - Getters: createGetters(headers), + Getters: g.createGetters(headers), + + // This will prevent copying or writing files through symlinks. + DisableSymlinks: true, + + // This will protect against decompression bombs. + Decompressors: gg.LimitedDecompressors(g.config.DecompressionLimitFileCount, g.config.DecompressionLimitSize), } } -func createGetters(header http.Header) map[string]gg.Getter { +func (g *Getter) createGetters(header http.Header) map[string]gg.Getter { httpGetter := &gg.HttpGetter{ Netrc: true, - Client: httpClient, + Client: g.httpClient, Header: header, + + // Do not support the custom X-Terraform-Get header and + // associated logic. + XTerraformGetDisabled: true, + + // Disable HEAD requests as they can produce corrupt files when + // retrying a download of a resource that has changed. + // hashicorp/go-getter#219 + DoNotCheckHeadFirst: true, + + // Read timeout for HTTP operations. Must be long enough to + // accommodate large/slow downloads. + ReadTimeout: g.config.HTTPReadTimeout, + + // Maximum download size. Must be large enough to accommodate + // large downloads. + MaxBytes: g.config.HTTPMaxBytes, } + // Explicitly create fresh set of supported Getter for each Client, because // go-getter is not thread-safe. Use a shared HTTP client for http/https Getter, // with pooled transport which is thread-safe. // // If a getter type is not listed here, it is not supported (e.g. file). 
return map[string]gg.Getter{ - "git": new(gg.GitGetter), - "gcs": new(gg.GCSGetter), - "hg": new(gg.HgGetter), - "s3": new(gg.S3Getter), + "git": &gg.GitGetter{ + Timeout: g.config.GitTimeout, + }, + "hg": &gg.HgGetter{ + Timeout: g.config.HgTimeout, + }, + "gcs": &gg.GCSGetter{ + Timeout: g.config.GCSTimeout, + }, + "s3": &gg.S3Getter{ + Timeout: g.config.S3Timeout, + }, "http": httpGetter, "https": httpGetter, } } // getGetterUrl returns the go-getter URL to download the artifact. -func getGetterUrl(taskEnv EnvReplacer, artifact *structs.TaskArtifact) (string, error) { +func getGetterUrl(taskEnv interfaces.EnvReplacer, artifact *structs.TaskArtifact) (string, error) { source := taskEnv.ReplaceEnv(artifact.GetterSource) // Handle an invalid URL when given a go-getter url such as @@ -98,7 +188,7 @@ func getGetterUrl(taskEnv EnvReplacer, artifact *structs.TaskArtifact) (string, return ggURL, nil } -func getHeaders(env EnvReplacer, m map[string]string) http.Header { +func getHeaders(env interfaces.EnvReplacer, m map[string]string) http.Header { if len(m) == 0 { return nil } @@ -110,38 +200,6 @@ func getHeaders(env EnvReplacer, m map[string]string) http.Header { return headers } -// GetArtifact downloads an artifact into the specified task directory. -func GetArtifact(taskEnv EnvReplacer, artifact *structs.TaskArtifact) error { - ggURL, err := getGetterUrl(taskEnv, artifact) - if err != nil { - return newGetError(artifact.GetterSource, err, false) - } - - dest, escapes := taskEnv.ClientPath(artifact.RelativeDest, true) - // Verify the destination is still in the task sandbox after interpolation - if escapes { - return newGetError(artifact.RelativeDest, - errors.New("artifact destination path escapes the alloc directory"), - false) - } - - // Convert from string getter mode to go-getter const - mode := gg.ClientModeAny - switch artifact.GetterMode { - case structs.GetterModeFile: - mode = gg.ClientModeFile - case structs.GetterModeDir: - mode = gg.ClientModeDir - } - - headers := getHeaders(taskEnv, artifact.GetterHeaders) - if err := getClient(ggURL, headers, mode, dest).Get(); err != nil { - return newGetError(ggURL, err, true) - } - - return nil -} - // GetError wraps the underlying artifact fetching error with the URL. It // implements the RecoverableError interface. 
type GetError struct { diff --git a/client/allocrunner/taskrunner/getter/getter_test.go b/client/allocrunner/taskrunner/getter/getter_test.go index 274b2b71d2a..31ba2b292a3 100644 --- a/client/allocrunner/taskrunner/getter/getter_test.go +++ b/client/allocrunner/taskrunner/getter/getter_test.go @@ -13,9 +13,14 @@ import ( "runtime" "strings" "testing" + "time" + gg "github.com/hashicorp/go-getter" + "github.com/hashicorp/go-hclog" + clientconfig "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/escapingfs" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/stretchr/testify/require" @@ -27,11 +32,11 @@ type noopReplacer struct { } func clientPath(taskDir, path string, join bool) (string, bool) { - if !filepath.IsAbs(path) || (helper.PathEscapesSandbox(taskDir, path) && join) { + if !filepath.IsAbs(path) || (escapingfs.PathEscapesSandbox(taskDir, path) && join) { path = filepath.Join(taskDir, path) } path = filepath.Clean(path) - if taskDir != "" && !helper.PathEscapesSandbox(taskDir, path) { + if taskDir != "" && !escapingfs.PathEscapesSandbox(taskDir, path) { return path, false } return path, true @@ -46,12 +51,25 @@ func (r noopReplacer) ClientPath(p string, join bool) (string, bool) { return path, escapes } -func noopTaskEnv(taskDir string) EnvReplacer { +func noopTaskEnv(taskDir string) interfaces.EnvReplacer { return noopReplacer{ taskDir: taskDir, } } +// panicReplacer is a version of taskenv.TaskEnv.ReplaceEnv that panics. +type panicReplacer struct{} + +func (panicReplacer) ReplaceEnv(_ string) string { + panic("panic") +} +func (panicReplacer) ClientPath(_ string, _ bool) (string, bool) { + panic("panic") +} +func panicTaskEnv() interfaces.EnvReplacer { + return panicReplacer{} +} + // upperReplacer is a version of taskenv.TaskEnv.ReplaceEnv that upper-cases // the given input. 
type upperReplacer struct { @@ -67,8 +85,65 @@ func (u upperReplacer) ClientPath(p string, join bool) (string, bool) { return path, escapes } -func removeAllT(t *testing.T, path string) { - require.NoError(t, os.RemoveAll(path)) +func TestGetter_getClient(t *testing.T) { + const fileCountLimit = 555 + const fileSizeLimit = int64(666) + getter := NewGetter(hclog.NewNullLogger(), &clientconfig.ArtifactConfig{ + HTTPReadTimeout: time.Minute, + HTTPMaxBytes: 100_000, + GCSTimeout: 1 * time.Minute, + GitTimeout: 2 * time.Minute, + HgTimeout: 3 * time.Minute, + S3Timeout: 4 * time.Minute, + DecompressionLimitFileCount: fileCountLimit, + DecompressionLimitSize: fileSizeLimit, + }) + + client := getter.getClient("src", nil, gg.ClientModeAny, "dst") + + t.Run("check symlink config", func(t *testing.T) { + require.True(t, client.DisableSymlinks) + }) + + t.Run("check file size limits", func(t *testing.T) { + require.Equal(t, fileSizeLimit, client.Decompressors["zip"].(*gg.ZipDecompressor).FileSizeLimit) + require.Equal(t, fileCountLimit, client.Decompressors["zip"].(*gg.ZipDecompressor).FilesLimit) + + require.Equal(t, fileSizeLimit, client.Decompressors["tar.gz"].(*gg.TarGzipDecompressor).FileSizeLimit) + require.Equal(t, fileCountLimit, client.Decompressors["tar.gz"].(*gg.TarGzipDecompressor).FilesLimit) + + require.Equal(t, fileSizeLimit, client.Decompressors["xz"].(*gg.XzDecompressor).FileSizeLimit) + // xz does not support files count limit + }) + + t.Run("check http config", func(t *testing.T) { + require.True(t, client.Getters["http"].(*gg.HttpGetter).XTerraformGetDisabled) + require.Equal(t, time.Minute, client.Getters["http"].(*gg.HttpGetter).ReadTimeout) + require.Equal(t, int64(100_000), client.Getters["http"].(*gg.HttpGetter).MaxBytes) + }) + + t.Run("check https config", func(t *testing.T) { + require.True(t, client.Getters["https"].(*gg.HttpGetter).XTerraformGetDisabled) + require.Equal(t, time.Minute, client.Getters["https"].(*gg.HttpGetter).ReadTimeout) + require.Equal(t, int64(100_000), client.Getters["https"].(*gg.HttpGetter).MaxBytes) + }) + + t.Run("check gcs config", func(t *testing.T) { + require.Equal(t, client.Getters["gcs"].(*gg.GCSGetter).Timeout, 1*time.Minute) + }) + + t.Run("check git config", func(t *testing.T) { + require.Equal(t, client.Getters["git"].(*gg.GitGetter).Timeout, 2*time.Minute) + }) + + t.Run("check hg config", func(t *testing.T) { + require.Equal(t, client.Getters["hg"].(*gg.HgGetter).Timeout, 3*time.Minute) + }) + + t.Run("check s3 config", func(t *testing.T) { + require.Equal(t, client.Getters["s3"].(*gg.S3Getter).Timeout, 4*time.Minute) + }) + } func TestGetArtifact_getHeaders(t *testing.T) { @@ -92,7 +167,7 @@ func TestGetArtifact_getHeaders(t *testing.T) { } func TestGetArtifact_Headers(t *testing.T) { - file := "output.txt" + const file = "output.txt" // Create the test server with a handler that will validate headers are set. ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -109,9 +184,7 @@ func TestGetArtifact_Headers(t *testing.T) { defer ts.Close() // Create a temp directory to download into. - taskDir, err := ioutil.TempDir("", "nomad-test") - require.NoError(t, err) - defer removeAllT(t, taskDir) + taskDir := t.TempDir() // Create the artifact. artifact := &structs.TaskArtifact{ @@ -124,10 +197,12 @@ func TestGetArtifact_Headers(t *testing.T) { } // Download the artifact. 
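The new `GetArtifact` also wraps the entire download in a deferred `recover`, so a panic inside go-getter is reported as an ordinary error instead of killing the client process (the `TestGetArtifact_handlePanic` case later in this file exercises exactly that). A minimal stdlib-only sketch of the pattern, with an illustrative `download` helper standing in for the real method:

```go
package main

import (
	"fmt"
	"runtime/debug"
)

// download is a stand-in for any helper that may panic (for example a bug in
// a third-party fetcher). The named return lets the deferred recover rewrite
// the error before the caller sees it.
func download(src string) (err error) {
	defer func() {
		if r := recover(); r != nil {
			// Convert the panic into an error and keep the stack for logging.
			err = fmt.Errorf("getter panic: %v\n%s", r, debug.Stack())
		}
	}()
	if src == "" {
		panic("empty source") // simulated library bug
	}
	return nil
}

func main() {
	if err := download(""); err != nil {
		fmt.Println("recovered:", err)
	}
}
```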
+ getter := TestDefaultGetter(t) taskEnv := upperReplacer{ taskDir: taskDir, } - err = GetArtifact(taskEnv, artifact) + + err := getter.GetArtifact(taskEnv, artifact) require.NoError(t, err) // Verify artifact exists. @@ -145,11 +220,7 @@ func TestGetArtifact_FileAndChecksum(t *testing.T) { defer ts.Close() // Create a temp directory to download into - taskDir, err := ioutil.TempDir("", "nomad-test") - if err != nil { - t.Fatalf("failed to make temp directory: %v", err) - } - defer removeAllT(t, taskDir) + taskDir := t.TempDir() // Create the artifact file := "test.sh" @@ -161,7 +232,8 @@ func TestGetArtifact_FileAndChecksum(t *testing.T) { } // Download the artifact - if err := GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { + getter := TestDefaultGetter(t) + if err := getter.GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { t.Fatalf("GetArtifact failed: %v", err) } @@ -177,11 +249,7 @@ func TestGetArtifact_File_RelativeDest(t *testing.T) { defer ts.Close() // Create a temp directory to download into - taskDir, err := ioutil.TempDir("", "nomad-test") - if err != nil { - t.Fatalf("failed to make temp directory: %v", err) - } - defer removeAllT(t, taskDir) + taskDir := t.TempDir() // Create the artifact file := "test.sh" @@ -195,7 +263,8 @@ func TestGetArtifact_File_RelativeDest(t *testing.T) { } // Download the artifact - if err := GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { + getter := TestDefaultGetter(t) + if err := getter.GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { t.Fatalf("GetArtifact failed: %v", err) } @@ -211,11 +280,7 @@ func TestGetArtifact_File_EscapeDest(t *testing.T) { defer ts.Close() // Create a temp directory to download into - taskDir, err := ioutil.TempDir("", "nomad-test") - if err != nil { - t.Fatalf("failed to make temp directory: %v", err) - } - defer removeAllT(t, taskDir) + taskDir := t.TempDir() // Create the artifact file := "test.sh" @@ -229,7 +294,8 @@ func TestGetArtifact_File_EscapeDest(t *testing.T) { } // attempt to download the artifact - err = GetArtifact(noopTaskEnv(taskDir), artifact) + getter := TestDefaultGetter(t) + err := getter.GetArtifact(noopTaskEnv(taskDir), artifact) if err == nil || !strings.Contains(err.Error(), "escapes") { t.Fatalf("expected GetArtifact to disallow sandbox escape: %v", err) } @@ -263,11 +329,7 @@ func TestGetArtifact_InvalidChecksum(t *testing.T) { defer ts.Close() // Create a temp directory to download into - taskDir, err := ioutil.TempDir("", "nomad-test") - if err != nil { - t.Fatalf("failed to make temp directory: %v", err) - } - defer removeAllT(t, taskDir) + taskDir := t.TempDir() // Create the artifact with an incorrect checksum file := "test.sh" @@ -279,7 +341,8 @@ func TestGetArtifact_InvalidChecksum(t *testing.T) { } // Download the artifact and expect an error - if err := GetArtifact(noopTaskEnv(taskDir), artifact); err == nil { + getter := TestDefaultGetter(t) + if err := getter.GetArtifact(noopTaskEnv(taskDir), artifact); err == nil { t.Fatalf("GetArtifact should have failed") } } @@ -324,11 +387,7 @@ func TestGetArtifact_Archive(t *testing.T) { // Create a temp directory to download into and create some of the same // files that exist in the artifact to ensure they are overridden - taskDir, err := ioutil.TempDir("", "nomad-test") - if err != nil { - t.Fatalf("failed to make temp directory: %v", err) - } - defer removeAllT(t, taskDir) + taskDir := t.TempDir() create := map[string]string{ "exist/my.config": "to be replaced", @@ -344,7 +403,8 @@ func 
TestGetArtifact_Archive(t *testing.T) { }, } - if err := GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { + getter := TestDefaultGetter(t) + if err := getter.GetArtifact(noopTaskEnv(taskDir), artifact); err != nil { t.Fatalf("GetArtifact failed: %v", err) } @@ -365,9 +425,7 @@ func TestGetArtifact_Setuid(t *testing.T) { // Create a temp directory to download into and create some of the same // files that exist in the artifact to ensure they are overridden - taskDir, err := ioutil.TempDir("", "nomad-test") - require.NoError(t, err) - defer removeAllT(t, taskDir) + taskDir := t.TempDir() file := "setuid.tgz" artifact := &structs.TaskArtifact{ @@ -377,7 +435,8 @@ func TestGetArtifact_Setuid(t *testing.T) { }, } - require.NoError(t, GetArtifact(noopTaskEnv(taskDir), artifact)) + getter := TestDefaultGetter(t) + require.NoError(t, getter.GetArtifact(noopTaskEnv(taskDir), artifact)) var expected map[string]int @@ -407,6 +466,15 @@ func TestGetArtifact_Setuid(t *testing.T) { } } +// TestGetArtifact_handlePanic tests that a panic during the getter execution +// does not cause its goroutine to crash. +func TestGetArtifact_handlePanic(t *testing.T) { + getter := TestDefaultGetter(t) + err := getter.GetArtifact(panicTaskEnv(), &structs.TaskArtifact{}) + require.Error(t, err) + require.Contains(t, err.Error(), "panic") +} + func TestGetGetterUrl_Queries(t *testing.T) { cases := []struct { name string diff --git a/client/allocrunner/taskrunner/getter/testing.go b/client/allocrunner/taskrunner/getter/testing.go new file mode 100644 index 00000000000..d4d8ad392bc --- /dev/null +++ b/client/allocrunner/taskrunner/getter/testing.go @@ -0,0 +1,23 @@ +//go:build !release + +package getter + +import ( + "testing" + + "github.com/hashicorp/go-hclog" + clientconfig "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/structs/config" + "github.com/shoenig/test/must" +) + +// TestDefaultGetter creates a Getter suitable for unit test cases. +func TestDefaultGetter(t *testing.T) *Getter { + defaultConfig := config.DefaultArtifactConfig() + defaultConfig.DecompressionSizeLimit = pointer.Of("1MB") + defaultConfig.DecompressionFileCountLimit = pointer.Of(10) + getterConf, err := clientconfig.ArtifactConfigFromAgent(defaultConfig) + must.NoError(t, err) + return NewGetter(hclog.NewNullLogger(), getterConf) +} diff --git a/client/allocrunner/taskrunner/lifecycle.go b/client/allocrunner/taskrunner/lifecycle.go index b812156a846..90c3d37189a 100644 --- a/client/allocrunner/taskrunner/lifecycle.go +++ b/client/allocrunner/taskrunner/lifecycle.go @@ -6,28 +6,103 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -// Restart a task. Returns immediately if no task is running. Blocks until -// existing task exits or passed-in context is canceled. +// Restart restarts a task that is already running. Returns an error if the +// task is not running. Blocks until existing task exits or passed-in context +// is canceled. 
func (tr *TaskRunner) Restart(ctx context.Context, event *structs.TaskEvent, failure bool) error { - tr.logger.Trace("Restart requested", "failure", failure) + tr.logger.Trace("Restart requested", "failure", failure, "event", event.GoString()) - // Grab the handle - handle := tr.getDriverHandle() + taskState := tr.TaskState() + if taskState == nil { + return ErrTaskNotRunning + } - // Check it is running - if handle == nil { + switch taskState.State { + case structs.TaskStatePending, structs.TaskStateDead: + return ErrTaskNotRunning + } + + return tr.restartImpl(ctx, event, failure) +} + +// ForceRestart restarts a task that is already running or reruns it if dead. +// Returns an error if the task is not able to rerun. Blocks until existing +// task exits or passed-in context is canceled. +// +// Callers must restart the AllocRunner taskCoordinator beforehand to make sure +// the task will be able to run again. +func (tr *TaskRunner) ForceRestart(ctx context.Context, event *structs.TaskEvent, failure bool) error { + tr.logger.Trace("Force restart requested", "failure", failure, "event", event.GoString()) + + taskState := tr.TaskState() + if taskState == nil { + return ErrTaskNotRunning + } + + tr.stateLock.Lock() + localState := tr.localState.Copy() + tr.stateLock.Unlock() + + if localState == nil { + return ErrTaskNotRunning + } + + switch taskState.State { + case structs.TaskStatePending: + return ErrTaskNotRunning + + case structs.TaskStateDead: + // Tasks that are in the "dead" state are only allowed to restart if + // their Run() method is still active. + if localState.RunComplete { + return ErrTaskNotRunning + } + } + + return tr.restartImpl(ctx, event, failure) +} + +// restartImpl implements the task restart process. +// +// It should never be called directly as it doesn't verify if the task state +// allows for a restart. +func (tr *TaskRunner) restartImpl(ctx context.Context, event *structs.TaskEvent, failure bool) error { + + // Check if the task is able to restart based on its state and the type of + // restart event that was triggered. + taskState := tr.TaskState() + if taskState == nil { return ErrTaskNotRunning } // Emit the event since it may take a long time to kill tr.EmitEvent(event) - // Run the pre-kill hooks prior to restarting the task - tr.preKill() - // Tell the restart tracker that a restart triggered the exit tr.restartTracker.SetRestartTriggered(failure) + // Signal a restart to unblock tasks that are in the "dead" state, but + // don't block since the channel is buffered. Only one signal is enough to + // notify the tr.Run() loop. + // The channel must be signaled after SetRestartTriggered is called so the + // tr.Run() loop runs again. + if taskState.State == structs.TaskStateDead { + select { + case tr.restartCh <- struct{}{}: + default: + } + } + + // Grab the handle to see if the task is still running and needs to be + // killed. + handle := tr.getDriverHandle() + if handle == nil { + return nil + } + + // Run the pre-kill hooks prior to restarting the task + tr.preKill() + // Grab a handle to the wait channel that will timeout with context cancelation // _before_ killing the task. waitCh, err := handle.WaitCh(ctx) @@ -69,14 +144,17 @@ func (tr *TaskRunner) Signal(event *structs.TaskEvent, s string) error { // Kill a task. Blocks until task exits or context is canceled. State is set to // dead.
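`restartImpl` signals `restartCh` with a non-blocking send; because the channel has capacity 1, any burst of restart requests collapses into a single wakeup of the run loop. The shape of that signal/coalesce pattern, reduced to a toy example outside the TaskRunner types:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Capacity 1: one pending signal is enough, any extra ones are dropped.
	restartCh := make(chan struct{}, 1)

	signalRestart := func() {
		select {
		case restartCh <- struct{}{}:
		default: // a signal is already pending; nothing more to do
		}
	}

	go func() {
		for range restartCh {
			fmt.Println("restart triggered")
		}
	}()

	// Many concurrent requests, but the receiver only needs to see one.
	for i := 0; i < 5; i++ {
		signalRestart()
	}
	time.Sleep(100 * time.Millisecond)
}
```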
func (tr *TaskRunner) Kill(ctx context.Context, event *structs.TaskEvent) error { - tr.logger.Trace("Kill requested", "event_type", event.Type, "event_reason", event.KillReason) + tr.logger.Trace("Kill requested") // Cancel the task runner to break out of restart delay or the main run // loop. tr.killCtxCancel() // Emit kill event - tr.EmitEvent(event) + if event != nil { + tr.logger.Trace("Kill event", "event_type", event.Type, "event_reason", event.KillReason) + tr.EmitEvent(event) + } select { case <-tr.WaitCh(): diff --git a/client/allocrunner/taskrunner/logmon_hook_test.go b/client/allocrunner/taskrunner/logmon_hook_test.go index 8d17b7b66ec..bf5f9e7f078 100644 --- a/client/allocrunner/taskrunner/logmon_hook_test.go +++ b/client/allocrunner/taskrunner/logmon_hook_test.go @@ -3,19 +3,17 @@ package taskrunner import ( "context" "encoding/json" - "io/ioutil" "net" - "os" "testing" plugin "github.com/hashicorp/go-plugin" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocrunner/interfaces" - "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" pstructs "github.com/hashicorp/nomad/plugins/shared/structs" "github.com/stretchr/testify/require" + "golang.org/x/exp/maps" ) // Statically assert the logmon hook implements the expected interfaces @@ -66,11 +64,7 @@ func TestTaskRunner_LogmonHook_StartStop(t *testing.T) { alloc := mock.BatchAlloc() task := alloc.Job.TaskGroups[0].Tasks[0] - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(t, err) - defer func() { - require.NoError(t, os.RemoveAll(dir)) - }() + dir := t.TempDir() hookConf := newLogMonHookConfig(task.Name, dir) runner := &TaskRunner{logmonHookConfig: hookConf} @@ -102,7 +96,7 @@ func TestTaskRunner_LogmonHook_StartStop(t *testing.T) { // Running stop should shutdown logmon stopReq := interfaces.TaskStopRequest{ - ExistingState: helper.CopyMapStringString(resp.State), + ExistingState: maps.Clone(resp.State), } require.NoError(t, hook.Stop(context.Background(), &stopReq, nil)) } diff --git a/client/allocrunner/taskrunner/logmon_hook_unix_test.go b/client/allocrunner/taskrunner/logmon_hook_unix_test.go index 03ab80ea1a7..f98c01b06cc 100644 --- a/client/allocrunner/taskrunner/logmon_hook_unix_test.go +++ b/client/allocrunner/taskrunner/logmon_hook_unix_test.go @@ -7,7 +7,6 @@ import ( "context" "encoding/json" "fmt" - "io/ioutil" "os" "syscall" "testing" @@ -31,11 +30,7 @@ func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { alloc := mock.BatchAlloc() task := alloc.Job.TaskGroups[0].Tasks[0] - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(t, err) - defer func() { - require.NoError(t, os.RemoveAll(dir)) - }() + dir := t.TempDir() hookConf := newLogMonHookConfig(task.Name, dir) runner := &TaskRunner{logmonHookConfig: hookConf} @@ -84,7 +79,7 @@ func TestTaskRunner_LogmonHook_StartCrashStop(t *testing.T) { logmonReattachKey: origHookData, } resp = interfaces.TaskPrestartResponse{} - err = hook.Prestart(context.Background(), &req, &resp) + err := hook.Prestart(context.Background(), &req, &resp) require.NoError(t, err) require.NotEqual(t, origState, resp.State) @@ -100,11 +95,7 @@ func TestTaskRunner_LogmonHook_ShutdownMidStart(t *testing.T) { alloc := mock.BatchAlloc() task := alloc.Job.TaskGroups[0].Tasks[0] - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(t, err) - defer func() { - require.NoError(t, os.RemoveAll(dir)) - }() + dir := t.TempDir() hookConf := newLogMonHookConfig(task.Name, dir) runner 
:= &TaskRunner{logmonHookConfig: hookConf} diff --git a/client/allocrunner/taskrunner/plugin_supervisor_hook.go b/client/allocrunner/taskrunner/plugin_supervisor_hook.go index 3983d001d1c..a21c632f8da 100644 --- a/client/allocrunner/taskrunner/plugin_supervisor_hook.go +++ b/client/allocrunner/taskrunner/plugin_supervisor_hook.go @@ -22,11 +22,11 @@ import ( // to their requisite plugin manager. // // It provides a few things to a plugin task running inside Nomad. These are: -// * A mount to the `csi_plugin.mount_dir` where the plugin will create its csi.sock -// * A mount to `local/csi` that node plugins will use to stage volume mounts. -// * When the task has started, it starts a loop of attempting to connect to the -// plugin, to perform initial fingerprinting of the plugins capabilities before -// notifying the plugin manager of the plugin. +// - A mount to the `csi_plugin.mount_dir` where the plugin will create its csi.sock +// - A mount to `local/csi` that node plugins will use to stage volume mounts. +// - When the task has started, it starts a loop of attempting to connect to the +// plugin, to perform initial fingerprinting of the plugins capabilities before +// notifying the plugin manager of the plugin. type csiPluginSupervisorHook struct { logger hclog.Logger alloc *structs.Allocation @@ -81,7 +81,7 @@ var _ interfaces.TaskStopHook = &csiPluginSupervisorHook{} // Per-allocation directories of unix domain sockets used to communicate // with the CSI plugin. Nomad creates the directory and the plugin creates // the socket file. This directory is bind-mounted to the -// csi_plugin.mount_config dir in the plugin task. +// csi_plugin.mount_dir in the plugin task. // // {plugin-type}/{plugin-id}/ // staging/ @@ -103,6 +103,20 @@ func newCSIPluginSupervisorHook(config *csiPluginSupervisorHookConfig) *csiPlugi socketMountPoint := filepath.Join(config.clientStateDirPath, "csi", "plugins", config.runner.Alloc().ID) + // In v1.3.0, Nomad started instructing CSI plugins to stage and publish + // within /local/csi. Plugins deployed after the introduction of + // StagePublishBaseDir default to StagePublishBaseDir = /local/csi. However, + // plugins deployed between v1.3.0 and the introduction of + // StagePublishBaseDir have StagePublishBaseDir = "". Default to /local/csi here + // to avoid breaking plugins that aren't redeployed. + if task.CSIPluginConfig.StagePublishBaseDir == "" { + task.CSIPluginConfig.StagePublishBaseDir = filepath.Join("/local", "csi") + } + + if task.CSIPluginConfig.HealthTimeout == 0 { + task.CSIPluginConfig.HealthTimeout = 30 * time.Second + } + shutdownCtx, cancelFn := context.WithCancel(context.Background()) hook := &csiPluginSupervisorHook{ @@ -153,8 +167,7 @@ func (h *csiPluginSupervisorHook) Prestart(ctx context.Context, } // where the staging and per-alloc directories will be mounted volumeStagingMounts := &drivers.MountConfig{ - // TODO(tgross): add this TaskPath to the CSIPluginConfig as well - TaskPath: "/local/csi", + TaskPath: h.task.CSIPluginConfig.StagePublishBaseDir, HostPath: h.mountPoint, Readonly: false, PropagationMode: "bidirectional", @@ -234,13 +247,13 @@ func (h *csiPluginSupervisorHook) Poststart(_ context.Context, _ *interfaces.Tas // the passed in context is terminated. // // The supervisor works by: -// - Initially waiting for the plugin to become available. This loop is expensive -// and may do things like create new gRPC Clients on every iteration. 
-// - After receiving an initial healthy status, it will inform the plugin catalog -// of the plugin, registering it with the plugins fingerprinted capabilities. -// - We then perform a more lightweight check, simply probing the plugin on a less -// frequent interval to ensure it is still alive, emitting task events when this -// status changes. +// - Initially waiting for the plugin to become available. This loop is expensive +// and may do things like create new gRPC Clients on every iteration. +// - After receiving an initial healthy status, it will inform the plugin catalog +// of the plugin, registering it with the plugins fingerprinted capabilities. +// - We then perform a more lightweight check, simply probing the plugin on a less +// frequent interval to ensure it is still alive, emitting task events when this +// status changes. // // Deeper fingerprinting of the plugin is implemented by the csimanager. func (h *csiPluginSupervisorHook) ensureSupervisorLoop(ctx context.Context) { @@ -253,7 +266,7 @@ func (h *csiPluginSupervisorHook) ensureSupervisorLoop(ctx context.Context) { // We're in Poststart at this point, so if we can't connect within // this deadline, assume it's broken so we can restart the task - startCtx, startCancelFn := context.WithTimeout(ctx, 30*time.Second) + startCtx, startCancelFn := context.WithTimeout(ctx, h.task.CSIPluginConfig.HealthTimeout) defer startCancelFn() var err error @@ -356,7 +369,7 @@ func (h *csiPluginSupervisorHook) registerPlugin(client csi.CSIPlugin, socketPat Options: map[string]string{ "Provider": info.Name, // vendor name "MountPoint": h.mountPoint, - "ContainerMountPoint": "/local/csi", + "ContainerMountPoint": h.task.CSIPluginConfig.StagePublishBaseDir, }, } } @@ -441,7 +454,7 @@ func (h *csiPluginSupervisorHook) kill(ctx context.Context, reason error) { if err := h.lifecycle.Kill(ctx, structs.NewTaskEvent(structs.TaskKilling). SetFailsTask(). - SetDisplayMessage("CSI plugin did not become healthy before timeout"), + SetDisplayMessage(fmt.Sprintf("CSI plugin did not become healthy before configured %v health timeout", h.task.CSIPluginConfig.HealthTimeout.String())), ); err != nil { h.logger.Error("failed to kill task", "kill_reason", reason, "error", err) } diff --git a/client/allocrunner/taskrunner/remotetask_hook.go b/client/allocrunner/taskrunner/remotetask_hook.go index 2068b52d9bf..4fd9a63769b 100644 --- a/client/allocrunner/taskrunner/remotetask_hook.go +++ b/client/allocrunner/taskrunner/remotetask_hook.go @@ -32,10 +32,10 @@ func (h *remoteTaskHook) Name() string { } // Prestart performs 2 remote task driver related tasks: -// 1. If there is no local handle, see if there is a handle propagated from a -// previous alloc to be restored. -// 2. If the alloc is lost make sure the task signal is set to detach instead -// of kill. +// 1. If there is no local handle, see if there is a handle propagated from a +// previous alloc to be restored. +// 2. If the alloc is lost make sure the task signal is set to detach instead +// of kill. 
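The supervisor hook now defaults `HealthTimeout` to 30 seconds when the jobspec leaves it unset and feeds the value into the startup probe's `context.WithTimeout` instead of a hard-coded constant. A simplified sketch of that default-then-deadline shape, using stand-in types rather than the real hook:

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// Config stands in for the plugin configuration; zero means "not set".
type Config struct {
	HealthTimeout time.Duration
}

// waitHealthy polls probe until it succeeds or the configured (or defaulted)
// timeout expires.
func waitHealthy(cfg Config, probe func(context.Context) error) error {
	if cfg.HealthTimeout == 0 {
		cfg.HealthTimeout = 30 * time.Second // keep the previous behavior as the default
	}
	ctx, cancel := context.WithTimeout(context.Background(), cfg.HealthTimeout)
	defer cancel()

	for {
		if err := probe(ctx); err == nil {
			return nil
		}
		select {
		case <-ctx.Done():
			return fmt.Errorf("plugin did not become healthy within %s: %w",
				cfg.HealthTimeout, ctx.Err())
		case <-time.After(time.Second):
		}
	}
}

func main() {
	err := waitHealthy(Config{HealthTimeout: 2 * time.Second},
		func(context.Context) error { return errors.New("not ready") })
	fmt.Println(err)
}
```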
func (h *remoteTaskHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { if h.tr.getDriverHandle() != nil { // Driver handle already exists so don't try to load remote @@ -72,7 +72,7 @@ func (h *remoteTaskHook) Prestart(ctx context.Context, req *interfaces.TaskPrest return nil } - h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), taskInfo.NetworkOverride)) + h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), h.tr.clientConfig.MaxKillTimeout, taskInfo.NetworkOverride)) h.tr.stateLock.Lock() h.tr.localState.TaskHandle = th diff --git a/client/allocrunner/taskrunner/restarts/restarts.go b/client/allocrunner/taskrunner/restarts/restarts.go index a117e8d49d1..0e39e471524 100644 --- a/client/allocrunner/taskrunner/restarts/restarts.go +++ b/client/allocrunner/taskrunner/restarts/restarts.go @@ -139,11 +139,11 @@ func (r *RestartTracker) GetCount() int { // GetState returns the tasks next state given the set exit code and start // error. One of the following states are returned: -// * TaskRestarting - Task should be restarted -// * TaskNotRestarting - Task should not be restarted and has exceeded its -// restart policy. -// * TaskTerminated - Task has terminated successfully and does not need a -// restart. +// - TaskRestarting - Task should be restarted +// - TaskNotRestarting - Task should not be restarted and has exceeded its +// restart policy. +// - TaskTerminated - Task has terminated successfully and does not need a +// restart. // // If TaskRestarting is returned, the duration is how long to wait until // starting the task again. diff --git a/client/allocrunner/taskrunner/sids_hook.go b/client/allocrunner/taskrunner/sids_hook.go index 667516986ed..fa087baed67 100644 --- a/client/allocrunner/taskrunner/sids_hook.go +++ b/client/allocrunner/taskrunner/sids_hook.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "io/ioutil" "os" "path/filepath" "sync" @@ -147,7 +146,7 @@ func (h *sidsHook) earlyExit() bool { // writeToken writes token into the secrets directory for the task. func (h *sidsHook) writeToken(dir string, token string) error { tokenPath := filepath.Join(dir, sidsTokenFile) - if err := ioutil.WriteFile(tokenPath, []byte(token), sidsTokenFilePerms); err != nil { + if err := os.WriteFile(tokenPath, []byte(token), sidsTokenFilePerms); err != nil { return fmt.Errorf("failed to write SI token: %w", err) } return nil @@ -158,7 +157,7 @@ func (h *sidsHook) writeToken(dir string, token string) error { // is returned only for some other (e.g. disk IO) error. 
func (h *sidsHook) recoverToken(dir string) (string, error) { tokenPath := filepath.Join(dir, sidsTokenFile) - token, err := ioutil.ReadFile(tokenPath) + token, err := os.ReadFile(tokenPath) if err != nil { if !os.IsNotExist(err) { h.logger.Error("failed to recover SI token", "error", err) diff --git a/client/allocrunner/taskrunner/sids_hook_test.go b/client/allocrunner/taskrunner/sids_hook_test.go index f475c6a2f10..7b22f231cf0 100644 --- a/client/allocrunner/taskrunner/sids_hook_test.go +++ b/client/allocrunner/taskrunner/sids_hook_test.go @@ -8,7 +8,6 @@ package taskrunner import ( "context" - "io/ioutil" "os" "path/filepath" "testing" @@ -22,24 +21,12 @@ import ( "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/require" "golang.org/x/sys/unix" ) var _ interfaces.TaskPrestartHook = (*sidsHook)(nil) -func tmpDir(t *testing.T) string { - dir, err := ioutil.TempDir("", "sids-") - require.NoError(t, err) - return dir -} - -func cleanupDir(t *testing.T, dir string) { - err := os.RemoveAll(dir) - require.NoError(t, err) -} - func sidecar(task string) (string, structs.TaskKind) { name := structs.ConnectProxyPrefix + "-" + task kind := structs.TaskKind(structs.ConnectProxyPrefix + ":" + task) @@ -50,8 +37,7 @@ func TestSIDSHook_recoverToken(t *testing.T) { ci.Parallel(t) r := require.New(t) - secrets := tmpDir(t) - defer cleanupDir(t, secrets) + secrets := t.TempDir() taskName, taskKind := sidecar("foo") h := newSIDSHook(sidsHookConfig{ @@ -75,8 +61,7 @@ func TestSIDSHook_recoverToken_empty(t *testing.T) { ci.Parallel(t) r := require.New(t) - secrets := tmpDir(t) - defer cleanupDir(t, secrets) + secrets := t.TempDir() taskName, taskKind := sidecar("foo") h := newSIDSHook(sidsHookConfig{ @@ -103,8 +88,7 @@ func TestSIDSHook_recoverToken_unReadable(t *testing.T) { r := require.New(t) - secrets := tmpDir(t) - defer cleanupDir(t, secrets) + secrets := t.TempDir() err := os.Chmod(secrets, 0000) r.NoError(err) @@ -126,15 +110,14 @@ func TestSIDSHook_writeToken(t *testing.T) { ci.Parallel(t) r := require.New(t) - secrets := tmpDir(t) - defer cleanupDir(t, secrets) + secrets := t.TempDir() id := uuid.Generate() h := new(sidsHook) err := h.writeToken(secrets, id) r.NoError(err) - content, err := ioutil.ReadFile(filepath.Join(secrets, sidsTokenFile)) + content, err := os.ReadFile(filepath.Join(secrets, sidsTokenFile)) r.NoError(err) r.Equal(id, string(content)) } @@ -150,8 +133,7 @@ func TestSIDSHook_writeToken_unWritable(t *testing.T) { r := require.New(t) - secrets := tmpDir(t) - defer cleanupDir(t, secrets) + secrets := t.TempDir() err := os.Chmod(secrets, 0000) r.NoError(err) @@ -166,8 +148,7 @@ func Test_SIDSHook_writeToken_nonExistent(t *testing.T) { ci.Parallel(t) r := require.New(t) - base := tmpDir(t) - defer cleanupDir(t, base) + base := t.TempDir() secrets := filepath.Join(base, "does/not/exist") id := uuid.Generate() @@ -289,10 +270,9 @@ func TestTaskRunner_DeriveSIToken_UnWritableTokenFile(t *testing.T) { // make the si_token file un-writable, triggering a failure after a // successful token derivation - secrets := tmpDir(t) - defer cleanupDir(t, secrets) + secrets := t.TempDir() trConfig.TaskDir.SecretsDir = secrets - err := ioutil.WriteFile(filepath.Join(secrets, sidsTokenFile), nil, 0400) + err := os.WriteFile(filepath.Join(secrets, sidsTokenFile), nil, 0400) r.NoError(err) // set a consul token for the nomad client, which is what triggers the @@ 
-315,11 +295,7 @@ func TestTaskRunner_DeriveSIToken_UnWritableTokenFile(t *testing.T) { go tr.Run() // wait for task runner to finish running - select { - case <-tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - r.Fail("timed out waiting for task runner") - } + testWaitForTaskToDie(t, tr) // assert task exited un-successfully finalState := tr.TaskState() @@ -329,7 +305,7 @@ func TestTaskRunner_DeriveSIToken_UnWritableTokenFile(t *testing.T) { // assert the token is *not* on disk, as secrets dir was un-writable tokenPath := filepath.Join(trConfig.TaskDir.SecretsDir, sidsTokenFile) - token, err := ioutil.ReadFile(tokenPath) + token, err := os.ReadFile(tokenPath) r.NoError(err) r.Empty(token) } diff --git a/client/allocrunner/taskrunner/state/state.go b/client/allocrunner/taskrunner/state/state.go index 5f83c476c80..cda144003af 100644 --- a/client/allocrunner/taskrunner/state/state.go +++ b/client/allocrunner/taskrunner/state/state.go @@ -1,8 +1,8 @@ package state import ( - "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/plugins/drivers" + "golang.org/x/exp/maps" ) // LocalState is Task state which is persisted for use when restarting Nomad @@ -16,6 +16,11 @@ type LocalState struct { // TaskHandle is the handle used to reattach to the task during recovery TaskHandle *drivers.TaskHandle + + // RunComplete is set to true when the TaskRunner.Run() method finishes. + // It is used to distinguish between a dead task that could be restarted + // and one that will never run again. + RunComplete bool } func NewLocalState() *LocalState { @@ -52,6 +57,7 @@ func (s *LocalState) Copy() *LocalState { Hooks: make(map[string]*HookState, len(s.Hooks)), DriverNetwork: s.DriverNetwork.Copy(), TaskHandle: s.TaskHandle.Copy(), + RunComplete: s.RunComplete, } // Copy the hook state @@ -83,8 +89,8 @@ func (h *HookState) Copy() *HookState { c := new(HookState) *c = *h - c.Data = helper.CopyMapStringString(h.Data) - c.Env = helper.CopyMapStringString(h.Env) + c.Data = maps.Clone(h.Data) + c.Env = maps.Clone(h.Env) return c } @@ -97,9 +103,9 @@ func (h *HookState) Equal(o *HookState) bool { return false } - if !helper.CompareMapStringString(h.Data, o.Data) { + if !maps.Equal(h.Data, o.Data) { return false } - return helper.CompareMapStringString(h.Env, o.Env) + return maps.Equal(h.Env, o.Env) } diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index b8c3b270c32..a77c6329a7c 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -9,6 +9,7 @@ import ( "time" "github.com/hashicorp/nomad/client/lib/cgutil" + "golang.org/x/exp/slices" metrics "github.com/armon/go-metrics" log "github.com/hashicorp/go-hclog" @@ -62,6 +63,11 @@ const ( // updates have come in since the last one was handled, we only need to // handle the last one. triggerUpdateChCap = 1 + + // restartChCap is the capacity for the restartCh used for triggering task + // restarts. It should be exactly 1 as even if multiple restarts have come + // we only need to handle the last one. + restartChCap = 1 ) type TaskRunner struct { @@ -95,6 +101,9 @@ type TaskRunner struct { // stateDB is for persisting localState and taskState stateDB cstate.StateDB + // restartCh is used to signal that the task should restart. + restartCh chan struct{} + // shutdownCtx is used to exit the TaskRunner *without* affecting task state. 
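Several hand-rolled helpers (`CopyMapStringString`, `CompareMapStringString`) are replaced by the generic `maps.Clone` and `maps.Equal` from `golang.org/x/exp/maps`, as seen in the `LocalState` and `HookState` changes above. A tiny example of the replacements, assuming that module is on the build path (equivalent functions also ship in the standard `maps` package as of Go 1.21):

```go
package main

import (
	"fmt"

	"golang.org/x/exp/maps"
)

func main() {
	env := map[string]string{"NOMAD_TASK_NAME": "web", "PORT": "8080"}

	// Clone is a shallow copy; a nil map clones to nil.
	copied := maps.Clone(env)
	copied["PORT"] = "9090"

	fmt.Println(maps.Equal(env, copied)) // false: values now differ
	fmt.Println(env["PORT"], copied["PORT"])
}
```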
shutdownCtx context.Context @@ -228,8 +237,8 @@ type TaskRunner struct { // GetClientAllocs has been called in case of a failed restore. serversContactedCh <-chan struct{} - // startConditionMetCtx is done when TR should start the task - startConditionMetCtx <-chan struct{} + // startConditionMetCh signals the TaskRunner when it should start the task + startConditionMetCh <-chan struct{} // waitOnServers defaults to false but will be set true if a restore // fails and the Run method should wait until serversContactedCh is @@ -244,6 +253,9 @@ type TaskRunner struct { // serviceRegWrapper is the handler wrapper that is used by service hooks // to perform service and check registration and deregistration. serviceRegWrapper *wrapper.HandlerWrapper + + // getter is an interface for retrieving artifacts. + getter cinterfaces.ArtifactGetter } type Config struct { @@ -296,8 +308,8 @@ type Config struct { // servers succeeds and allocs are synced. ServersContactedCh chan struct{} - // startConditionMetCtx is done when TR should start the task - StartConditionMetCtx <-chan struct{} + // StartConditionMetCh signals the TaskRunner when it should start the task + StartConditionMetCh <-chan struct{} // ShutdownDelayCtx is a context from the alloc runner which will // tell us to exit early from shutdown_delay @@ -309,6 +321,9 @@ type Config struct { // ServiceRegWrapper is the handler wrapper that is used by service hooks // to perform service and check registration and deregistration. ServiceRegWrapper *wrapper.HandlerWrapper + + // Getter is an interface for retrieving artifacts. + Getter cinterfaces.ArtifactGetter } func NewTaskRunner(config *Config) (*TaskRunner, error) { @@ -356,6 +371,7 @@ func NewTaskRunner(config *Config) (*TaskRunner, error) { shutdownCtx: trCtx, shutdownCtxCancel: trCancel, triggerUpdateCh: make(chan struct{}, triggerUpdateChCap), + restartCh: make(chan struct{}, restartChCap), waitCh: make(chan struct{}), csiManager: config.CSIManager, cpusetCgroupPathGetter: config.CpusetCgroupPathGetter, @@ -363,10 +379,11 @@ func NewTaskRunner(config *Config) (*TaskRunner, error) { driverManager: config.DriverManager, maxEvents: defaultMaxEvents, serversContactedCh: config.ServersContactedCh, - startConditionMetCtx: config.StartConditionMetCtx, + startConditionMetCh: config.StartConditionMetCh, shutdownDelayCtx: config.ShutdownDelayCtx, shutdownDelayCancelFn: config.ShutdownDelayCancelFn, serviceRegWrapper: config.ServiceRegWrapper, + getter: config.Getter, } // Create the logger based on the allocation ID @@ -494,20 +511,25 @@ func (tr *TaskRunner) Run() { tr.stateLock.RLock() dead := tr.state.State == structs.TaskStateDead + runComplete := tr.localState.RunComplete tr.stateLock.RUnlock() - // if restoring a dead task, ensure that task is cleared and all post hooks - // are called without additional state updates + // If restoring a dead task, ensure the task is cleared and, if the local + // state indicates that the previous Run() call is complete, execute all + // post stop hooks and exit early, otherwise proceed until the + // ALLOC_RESTART loop skipping MAIN since the task is dead. if dead { // do cleanup functions without emitting any additional events/work // to handle cases where we restored a dead task where client terminated // after task finished before completing post-run actions. 
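`TaskRunner` now receives its artifact fetcher as an interface (`Config.Getter` of type `cinterfaces.ArtifactGetter`) instead of calling a package-level function, which is what lets the tests inject `TestDefaultGetter`. A stripped-down sketch of that dependency-injection shape; the type names below are illustrative, not Nomad's:

```go
package main

import "fmt"

// ArtifactGetter is the narrow capability the hook actually needs.
type ArtifactGetter interface {
	GetArtifact(source, dest string) error
}

// artifactHook depends only on the interface, so tests can inject a fake.
type artifactHook struct {
	getter ArtifactGetter
}

func (h *artifactHook) Prestart(source, dest string) error {
	return h.getter.GetArtifact(source, dest)
}

// fakeGetter records calls instead of touching the network.
type fakeGetter struct{ calls []string }

func (f *fakeGetter) GetArtifact(source, dest string) error {
	f.calls = append(f.calls, source+" -> "+dest)
	return nil
}

func main() {
	fake := &fakeGetter{}
	hook := &artifactHook{getter: fake}
	_ = hook.Prestart("https://example.com/app.tgz", "local/")
	fmt.Println(fake.calls)
}
```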
tr.clearDriverHandle() tr.stateUpdater.TaskStateUpdated() - if err := tr.stop(); err != nil { - tr.logger.Error("stop failed on terminal task", "error", err) + if runComplete { + if err := tr.stop(); err != nil { + tr.logger.Error("stop failed on terminal task", "error", err) + } + return } - return } // Updates are handled asynchronously with the other hooks but each @@ -529,27 +551,28 @@ func (tr *TaskRunner) Run() { } } - select { - case <-tr.startConditionMetCtx: - tr.logger.Debug("lifecycle start condition has been met, proceeding") - // yay proceed - case <-tr.killCtx.Done(): - case <-tr.shutdownCtx.Done(): - return - } + // Set the initial task state. + tr.stateUpdater.TaskStateUpdated() - timer, stop := helper.NewSafeTimer(0) // timer duration calculated JIT + // start with a stopped timer; actual restart delay computed later + timer, stop := helper.NewStoppedTimer() defer stop() MAIN: for !tr.shouldShutdown() { + if dead { + break + } + select { case <-tr.killCtx.Done(): break MAIN case <-tr.shutdownCtx.Done(): // TaskRunner was told to exit immediately return - default: + case <-tr.startConditionMetCh: + tr.logger.Debug("lifecycle start condition has been met, proceeding") + // yay proceed } // Run the prestart hooks @@ -659,6 +682,38 @@ MAIN: // Mark the task as dead tr.UpdateState(structs.TaskStateDead, nil) + // Wait here in case the allocation is restarted. Poststop tasks will never + // run again so skip them to avoid blocking forever. + if !tr.Task().IsPoststop() { + ALLOC_RESTART: + // Run in a loop to handle cases where restartCh is triggered but the + // task runner doesn't need to restart. + for { + select { + case <-tr.killCtx.Done(): + break ALLOC_RESTART + case <-tr.shutdownCtx.Done(): + return + case <-tr.restartCh: + // Restart without delay since the task is not running anymore. + restart, _ := tr.shouldRestart() + if restart { + // Set runner as not dead to allow the MAIN loop to run. 
+ dead = false + goto MAIN + } + } + } + } + + tr.stateLock.Lock() + tr.localState.RunComplete = true + err := tr.stateDB.PutTaskRunnerLocalState(tr.allocID, tr.taskName, tr.localState) + if err != nil { + tr.logger.Warn("error persisting task state on run loop exit", "error", err) + } + tr.stateLock.Unlock() + // Run the stop hooks if err := tr.stop(); err != nil { tr.logger.Error("stop failed", "error", err) @@ -783,6 +838,7 @@ func (tr *TaskRunner) runDriver() error { taskConfig := tr.buildTaskConfig() if tr.cpusetCgroupPathGetter != nil { + tr.logger.Trace("waiting for cgroup to exist for", "allocID", tr.allocID, "task", tr.task) cpusetCgroupPath, err := tr.cpusetCgroupPathGetter(tr.killCtx) if err != nil { return err @@ -875,7 +931,7 @@ func (tr *TaskRunner) runDriver() error { } tr.stateLock.Unlock() - tr.setDriverHandle(NewDriverHandle(tr.driver, taskConfig.ID, tr.Task(), net)) + tr.setDriverHandle(NewDriverHandle(tr.driver, taskConfig.ID, tr.Task(), tr.clientConfig.MaxKillTimeout, net)) // Emit an event that we started tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted)) @@ -1041,10 +1097,11 @@ func (tr *TaskRunner) buildTaskConfig() *drivers.TaskConfig { if alloc.AllocatedResources != nil && len(alloc.AllocatedResources.Shared.Networks) > 0 { allocDNS := alloc.AllocatedResources.Shared.Networks[0].DNS if allocDNS != nil { + interpolatedNetworks := taskenv.InterpolateNetworks(env, alloc.AllocatedResources.Shared.Networks) dns = &drivers.DNSConfig{ - Servers: allocDNS.Servers, - Searches: allocDNS.Searches, - Options: allocDNS.Options, + Servers: interpolatedNetworks[0].DNS.Servers, + Searches: interpolatedNetworks[0].DNS.Searches, + Options: interpolatedNetworks[0].DNS.Options, } } } @@ -1173,7 +1230,7 @@ func (tr *TaskRunner) restoreHandle(taskHandle *drivers.TaskHandle, net *drivers } // Update driver handle on task runner - tr.setDriverHandle(NewDriverHandle(tr.driver, taskHandle.Config.ID, tr.Task(), net)) + tr.setDriverHandle(NewDriverHandle(tr.driver, taskHandle.Config.ID, tr.Task(), tr.clientConfig.MaxKillTimeout, net)) return true } @@ -1183,8 +1240,10 @@ func (tr *TaskRunner) UpdateState(state string, event *structs.TaskEvent) { tr.stateLock.Lock() defer tr.stateLock.Unlock() + tr.logger.Trace("setting task state", "state", state) + if event != nil { - tr.logger.Trace("setting task state", "state", state, "event", event.Type) + tr.logger.Trace("appending task event", "state", state, "event", event.Type) // Append the event tr.appendEvent(event) @@ -1399,7 +1458,7 @@ func (tr *TaskRunner) UpdateStats(ru *cstructs.TaskResourceUsage) { } } -//TODO Remove Backwardscompat or use tr.Alloc()? +// TODO Remove Backwardscompat or use tr.Alloc()? func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) { alloc := tr.Alloc() var allocatedMem float32 @@ -1411,7 +1470,7 @@ func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) { ms := ru.ResourceUsage.MemoryStats publishMetric := func(v uint64, reported, measured string) { - if v != 0 || helper.SliceStringContains(ms.Measured, measured) { + if v != 0 || slices.Contains(ms.Measured, measured) { metrics.SetGaugeWithLabels([]string{"client", "allocs", "memory", reported}, float32(v), tr.baseLabels) } @@ -1431,7 +1490,7 @@ func (tr *TaskRunner) setGaugeForMemory(ru *cstructs.TaskResourceUsage) { } } -//TODO Remove Backwardscompat or use tr.Alloc()? +// TODO Remove Backwardscompat or use tr.Alloc()? 
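The reworked `Run` method parks a dead task after the MAIN loop and, when `restartCh` fires and the restart tracker agrees, clears the dead flag and jumps back into MAIN. A toy version of that control flow, independent of the TaskRunner types:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

func run(ctx context.Context, restartCh <-chan struct{}) {
	dead := false

MAIN:
	for !dead {
		fmt.Println("running task body")
		time.Sleep(50 * time.Millisecond)
		dead = true // the task exited
	}

	// The task is dead; park here in case the allocation is restarted.
	for {
		select {
		case <-ctx.Done():
			fmt.Println("shutting down")
			return
		case <-restartCh:
			fmt.Println("restart requested, re-entering main loop")
			dead = false
			goto MAIN
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
	defer cancel()

	restartCh := make(chan struct{}, 1)
	go func() {
		time.Sleep(100 * time.Millisecond)
		restartCh <- struct{}{}
	}()
	run(ctx, restartCh)
}
```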
func (tr *TaskRunner) setGaugeForCPU(ru *cstructs.TaskResourceUsage) { alloc := tr.Alloc() var allocatedCPU float32 diff --git a/client/allocrunner/taskrunner/task_runner_getters.go b/client/allocrunner/taskrunner/task_runner_getters.go index fcf3189b7a1..4d9c35e6e41 100644 --- a/client/allocrunner/taskrunner/task_runner_getters.go +++ b/client/allocrunner/taskrunner/task_runner_getters.go @@ -33,6 +33,11 @@ func (tr *TaskRunner) IsPoststopTask() bool { return tr.Task().Lifecycle != nil && tr.Task().Lifecycle.Hook == structs.TaskLifecycleHookPoststop } +// IsSidecarTask returns true if this task is a sidecar task in its task group. +func (tr *TaskRunner) IsSidecarTask() bool { + return tr.Task().Lifecycle != nil && tr.Task().Lifecycle.Sidecar +} + func (tr *TaskRunner) Task() *structs.Task { tr.taskLock.RLock() defer tr.taskLock.RUnlock() diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index 63b1c0071b2..8736157b1c8 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -58,18 +58,19 @@ func (tr *TaskRunner) initHooks() { // Create the task directory hook. This is run first to ensure the // directory path exists for other hooks. alloc := tr.Alloc() + tr.runnerHooks = []interfaces.TaskHook{ newValidateHook(tr.clientConfig, hookLogger), newTaskDirHook(tr, hookLogger), newLogMonHook(tr, hookLogger), newDispatchHook(alloc, hookLogger), newVolumeHook(tr, hookLogger), - newArtifactHook(tr, hookLogger), + newArtifactHook(tr, tr.getter, hookLogger), newStatsHook(tr, tr.clientConfig.StatsCollectionInterval, hookLogger), newDeviceHook(tr.devicemanager, hookLogger), } - // If the task has a CSI stanza, add the hook. + // If the task has a CSI block, add the hook. 
if task.CSIPluginConfig != nil { tr.runnerHooks = append(tr.runnerHooks, newCSIPluginSupervisorHook( &csiPluginSupervisorHookConfig{ @@ -85,14 +86,14 @@ func (tr *TaskRunner) initHooks() { // If Vault is enabled, add the hook if task.Vault != nil { tr.runnerHooks = append(tr.runnerHooks, newVaultHook(&vaultHookConfig{ - vaultStanza: task.Vault, - client: tr.vaultClient, - events: tr, - lifecycle: tr, - updater: tr, - logger: hookLogger, - alloc: tr.Alloc(), - task: tr.taskName, + vaultBlock: task.Vault, + client: tr.vaultClient, + events: tr, + lifecycle: tr, + updater: tr, + logger: hookLogger, + alloc: tr.Alloc(), + task: tr.taskName, })) } diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index b6c0fdc53c6..7319ca5021d 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "io/ioutil" "net/http" "net/http/httptest" "os" @@ -17,6 +16,7 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" "github.com/hashicorp/nomad/client/config" consulapi "github.com/hashicorp/nomad/client/consul" "github.com/hashicorp/nomad/client/devicemanager" @@ -30,7 +30,7 @@ import ( agentconsul "github.com/hashicorp/nomad/command/agent/consul" mockdriver "github.com/hashicorp/nomad/drivers/mock" "github.com/hashicorp/nomad/drivers/rawexec" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -117,7 +117,7 @@ func testTaskRunnerConfig(t *testing.T, alloc *structs.Allocation, taskName stri shutdownDelayCtx, shutdownDelayCancelFn := context.WithCancel(context.Background()) - // Create a closed channel to mock TaskHookCoordinator.startConditionForTask. + // Create a closed channel to mock TaskCoordinator.startConditionForTask. // Closed channel indicates this task is not blocked on prestart hooks. 
closedCh := make(chan struct{}) close(closedCh) @@ -141,10 +141,11 @@ func testTaskRunnerConfig(t *testing.T, alloc *structs.Allocation, taskName stri DeviceManager: devicemanager.NoopMockManager(), DriverManager: drivermanager.TestDriverManager(t), ServersContactedCh: make(chan struct{}), - StartConditionMetCtx: closedCh, + StartConditionMetCh: closedCh, ShutdownDelayCtx: shutdownDelayCtx, ShutdownDelayCancelFn: shutdownDelayCancelFn, ServiceRegWrapper: wrapperMock, + Getter: getter.TestDefaultGetter(t), } // Set the cgroup path getter if we are in v2 mode @@ -253,6 +254,11 @@ func TestTaskRunner_Stop_ExitCode(t *testing.T) { "command": "/bin/sleep", "args": []string{"1000"}, } + task.Env = map[string]string{ + "NOMAD_PARENT_CGROUP": "nomad.slice", + "NOMAD_ALLOC_ID": alloc.ID, + "NOMAD_TASK_NAME": task.Name, + } conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name) defer cleanup() @@ -328,7 +334,7 @@ func TestTaskRunner_Restore_Running(t *testing.T) { defer newTR.Kill(context.Background(), structs.NewTaskEvent("cleanup")) // Wait for new task runner to exit when the process does - <-newTR.WaitCh() + testWaitForTaskToDie(t, newTR) // Assert that the process was only started once started := 0 @@ -342,19 +348,103 @@ func TestTaskRunner_Restore_Running(t *testing.T) { assert.Equal(t, 1, started) } +// TestTaskRunner_Restore_Dead asserts that restoring a dead task will place it +// back in the correct state. If the task was waiting for an alloc restart it +// must be able to be restarted after restore, otherwise a restart must fail. +func TestTaskRunner_Restore_Dead(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + alloc.Job.TaskGroups[0].Count = 1 + task := alloc.Job.TaskGroups[0].Tasks[0] + task.Driver = "mock_driver" + task.Config = map[string]interface{}{ + "run_for": "2s", + } + conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name) + conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between task runners + defer cleanup() + + // Run the first TaskRunner + origTR, err := NewTaskRunner(conf) + require.NoError(t, err) + go origTR.Run() + defer origTR.Kill(context.Background(), structs.NewTaskEvent("cleanup")) + + // Wait for it to be dead + testWaitForTaskToDie(t, origTR) + + // Cause TR to exit without shutting down task + origTR.Shutdown() + + // Start a new TaskRunner and do the Restore + newTR, err := NewTaskRunner(conf) + require.NoError(t, err) + require.NoError(t, newTR.Restore()) + + go newTR.Run() + defer newTR.Kill(context.Background(), structs.NewTaskEvent("cleanup")) + + // Verify that the TaskRunner is still active since it was recovered after + // a forced shutdown. + select { + case <-newTR.WaitCh(): + require.Fail(t, "WaitCh is not blocking") + default: + } + + // Verify that we can restart task. + // Retry a few times as the newTR.Run() may not have started yet. + testutil.WaitForResult(func() (bool, error) { + ev := &structs.TaskEvent{Type: structs.TaskRestartSignal} + err = newTR.ForceRestart(context.Background(), ev, false) + return err == nil, err + }, func(err error) { + require.NoError(t, err) + }) + testWaitForTaskToStart(t, newTR) + + // Kill task to verify that it's restored as dead and not able to restart. 
+ newTR.Kill(context.Background(), nil) + testutil.WaitForResult(func() (bool, error) { + select { + case <-newTR.WaitCh(): + return true, nil + default: + return false, fmt.Errorf("task still running") + } + }, func(err error) { + require.NoError(t, err) + }) + + newTR2, err := NewTaskRunner(conf) + require.NoError(t, err) + require.NoError(t, newTR2.Restore()) + + go newTR2.Run() + defer newTR2.Kill(context.Background(), structs.NewTaskEvent("cleanup")) + + ev := &structs.TaskEvent{Type: structs.TaskRestartSignal} + err = newTR2.ForceRestart(context.Background(), ev, false) + require.Equal(t, err, ErrTaskNotRunning) +} + // setupRestoreFailureTest starts a service, shuts down the task runner, and // kills the task before restarting a new TaskRunner. The new TaskRunner is // returned once it is running and waiting in pending along with a cleanup // func. func setupRestoreFailureTest(t *testing.T, alloc *structs.Allocation) (*TaskRunner, *Config, func()) { - ci.Parallel(t) - task := alloc.Job.TaskGroups[0].Tasks[0] task.Driver = "raw_exec" task.Config = map[string]interface{}{ "command": "sleep", "args": []string{"30"}, } + task.Env = map[string]string{ + "NOMAD_PARENT_CGROUP": "nomad.slice", + "NOMAD_ALLOC_ID": alloc.ID, + "NOMAD_TASK_NAME": task.Name, + } conf, cleanup1 := testTaskRunnerConfig(t, alloc, task.Name) conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between runs @@ -503,6 +593,11 @@ func TestTaskRunner_Restore_System(t *testing.T) { "command": "sleep", "args": []string{"30"}, } + task.Env = map[string]string{ + "NOMAD_PARENT_CGROUP": "nomad.slice", + "NOMAD_ALLOC_ID": alloc.ID, + "NOMAD_TASK_NAME": task.Name, + } conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name) defer cleanup() conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between runs @@ -588,11 +683,7 @@ func TestTaskRunner_TaskEnv_Interpolated(t *testing.T) { defer cleanup() // Wait for task to complete - select { - case <-tr.WaitCh(): - case <-time.After(3 * time.Second): - require.Fail("timeout waiting for task to exit") - } + testWaitForTaskToDie(t, tr) // Get the mock driver plugin driverPlugin, err := conf.DriverManager.Dispense(mockdriver.PluginID.Name) @@ -639,7 +730,9 @@ func TestTaskRunner_TaskEnv_Chroot(t *testing.T) { go tr.Run() defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup")) - // Wait for task to exit + // Wait for task to exit and kill the task runner to run the stop hooks. + testWaitForTaskToDie(t, tr) + tr.Kill(context.Background(), structs.NewTaskEvent("kill")) timeout := 15 * time.Second if testutil.IsCI() { timeout = 120 * time.Second @@ -652,7 +745,7 @@ func TestTaskRunner_TaskEnv_Chroot(t *testing.T) { // Read stdout p := filepath.Join(conf.TaskDir.LogDir, task.Name+".stdout.0") - stdout, err := ioutil.ReadFile(p) + stdout, err := os.ReadFile(p) require.NoError(t, err) require.Equalf(t, exp, string(stdout), "expected: %s\n\nactual: %s\n", exp, stdout) } @@ -668,7 +761,7 @@ func TestTaskRunner_TaskEnv_Image(t *testing.T) { task := alloc.Job.TaskGroups[0].Tasks[0] task.Driver = "docker" task.Config = map[string]interface{}{ - "image": "redis:3.2-alpine", + "image": "redis:7-alpine", "network_mode": "none", "command": "sh", "args": []string{"-c", "echo $NOMAD_ALLOC_DIR; " + @@ -688,7 +781,9 @@ func TestTaskRunner_TaskEnv_Image(t *testing.T) { tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name) defer cleanup() - // Wait for task to exit + // Wait for task to exit and kill task runner to run the stop hooks. 
+ testWaitForTaskToDie(t, tr) + tr.Kill(context.Background(), structs.NewTaskEvent("kill")) select { case <-tr.WaitCh(): case <-time.After(15 * time.Second): @@ -697,7 +792,7 @@ func TestTaskRunner_TaskEnv_Image(t *testing.T) { // Read stdout p := filepath.Join(conf.TaskDir.LogDir, task.Name+".stdout.0") - stdout, err := ioutil.ReadFile(p) + stdout, err := os.ReadFile(p) require.NoError(err) require.Equalf(exp, string(stdout), "expected: %s\n\nactual: %s\n", exp, stdout) } @@ -718,7 +813,11 @@ func TestTaskRunner_TaskEnv_None(t *testing.T) { "echo $PATH", }, } - + task.Env = map[string]string{ + "NOMAD_PARENT_CGROUP": "nomad.slice", + "NOMAD_ALLOC_ID": alloc.ID, + "NOMAD_TASK_NAME": task.Name, + } tr, conf, cleanup := runTestTaskRunner(t, alloc, task.Name) defer cleanup() @@ -731,7 +830,9 @@ func TestTaskRunner_TaskEnv_None(t *testing.T) { %s `, root, taskDir, taskDir, os.Getenv("PATH")) - // Wait for task to exit + // Wait for task to exit and kill the task runner to run the stop hooks. + testWaitForTaskToDie(t, tr) + tr.Kill(context.Background(), structs.NewTaskEvent("kill")) select { case <-tr.WaitCh(): case <-time.After(15 * time.Second): @@ -740,7 +841,7 @@ func TestTaskRunner_TaskEnv_None(t *testing.T) { // Read stdout p := filepath.Join(conf.TaskDir.LogDir, task.Name+".stdout.0") - stdout, err := ioutil.ReadFile(p) + stdout, err := os.ReadFile(p) require.NoError(err) require.Equalf(exp, string(stdout), "expected: %s\n\nactual: %s\n", exp, stdout) } @@ -799,10 +900,7 @@ func TestTaskRunner_DevicePropogation(t *testing.T) { defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup")) // Wait for task to complete - select { - case <-tr.WaitCh(): - case <-time.After(3 * time.Second): - } + testWaitForTaskToDie(t, tr) // Get the mock driver plugin driverPlugin, err := conf.DriverManager.Dispense(mockdriver.PluginID.Name) @@ -1046,7 +1144,7 @@ func TestTaskRunner_NoShutdownDelay(t *testing.T) { maxTimeToFailDuration := time.Duration(testutil.TestMultiplier()) * time.Second alloc := mock.Alloc() - alloc.DesiredTransition = structs.DesiredTransition{NoShutdownDelay: helper.BoolToPtr(true)} + alloc.DesiredTransition = structs.DesiredTransition{NoShutdownDelay: pointer.Of(true)} task := alloc.Job.TaskGroups[0].Tasks[0] task.Services[0].Tags = []string{"tag1"} task.Services = task.Services[:1] // only need 1 for this test @@ -1151,7 +1249,7 @@ func TestTaskRunner_Dispatch_Payload(t *testing.T) { // Check that the file was written to disk properly payloadPath := filepath.Join(tr.taskDir.LocalDir, fileName) - data, err := ioutil.ReadFile(payloadPath) + data, err := os.ReadFile(payloadPath) require.NoError(t, err) require.Equal(t, expected, data) } @@ -1309,11 +1407,7 @@ func TestTaskRunner_CheckWatcher_Restart(t *testing.T) { // Wait until the task exits. Don't simply wait for it to run as it may // get restarted and terminated before the test is able to observe it // running. 
- select { - case <-tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - require.Fail(t, "timeout") - } + testWaitForTaskToDie(t, tr) state := tr.TaskState() actualEvents := make([]string, len(state.Events)) @@ -1402,11 +1496,7 @@ func TestTaskRunner_BlockForSIDSToken(t *testing.T) { // task runner should exit now that it has been unblocked and it is a batch // job with a zero sleep time - select { - case <-tr.WaitCh(): - case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())): - r.Fail("timed out waiting for batch task to exist") - } + testWaitForTaskToDie(t, tr) // assert task exited successfully finalState := tr.TaskState() @@ -1415,7 +1505,7 @@ func TestTaskRunner_BlockForSIDSToken(t *testing.T) { // assert the token is on disk tokenPath := filepath.Join(trConfig.TaskDir.SecretsDir, sidsTokenFile) - data, err := ioutil.ReadFile(tokenPath) + data, err := os.ReadFile(tokenPath) r.NoError(err) r.Equal(token, string(data)) } @@ -1459,11 +1549,7 @@ func TestTaskRunner_DeriveSIToken_Retry(t *testing.T) { go tr.Run() // assert task runner blocks on SI token - select { - case <-tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - r.Fail("timed out waiting for task runner") - } + testWaitForTaskToDie(t, tr) // assert task exited successfully finalState := tr.TaskState() @@ -1472,7 +1558,7 @@ func TestTaskRunner_DeriveSIToken_Retry(t *testing.T) { // assert the token is on disk tokenPath := filepath.Join(trConfig.TaskDir.SecretsDir, sidsTokenFile) - data, err := ioutil.ReadFile(tokenPath) + data, err := os.ReadFile(tokenPath) r.NoError(err) r.Equal(token, string(data)) } @@ -1579,11 +1665,7 @@ func TestTaskRunner_BlockForVaultToken(t *testing.T) { // TR should exit now that it's unblocked by vault as its a batch job // with 0 sleeping. 
- select { - case <-tr.WaitCh(): - case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())): - require.Fail(t, "timed out waiting for batch task to exit") - } + testWaitForTaskToDie(t, tr) // Assert task exited successfully finalState := tr.TaskState() @@ -1592,10 +1674,18 @@ func TestTaskRunner_BlockForVaultToken(t *testing.T) { // Check that the token is on disk tokenPath := filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile) - data, err := ioutil.ReadFile(tokenPath) + data, err := os.ReadFile(tokenPath) require.NoError(t, err) require.Equal(t, token, string(data)) + // Kill task runner to trigger stop hooks + tr.Kill(context.Background(), structs.NewTaskEvent("kill")) + select { + case <-tr.WaitCh(): + case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): + require.Fail(t, "timed out waiting for task runner to exit") + } + // Check the token was revoked testutil.WaitForResult(func() (bool, error) { if len(vaultClient.StoppedTokens()) != 1 { @@ -1642,22 +1732,26 @@ func TestTaskRunner_DeriveToken_Retry(t *testing.T) { defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup")) go tr.Run() - // Wait for TR to exit and check its state + // Wait for TR to die and check its state + testWaitForTaskToDie(t, tr) + + state := tr.TaskState() + require.Equal(t, structs.TaskStateDead, state.State) + require.False(t, state.Failed) + + // Kill task runner to trigger stop hooks + tr.Kill(context.Background(), structs.NewTaskEvent("kill")) select { case <-tr.WaitCh(): case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): require.Fail(t, "timed out waiting for task runner to exit") } - state := tr.TaskState() - require.Equal(t, structs.TaskStateDead, state.State) - require.False(t, state.Failed) - require.Equal(t, 1, count) // Check that the token is on disk tokenPath := filepath.Join(conf.TaskDir.SecretsDir, vaultTokenFile) - data, err := ioutil.ReadFile(tokenPath) + data, err := os.ReadFile(tokenPath) require.NoError(t, err) require.Equal(t, token, string(data)) @@ -1752,11 +1846,7 @@ func TestTaskRunner_Download_ChrootExec(t *testing.T) { defer cleanup() // Wait for task to run and exit - select { - case <-tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - require.Fail(t, "timed out waiting for task runner to exit") - } + testWaitForTaskToDie(t, tr) state := tr.TaskState() require.Equal(t, structs.TaskStateDead, state.State) @@ -1780,6 +1870,11 @@ func TestTaskRunner_Download_RawExec(t *testing.T) { task.Config = map[string]interface{}{ "command": "noop.sh", } + task.Env = map[string]string{ + "NOMAD_PARENT_CGROUP": "nomad.slice", + "NOMAD_ALLOC_ID": alloc.ID, + "NOMAD_TASK_NAME": task.Name, + } task.Artifacts = []*structs.TaskArtifact{ { GetterSource: fmt.Sprintf("%s/testdata/noop.sh", ts.URL), @@ -1792,11 +1887,7 @@ func TestTaskRunner_Download_RawExec(t *testing.T) { defer cleanup() // Wait for task to run and exit - select { - case <-tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - require.Fail(t, "timed out waiting for task runner to exit") - } + testWaitForTaskToDie(t, tr) state := tr.TaskState() require.Equal(t, structs.TaskStateDead, state.State) @@ -1827,11 +1918,7 @@ func TestTaskRunner_Download_List(t *testing.T) { defer cleanup() // Wait for task to run and exit - select { - case <-tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - require.Fail(t, "timed out waiting for task 
runner to exit") - } + testWaitForTaskToDie(t, tr) state := tr.TaskState() require.Equal(t, structs.TaskStateDead, state.State) @@ -1878,11 +1965,7 @@ func TestTaskRunner_Download_Retries(t *testing.T) { tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name) defer cleanup() - select { - case <-tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - require.Fail(t, "timed out waiting for task to exit") - } + testWaitForTaskToDie(t, tr) state := tr.TaskState() require.Equal(t, structs.TaskStateDead, state.State) @@ -2076,6 +2159,8 @@ func TestTaskRunner_RestartSignalTask_NotRunning(t *testing.T) { case <-time.After(1 * time.Second): } + require.Equal(t, structs.TaskStatePending, tr.TaskState().State) + // Send a signal and restart err = tr.Signal(structs.NewTaskEvent("don't panic"), "QUIT") require.EqualError(t, err, ErrTaskNotRunning.Error()) @@ -2086,12 +2171,7 @@ func TestTaskRunner_RestartSignalTask_NotRunning(t *testing.T) { // Unblock and let it finish waitCh <- struct{}{} - - select { - case <-tr.WaitCh(): - case <-time.After(10 * time.Second): - require.Fail(t, "timed out waiting for task to complete") - } + testWaitForTaskToDie(t, tr) // Assert the task ran and never restarted state := tr.TaskState() @@ -2129,11 +2209,7 @@ func TestTaskRunner_Run_RecoverableStartError(t *testing.T) { tr, _, cleanup := runTestTaskRunner(t, alloc, task.Name) defer cleanup() - select { - case <-tr.WaitCh(): - case <-time.After(time.Duration(testutil.TestMultiplier()*15) * time.Second): - require.Fail(t, "timed out waiting for task to exit") - } + testWaitForTaskToDie(t, tr) state := tr.TaskState() require.Equal(t, structs.TaskStateDead, state.State) @@ -2178,11 +2254,7 @@ func TestTaskRunner_Template_Artifact(t *testing.T) { go tr.Run() // Wait for task to run and exit - select { - case <-tr.WaitCh(): - case <-time.After(15 * time.Second * time.Duration(testutil.TestMultiplier())): - require.Fail(t, "timed out waiting for task runner to exit") - } + testWaitForTaskToDie(t, tr) state := tr.TaskState() require.Equal(t, structs.TaskStateDead, state.State) @@ -2512,7 +2584,9 @@ func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) { tr, err := NewTaskRunner(conf) require.NoError(t, err) defer tr.Kill(context.Background(), structs.NewTaskEvent("cleanup")) - tr.Run() + go tr.Run() + + testWaitForTaskToDie(t, tr) state := tr.TaskState() require.Equal(t, structs.TaskStateDead, state.State) @@ -2538,7 +2612,17 @@ func TestTaskRunner_UnregisterConsul_Retries(t *testing.T) { func testWaitForTaskToStart(t *testing.T, tr *TaskRunner) { testutil.WaitForResult(func() (bool, error) { ts := tr.TaskState() - return ts.State == structs.TaskStateRunning, fmt.Errorf("%v", ts.State) + return ts.State == structs.TaskStateRunning, fmt.Errorf("expected task to be running, got %v", ts.State) + }, func(err error) { + require.NoError(t, err) + }) +} + +// testWaitForTaskToDie waits for the task to die or fails the test +func testWaitForTaskToDie(t *testing.T, tr *TaskRunner) { + testutil.WaitForResult(func() (bool, error) { + ts := tr.TaskState() + return ts.State == structs.TaskStateDead, fmt.Errorf("expected task to be dead, got %v", ts.State) }, func(err error) { require.NoError(t, err) }) diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index d6c71819662..e161882add6 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -20,7 +20,7 @@ import ( 
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/structs" ) @@ -54,6 +54,10 @@ type TaskTemplateManager struct { // runner is the consul-template runner runner *manager.Runner + // handle is used to execute scripts + handle interfaces.ScriptExecutor + handleLock sync.Mutex + // signals is a lookup map from the string representation of a signal to its // actual signal signals map[string]os.Signal @@ -189,6 +193,14 @@ func (tm *TaskTemplateManager) Stop() { } } +// SetDriverHandle sets the executor +func (tm *TaskTemplateManager) SetDriverHandle(executor interfaces.ScriptExecutor) { + tm.handleLock.Lock() + defer tm.handleLock.Unlock() + tm.handle = executor + +} + // run is the long lived loop that handles errors and templates being rendered func (tm *TaskTemplateManager) run() { // Runner is nil if there are no templates @@ -389,6 +401,7 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time var handling []string signals := make(map[string]struct{}) + scripts := []*structs.ChangeScript{} restart := false var splay time.Duration @@ -433,6 +446,8 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time signals[tmpl.ChangeSignal] = struct{}{} case structs.TemplateChangeModeRestart: restart = true + case structs.TemplateChangeModeScript: + scripts = append(scripts, tmpl.ChangeScript) case structs.TemplateChangeModeNoop: continue } @@ -445,52 +460,131 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time handling = append(handling, id) } - if restart || len(signals) != 0 { - if splay != 0 { - ns := splay.Nanoseconds() - offset := rand.Int63n(ns) - t := time.Duration(offset) + shouldHandle := restart || len(signals) != 0 || len(scripts) != 0 + if !shouldHandle { + return + } - select { - case <-time.After(t): - case <-tm.shutdownCh: - return - } - } + // Apply splay timeout to avoid applying change_mode too frequently. + if splay != 0 { + ns := splay.Nanoseconds() + offset := rand.Int63n(ns) + t := time.Duration(offset) - // Update handle time - for _, id := range handling { - handledRenders[id] = events[id].LastDidRender + select { + case <-time.After(t): + case <-tm.shutdownCh: + return } + } - if restart { - tm.config.Lifecycle.Restart(context.Background(), - structs.NewTaskEvent(structs.TaskRestartSignal). - SetDisplayMessage("Template with change_mode restart re-rendered"), false) - } else if len(signals) != 0 { - var mErr multierror.Error - for signal := range signals { - s := tm.signals[signal] - event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetDisplayMessage("Template re-rendered") - if err := tm.config.Lifecycle.Signal(event, signal); err != nil { - _ = multierror.Append(&mErr, err) - } - } + // Update handle time + for _, id := range handling { + handledRenders[id] = events[id].LastDidRender + } - if err := mErr.ErrorOrNil(); err != nil { - flat := make([]os.Signal, 0, len(signals)) - for signal := range signals { - flat = append(flat, tm.signals[signal]) - } + if restart { + tm.config.Lifecycle.Restart(context.Background(), + structs.NewTaskEvent(structs.TaskRestartSignal). 
+ SetDisplayMessage("Template with change_mode restart re-rendered"), false) + } else { + // Handle signals and scripts since the task may have multiple + // templates with mixed change_mode values. + tm.handleChangeModeSignal(signals) + tm.handleChangeModeScript(scripts) + } +} - tm.config.Lifecycle.Kill(context.Background(), - structs.NewTaskEvent(structs.TaskKilling). - SetFailsTask(). - SetDisplayMessage(fmt.Sprintf("Template failed to send signals %v: %v", flat, err))) - } +func (tm *TaskTemplateManager) handleChangeModeSignal(signals map[string]struct{}) { + var mErr multierror.Error + for signal := range signals { + s := tm.signals[signal] + event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetDisplayMessage("Template re-rendered") + if err := tm.config.Lifecycle.Signal(event, signal); err != nil { + _ = multierror.Append(&mErr, err) + } + } + + if err := mErr.ErrorOrNil(); err != nil { + flat := make([]os.Signal, 0, len(signals)) + for signal := range signals { + flat = append(flat, tm.signals[signal]) } + + tm.config.Lifecycle.Kill(context.Background(), + structs.NewTaskEvent(structs.TaskKilling). + SetFailsTask(). + SetDisplayMessage(fmt.Sprintf("Template failed to send signals %v: %v", flat, err))) } +} +func (tm *TaskTemplateManager) handleChangeModeScript(scripts []*structs.ChangeScript) { + // process script execution concurrently + var wg sync.WaitGroup + for _, script := range scripts { + wg.Add(1) + go tm.processScript(script, &wg) + } + wg.Wait() +} + +// handleScriptError is a helper function that produces a TaskKilling event and +// emits a message +func (tm *TaskTemplateManager) handleScriptError(script *structs.ChangeScript, msg string) { + ev := structs.NewTaskEvent(structs.TaskHookFailed).SetDisplayMessage(msg) + tm.config.Events.EmitEvent(ev) + + if script.FailOnError { + tm.config.Lifecycle.Kill(context.Background(), + structs.NewTaskEvent(structs.TaskKilling). + SetFailsTask(). + SetDisplayMessage("Template script failed, task is being killed")) + } +} + +// processScript is used for executing change_mode script and handling errors +func (tm *TaskTemplateManager) processScript(script *structs.ChangeScript, wg *sync.WaitGroup) { + defer wg.Done() + + if tm.handle == nil { + failureMsg := fmt.Sprintf( + "Template failed to run script %v with arguments %v because task driver doesn't support the exec operation", + script.Command, + script.Args, + ) + tm.handleScriptError(script, failureMsg) + return + } + _, exitCode, err := tm.handle.Exec(script.Timeout, script.Command, script.Args) + if err != nil { + failureMsg := fmt.Sprintf( + "Template failed to run script %v with arguments %v on change: %v Exit code: %v", + script.Command, + script.Args, + err, + exitCode, + ) + tm.handleScriptError(script, failureMsg) + return + } + if exitCode != 0 { + failureMsg := fmt.Sprintf( + "Template ran script %v with arguments %v on change but it exited with code code: %v", + script.Command, + script.Args, + exitCode, + ) + tm.handleScriptError(script, failureMsg) + return + } + tm.config.Events.EmitEvent(structs.NewTaskEvent(structs.TaskHookMessage). + SetDisplayMessage( + fmt.Sprintf( + "Template successfully ran script %v with arguments: %v. Exit code: %v", + script.Command, + script.Args, + exitCode, + ))) } // allTemplatesNoop returns whether all the managed templates have change mode noop. 
@@ -608,7 +702,7 @@ func parseTemplateConfigs(config *TaskTemplateManagerConfig) (map[*ctconf.Templa } ct.Wait = &ctconf.WaitConfig{ - Enabled: helper.BoolToPtr(true), + Enabled: pointer.Of(true), Min: tmpl.Wait.Min, Max: tmpl.Wait.Max, } @@ -623,6 +717,14 @@ func parseTemplateConfigs(config *TaskTemplateManagerConfig) (map[*ctconf.Templa m := os.FileMode(v) ct.Perms = &m } + // Set ownership + if tmpl.Uid != nil && *tmpl.Uid >= 0 { + ct.Uid = tmpl.Uid + } + if tmpl.Gid != nil && *tmpl.Gid >= 0 { + ct.Gid = tmpl.Gid + } + ct.Finalize() ctmpls[ct] = tmpl @@ -714,7 +816,7 @@ func newRunnerConfig(config *TaskTemplateManagerConfig, if cc.ConsulConfig.EnableSSL != nil && *cc.ConsulConfig.EnableSSL { verify := cc.ConsulConfig.VerifySSL != nil && *cc.ConsulConfig.VerifySSL conf.Consul.SSL = &ctconf.SSLConfig{ - Enabled: helper.BoolToPtr(true), + Enabled: pointer.Of(true), Verify: &verify, Cert: &cc.ConsulConfig.CertFile, Key: &cc.ConsulConfig.KeyFile, @@ -729,7 +831,7 @@ func newRunnerConfig(config *TaskTemplateManagerConfig, } conf.Consul.Auth = &ctconf.AuthConfig{ - Enabled: helper.BoolToPtr(true), + Enabled: pointer.Of(true), Username: &parts[0], Password: &parts[1], } @@ -758,7 +860,7 @@ func newRunnerConfig(config *TaskTemplateManagerConfig, // Set up the Vault config // Always set these to ensure nothing is picked up from the environment emptyStr := "" - conf.Vault.RenewToken = helper.BoolToPtr(false) + conf.Vault.RenewToken = pointer.Of(false) conf.Vault.Token = &emptyStr if cc.VaultConfig != nil && cc.VaultConfig.IsEnabled() { conf.Vault.Address = &cc.VaultConfig.Addr @@ -777,7 +879,7 @@ func newRunnerConfig(config *TaskTemplateManagerConfig, skipVerify := cc.VaultConfig.TLSSkipVerify != nil && *cc.VaultConfig.TLSSkipVerify verify := !skipVerify conf.Vault.SSL = &ctconf.SSLConfig{ - Enabled: helper.BoolToPtr(true), + Enabled: pointer.Of(true), Verify: &verify, Cert: &cc.VaultConfig.TLSCertFile, Key: &cc.VaultConfig.TLSKeyFile, @@ -787,8 +889,8 @@ func newRunnerConfig(config *TaskTemplateManagerConfig, } } else { conf.Vault.SSL = &ctconf.SSLConfig{ - Enabled: helper.BoolToPtr(false), - Verify: helper.BoolToPtr(false), + Enabled: pointer.Of(false), + Verify: pointer.Of(false), Cert: &emptyStr, Key: &emptyStr, CaCert: &emptyStr, @@ -817,6 +919,18 @@ func newRunnerConfig(config *TaskTemplateManagerConfig, // Use the Node's SecretID to authenticate Nomad template function calls. 
conf.Nomad.Token = &cc.Node.SecretID + if cc.TemplateConfig != nil && cc.TemplateConfig.NomadRetry != nil { + // Set the user-specified Nomad RetryConfig + var err error + if err = cc.TemplateConfig.NomadRetry.Validate(); err != nil { + return nil, err + } + conf.Nomad.Retry, err = cc.TemplateConfig.NomadRetry.ToConsulTemplate() + if err != nil { + return nil, err + } + } + conf.Finalize() return conf, nil } diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go index 2bc0d5d98ec..f9732a015b0 100644 --- a/client/allocrunner/taskrunner/template/template_test.go +++ b/client/allocrunner/taskrunner/template/template_test.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "os" "os/user" "path/filepath" @@ -16,6 +15,7 @@ import ( "strconv" "strings" "sync" + "syscall" "testing" "time" @@ -25,7 +25,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/taskenv" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -33,6 +33,7 @@ import ( sconfig "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/testutil" "github.com/kr/pretty" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -121,6 +122,16 @@ func (m *MockTaskHooks) EmitEvent(event *structs.TaskEvent) { func (m *MockTaskHooks) SetState(state string, event *structs.TaskEvent) {} +// mockExecutor implements script executor interface +type mockExecutor struct { + DesiredExit int + DesiredErr error +} + +func (m *mockExecutor) Exec(timeout time.Duration, cmd string, args []string) ([]byte, int, error) { + return []byte{}, m.DesiredExit, m.DesiredErr +} + // testHarness is used to test the TaskTemplateManager by spinning up // Consul/Vault as needed type testHarness struct { @@ -154,7 +165,7 @@ func newTestHarness(t *testing.T, templates []*structs.Template, consul, vault b TemplateConfig: &config.ClientTemplateConfig{ FunctionDenylist: config.DefaultTemplateFunctionDenylist, DisableSandbox: false, - ConsulRetry: &config.RetryConfig{Backoff: helper.TimeToPtr(10 * time.Millisecond)}, + ConsulRetry: &config.RetryConfig{Backoff: pointer.Of(10 * time.Millisecond)}, }}, emitRate: DefaultMaxTemplateEventRate, } @@ -167,14 +178,11 @@ func newTestHarness(t *testing.T, templates []*structs.Template, consul, vault b harness.nomadNamespace = a.Namespace // Make a tempdir - d, err := ioutil.TempDir("", "ct_test") - if err != nil { - t.Fatalf("Failed to make tmpdir: %v", err) - } - harness.taskDir = d + harness.taskDir = t.TempDir() harness.envBuilder.SetClientTaskRoot(harness.taskDir) if consul { + var err error harness.consul, err = ctestutil.NewTestServerConfigT(t, func(c *ctestutil.TestServerConfig) { // defaults }) @@ -214,7 +222,6 @@ func (h *testHarness) startWithErr() error { EnvBuilder: h.envBuilder, MaxTemplateEventRate: h.emitRate, }) - return err } @@ -382,7 +389,7 @@ func TestTaskTemplateManager_InvalidConfig(t *testing.T) { func TestTaskTemplateManager_HostPath(t *testing.T) { ci.Parallel(t) // Make a template that will render immediately and write it to a tmp file - f, err := ioutil.TempFile("", "") + f, err := os.CreateTemp("", "") if err != nil { t.Fatalf("Bad: %v", err) } @@ -418,7 +425,7 @@ func TestTaskTemplateManager_HostPath(t 
*testing.T) { // Check the file is there path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -495,7 +502,7 @@ func TestTaskTemplateManager_Unblock_Static(t *testing.T) { // Check the file is there path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -515,6 +522,8 @@ func TestTaskTemplateManager_Permissions(t *testing.T) { DestPath: file, ChangeMode: structs.TemplateChangeModeNoop, Perms: "777", + Uid: pointer.Of(503), + Gid: pointer.Of(20), } harness := newTestHarness(t, []*structs.Template{template}, false, false) @@ -538,6 +547,13 @@ func TestTaskTemplateManager_Permissions(t *testing.T) { if m := fi.Mode(); m != os.ModePerm { t.Fatalf("Got mode %v; want %v", m, os.ModePerm) } + + sys := fi.Sys() + uid := pointer.Of(int(sys.(*syscall.Stat_t).Uid)) + gid := pointer.Of(int(sys.(*syscall.Stat_t).Gid)) + + must.Eq(t, template.Uid, uid) + must.Eq(t, template.Gid, gid) } func TestTaskTemplateManager_Unblock_Static_NomadEnv(t *testing.T) { @@ -565,7 +581,7 @@ func TestTaskTemplateManager_Unblock_Static_NomadEnv(t *testing.T) { // Check the file is there path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -590,7 +606,7 @@ func TestTaskTemplateManager_Unblock_Static_AlreadyRendered(t *testing.T) { // Write the contents path := filepath.Join(harness.taskDir, file) - if err := ioutil.WriteFile(path, []byte(content), 0777); err != nil { + if err := os.WriteFile(path, []byte(content), 0777); err != nil { t.Fatalf("Failed to write data: %v", err) } @@ -606,7 +622,7 @@ func TestTaskTemplateManager_Unblock_Static_AlreadyRendered(t *testing.T) { // Check the file is there path = filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -652,7 +668,7 @@ func TestTaskTemplateManager_Unblock_Consul(t *testing.T) { // Check the file is there path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -702,7 +718,7 @@ func TestTaskTemplateManager_Unblock_Vault(t *testing.T) { // Check the file is there path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -747,7 +763,7 @@ func TestTaskTemplateManager_Unblock_Multi_Template(t *testing.T) { // Check that the static file has been rendered path := filepath.Join(harness.taskDir, staticFile) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -768,7 +784,7 @@ func TestTaskTemplateManager_Unblock_Multi_Template(t *testing.T) { // Check the consul file is there path = filepath.Join(harness.taskDir, consulFile) - raw, err = ioutil.ReadFile(path) + raw, err = os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -820,7 +836,7 @@ func 
TestTaskTemplateManager_FirstRender_Restored(t *testing.T) { // Check the file is there path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) require.NoError(err, "Failed to read rendered template from %q", path) require.Equal(content, string(raw), "Unexpected template data; got %s, want %q", raw, content) @@ -914,7 +930,7 @@ func TestTaskTemplateManager_Rerender_Noop(t *testing.T) { // Check the file is there path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -936,7 +952,7 @@ func TestTaskTemplateManager_Rerender_Noop(t *testing.T) { // Check the file has been updated path = filepath.Join(harness.taskDir, file) - raw, err = ioutil.ReadFile(path) + raw, err = os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -1026,7 +1042,7 @@ OUTER: // Check the files have been updated path := filepath.Join(harness.taskDir, file1) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -1036,7 +1052,7 @@ OUTER: } path = filepath.Join(harness.taskDir, file2) - raw, err = ioutil.ReadFile(path) + raw, err = os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -1100,7 +1116,7 @@ OUTER: // Check the files have been updated path := filepath.Join(harness.taskDir, file1) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -1135,7 +1151,7 @@ func TestTaskTemplateManager_Interpolate_Destination(t *testing.T) { // Check the file is there actual := fmt.Sprintf("%s.tmpl", harness.node.ID) path := filepath.Join(harness.taskDir, actual) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) if err != nil { t.Fatalf("Failed to read rendered template from %q: %v", path, err) } @@ -1193,6 +1209,294 @@ func TestTaskTemplateManager_Signal_Error(t *testing.T) { require.Contains(harness.mockHooks.KillEvent.DisplayMessage, "failed to send signals") } +func TestTaskTemplateManager_ScriptExecution(t *testing.T) { + ci.Parallel(t) + + // Make a template that renders based on a key in Consul and triggers script + key1 := "bam" + key2 := "bar" + content1_1 := "cat" + content1_2 := "dog" + t1 := &structs.Template{ + EmbeddedTmpl: ` +FOO={{key "bam"}} +`, + DestPath: "test.env", + ChangeMode: structs.TemplateChangeModeScript, + ChangeScript: &structs.ChangeScript{ + Command: "/bin/foo", + Args: []string{}, + Timeout: 5 * time.Second, + FailOnError: false, + }, + Envvars: true, + } + t2 := &structs.Template{ + EmbeddedTmpl: ` +BAR={{key "bar"}} +`, + DestPath: "test2.env", + ChangeMode: structs.TemplateChangeModeScript, + ChangeScript: &structs.ChangeScript{ + Command: "/bin/foo", + Args: []string{}, + Timeout: 5 * time.Second, + FailOnError: false, + }, + Envvars: true, + } + + me := mockExecutor{DesiredExit: 0, DesiredErr: nil} + harness := newTestHarness(t, []*structs.Template{t1, t2}, true, false) + harness.start(t) + harness.manager.SetDriverHandle(&me) + defer harness.stop() + + // Ensure no unblock + select { + case <-harness.mockHooks.UnblockCh: + require.Fail(t, "Task unblock should not have been called") + case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): + } + + // 
Write the key to Consul + harness.consul.SetKV(t, key1, []byte(content1_1)) + harness.consul.SetKV(t, key2, []byte(content1_1)) + + // Wait for the unblock + select { + case <-harness.mockHooks.UnblockCh: + case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): + require.Fail(t, "Task unblock should have been called") + } + + // Update the keys in Consul + harness.consul.SetKV(t, key1, []byte(content1_2)) + + // Wait for script execution + timeout := time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second) +OUTER: + for { + select { + case <-harness.mockHooks.RestartCh: + require.Fail(t, "restart not expected") + case ev := <-harness.mockHooks.EmitEventCh: + if strings.Contains(ev.DisplayMessage, t1.ChangeScript.Command) { + break OUTER + } + case <-harness.mockHooks.SignalCh: + require.Fail(t, "signal not expected") + case <-timeout: + require.Fail(t, "should have received an event") + } + } +} + +// TestTaskTemplateManager_ScriptExecutionFailTask tests whether we fail the +// task upon script execution failure if that's how it's configured. +func TestTaskTemplateManager_ScriptExecutionFailTask(t *testing.T) { + ci.Parallel(t) + require := require.New(t) + + // Make a template that renders based on a key in Consul and triggers script + key1 := "bam" + key2 := "bar" + content1_1 := "cat" + content1_2 := "dog" + t1 := &structs.Template{ + EmbeddedTmpl: ` +FOO={{key "bam"}} +`, + DestPath: "test.env", + ChangeMode: structs.TemplateChangeModeScript, + ChangeScript: &structs.ChangeScript{ + Command: "/bin/foo", + Args: []string{}, + Timeout: 5 * time.Second, + FailOnError: true, + }, + Envvars: true, + } + t2 := &structs.Template{ + EmbeddedTmpl: ` +BAR={{key "bar"}} +`, + DestPath: "test2.env", + ChangeMode: structs.TemplateChangeModeScript, + ChangeScript: &structs.ChangeScript{ + Command: "/bin/foo", + Args: []string{}, + Timeout: 5 * time.Second, + FailOnError: false, + }, + Envvars: true, + } + + me := mockExecutor{DesiredExit: 1, DesiredErr: fmt.Errorf("Script failed")} + harness := newTestHarness(t, []*structs.Template{t1, t2}, true, false) + harness.start(t) + harness.manager.SetDriverHandle(&me) + defer harness.stop() + + // Ensure no unblock + select { + case <-harness.mockHooks.UnblockCh: + require.Fail("Task unblock should not have been called") + case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): + } + + // Write the key to Consul + harness.consul.SetKV(t, key1, []byte(content1_1)) + harness.consul.SetKV(t, key2, []byte(content1_1)) + + // Wait for the unblock + select { + case <-harness.mockHooks.UnblockCh: + case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): + require.Fail("Task unblock should have been called") + } + + // Update the keys in Consul + harness.consul.SetKV(t, key1, []byte(content1_2)) + + // Wait for kill channel + select { + case <-harness.mockHooks.KillCh: + break + case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): + require.Fail("Should have received a signals: %+v", harness.mockHooks) + } + + require.NotNil(harness.mockHooks.KillEvent) + require.Contains(harness.mockHooks.KillEvent.DisplayMessage, "task is being killed") +} + +func TestTaskTemplateManager_ChangeModeMixed(t *testing.T) { + ci.Parallel(t) + + templateRestart := &structs.Template{ + EmbeddedTmpl: ` +RESTART={{key "restart"}} +COMMON={{key "common"}} +`, + DestPath: "restart", + ChangeMode: structs.TemplateChangeModeRestart, + } + templateSignal := &structs.Template{ + EmbeddedTmpl: ` 
+SIGNAL={{key "signal"}} +COMMON={{key "common"}} +`, + DestPath: "signal", + ChangeMode: structs.TemplateChangeModeSignal, + ChangeSignal: "SIGALRM", + } + templateScript := &structs.Template{ + EmbeddedTmpl: ` +SCRIPT={{key "script"}} +COMMON={{key "common"}} +`, + DestPath: "script", + ChangeMode: structs.TemplateChangeModeScript, + ChangeScript: &structs.ChangeScript{ + Command: "/bin/foo", + Args: []string{}, + Timeout: 5 * time.Second, + FailOnError: true, + }, + } + templates := []*structs.Template{ + templateRestart, + templateSignal, + templateScript, + } + + me := mockExecutor{DesiredExit: 0, DesiredErr: nil} + harness := newTestHarness(t, templates, true, false) + harness.start(t) + harness.manager.SetDriverHandle(&me) + defer harness.stop() + + // Ensure no unblock + select { + case <-harness.mockHooks.UnblockCh: + require.Fail(t, "Task unblock should not have been called") + case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second): + } + + // Write the key to Consul + harness.consul.SetKV(t, "common", []byte(fmt.Sprintf("%v", time.Now()))) + harness.consul.SetKV(t, "restart", []byte(fmt.Sprintf("%v", time.Now()))) + harness.consul.SetKV(t, "signal", []byte(fmt.Sprintf("%v", time.Now()))) + harness.consul.SetKV(t, "script", []byte(fmt.Sprintf("%v", time.Now()))) + + // Wait for the unblock + select { + case <-harness.mockHooks.UnblockCh: + case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second): + require.Fail(t, "Task unblock should have been called") + } + + t.Run("restart takes precedence", func(t *testing.T) { + // Update the common Consul key. + harness.consul.SetKV(t, "common", []byte(fmt.Sprintf("%v", time.Now()))) + + // Collect some events. + timeout := time.After(time.Duration(3*testutil.TestMultiplier()) * time.Second) + events := []*structs.TaskEvent{} + OUTER: + for { + select { + case <-harness.mockHooks.RestartCh: + // Consume restarts so the channel is clean for other tests. + case <-harness.mockHooks.SignalCh: + require.Fail(t, "signal not expected") + case ev := <-harness.mockHooks.EmitEventCh: + events = append(events, ev) + case <-timeout: + break OUTER + } + } + + for _, ev := range events { + require.NotContains(t, ev.DisplayMessage, templateScript.ChangeScript.Command) + require.NotContains(t, ev.Type, structs.TaskSignaling) + } + }) + + t.Run("signal and script", func(t *testing.T) { + // Update the signal and script Consul keys. + harness.consul.SetKV(t, "signal", []byte(fmt.Sprintf("%v", time.Now()))) + harness.consul.SetKV(t, "script", []byte(fmt.Sprintf("%v", time.Now()))) + + // Wait for a events. + var gotSignal, gotScript bool + timeout := time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second) + for { + select { + case <-harness.mockHooks.RestartCh: + require.Fail(t, "restart not expected") + case ev := <-harness.mockHooks.EmitEventCh: + if strings.Contains(ev.DisplayMessage, templateScript.ChangeScript.Command) { + // Make sure we only run script once. + require.False(t, gotScript) + gotScript = true + } + case <-harness.mockHooks.SignalCh: + // Make sure we only signal once. + require.False(t, gotSignal) + gotSignal = true + case <-timeout: + require.Fail(t, "timeout waiting for script and signal") + } + + if gotScript && gotSignal { + break + } + } + }) +} + // TestTaskTemplateManager_FiltersProcessEnvVars asserts that we only render // environment variables found in task env-vars and not read the nomad host // process environment variables. 
nomad host process environment variables @@ -1233,7 +1537,7 @@ TEST_ENV_NOT_FOUND: {{env "` + testenv + `_NOTFOUND" }}` // Check the file is there path := filepath.Join(harness.taskDir, file) - raw, err := ioutil.ReadFile(path) + raw, err := os.ReadFile(path) require.NoError(t, err) require.Equal(t, expected, string(raw)) @@ -1286,14 +1590,10 @@ ANYTHING_goes=Spaces are=ok! // template processing function returns errors when files don't exist func TestTaskTemplateManager_Env_Missing(t *testing.T) { ci.Parallel(t) - d, err := ioutil.TempDir("", "ct_env_missing") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(d) + d := t.TempDir() // Fake writing the file so we don't have to run the whole template manager - err = ioutil.WriteFile(filepath.Join(d, "exists.env"), []byte("FOO=bar\n"), 0644) + err := os.WriteFile(filepath.Join(d, "exists.env"), []byte("FOO=bar\n"), 0644) if err != nil { t.Fatalf("error writing template file: %v", err) } @@ -1323,14 +1623,10 @@ func TestTaskTemplateManager_Env_InterpolatedDest(t *testing.T) { ci.Parallel(t) require := require.New(t) - d, err := ioutil.TempDir("", "ct_env_interpolated") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(d) + d := t.TempDir() // Fake writing the file so we don't have to run the whole template manager - err = ioutil.WriteFile(filepath.Join(d, "exists.env"), []byte("FOO=bar\n"), 0644) + err := os.WriteFile(filepath.Join(d, "exists.env"), []byte("FOO=bar\n"), 0644) if err != nil { t.Fatalf("error writing template file: %v", err) } @@ -1362,18 +1658,14 @@ func TestTaskTemplateManager_Env_InterpolatedDest(t *testing.T) { // templates correctly. func TestTaskTemplateManager_Env_Multi(t *testing.T) { ci.Parallel(t) - d, err := ioutil.TempDir("", "ct_env_missing") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(d) + d := t.TempDir() // Fake writing the files so we don't have to run the whole template manager - err = ioutil.WriteFile(filepath.Join(d, "zzz.env"), []byte("FOO=bar\nSHARED=nope\n"), 0644) + err := os.WriteFile(filepath.Join(d, "zzz.env"), []byte("FOO=bar\nSHARED=nope\n"), 0644) if err != nil { t.Fatalf("error writing template file 1: %v", err) } - err = ioutil.WriteFile(filepath.Join(d, "aaa.env"), []byte("BAR=foo\nSHARED=yup\n"), 0644) + err = os.WriteFile(filepath.Join(d, "aaa.env"), []byte("BAR=foo\nSHARED=yup\n"), 0644) if err != nil { t.Fatalf("error writing template file 2: %v", err) } @@ -1493,7 +1785,7 @@ func TestTaskTemplateManager_Config_ServerName(t *testing.T) { c := config.DefaultConfig() c.Node = mock.Node() c.VaultConfig = &sconfig.VaultConfig{ - Enabled: helper.BoolToPtr(true), + Enabled: pointer.Of(true), Addr: "https://localhost/", TLSServerName: "notlocalhost", } @@ -1521,7 +1813,7 @@ func TestTaskTemplateManager_Config_VaultNamespace(t *testing.T) { c := config.DefaultConfig() c.Node = mock.Node() c.VaultConfig = &sconfig.VaultConfig{ - Enabled: helper.BoolToPtr(true), + Enabled: pointer.Of(true), Addr: "https://localhost/", TLSServerName: "notlocalhost", Namespace: testNS, @@ -1552,7 +1844,7 @@ func TestTaskTemplateManager_Config_VaultNamespace_TaskOverride(t *testing.T) { c := config.DefaultConfig() c.Node = mock.Node() c.VaultConfig = &sconfig.VaultConfig{ - Enabled: helper.BoolToPtr(true), + Enabled: pointer.Of(true), Addr: "https://localhost/", TLSServerName: "notlocalhost", Namespace: testNS, @@ -1938,7 +2230,7 @@ func TestTaskTemplateManager_ClientTemplateConfig_Set(t *testing.T) { clientConfig.Node = mock.Node() clientConfig.VaultConfig 
= &sconfig.VaultConfig{ - Enabled: helper.BoolToPtr(true), + Enabled: pointer.Of(true), Namespace: testNS, } @@ -1948,29 +2240,30 @@ func TestTaskTemplateManager_ClientTemplateConfig_Set(t *testing.T) { // helper to reduce boilerplate waitConfig := &config.WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), } // helper to reduce boilerplate retryConfig := &config.RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(5 * time.Second), - MaxBackoff: helper.TimeToPtr(20 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(5 * time.Second), + MaxBackoff: pointer.Of(20 * time.Second), } - clientConfig.TemplateConfig.MaxStale = helper.TimeToPtr(5 * time.Second) - clientConfig.TemplateConfig.BlockQueryWaitTime = helper.TimeToPtr(60 * time.Second) + clientConfig.TemplateConfig.MaxStale = pointer.Of(5 * time.Second) + clientConfig.TemplateConfig.BlockQueryWaitTime = pointer.Of(60 * time.Second) clientConfig.TemplateConfig.Wait = waitConfig.Copy() clientConfig.TemplateConfig.ConsulRetry = retryConfig.Copy() clientConfig.TemplateConfig.VaultRetry = retryConfig.Copy() + clientConfig.TemplateConfig.NomadRetry = retryConfig.Copy() alloc := mock.Alloc() allocWithOverride := mock.Alloc() allocWithOverride.Job.TaskGroups[0].Tasks[0].Templates = []*structs.Template{ { Wait: &structs.WaitConfig{ - Min: helper.TimeToPtr(2 * time.Second), - Max: helper.TimeToPtr(12 * time.Second), + Min: pointer.Of(2 * time.Second), + Max: pointer.Of(12 * time.Second), }, }, } @@ -1985,11 +2278,12 @@ func TestTaskTemplateManager_ClientTemplateConfig_Set(t *testing.T) { { "basic-wait-config", &config.ClientTemplateConfig{ - MaxStale: helper.TimeToPtr(5 * time.Second), - BlockQueryWaitTime: helper.TimeToPtr(60 * time.Second), + MaxStale: pointer.Of(5 * time.Second), + BlockQueryWaitTime: pointer.Of(60 * time.Second), Wait: waitConfig.Copy(), ConsulRetry: retryConfig.Copy(), VaultRetry: retryConfig.Copy(), + NomadRetry: retryConfig.Copy(), }, &TaskTemplateManagerConfig{ ClientConfig: clientConfig, @@ -1998,29 +2292,31 @@ func TestTaskTemplateManager_ClientTemplateConfig_Set(t *testing.T) { }, &config.Config{ TemplateConfig: &config.ClientTemplateConfig{ - MaxStale: helper.TimeToPtr(5 * time.Second), - BlockQueryWaitTime: helper.TimeToPtr(60 * time.Second), + MaxStale: pointer.Of(5 * time.Second), + BlockQueryWaitTime: pointer.Of(60 * time.Second), Wait: waitConfig.Copy(), ConsulRetry: retryConfig.Copy(), VaultRetry: retryConfig.Copy(), + NomadRetry: retryConfig.Copy(), }, }, &templateconfig.TemplateConfig{ Wait: &templateconfig.WaitConfig{ - Enabled: helper.BoolToPtr(true), - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Enabled: pointer.Of(true), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), }, }, }, { "template-override", &config.ClientTemplateConfig{ - MaxStale: helper.TimeToPtr(5 * time.Second), - BlockQueryWaitTime: helper.TimeToPtr(60 * time.Second), + MaxStale: pointer.Of(5 * time.Second), + BlockQueryWaitTime: pointer.Of(60 * time.Second), Wait: waitConfig.Copy(), ConsulRetry: retryConfig.Copy(), VaultRetry: retryConfig.Copy(), + NomadRetry: retryConfig.Copy(), }, &TaskTemplateManagerConfig{ ClientConfig: clientConfig, @@ -2029,33 +2325,35 @@ func TestTaskTemplateManager_ClientTemplateConfig_Set(t *testing.T) { }, &config.Config{ TemplateConfig: &config.ClientTemplateConfig{ - MaxStale: helper.TimeToPtr(5 * 
time.Second), - BlockQueryWaitTime: helper.TimeToPtr(60 * time.Second), + MaxStale: pointer.Of(5 * time.Second), + BlockQueryWaitTime: pointer.Of(60 * time.Second), Wait: waitConfig.Copy(), ConsulRetry: retryConfig.Copy(), VaultRetry: retryConfig.Copy(), + NomadRetry: retryConfig.Copy(), }, }, &templateconfig.TemplateConfig{ Wait: &templateconfig.WaitConfig{ - Enabled: helper.BoolToPtr(true), - Min: helper.TimeToPtr(2 * time.Second), - Max: helper.TimeToPtr(12 * time.Second), + Enabled: pointer.Of(true), + Min: pointer.Of(2 * time.Second), + Max: pointer.Of(12 * time.Second), }, }, }, { "bounds-override", &config.ClientTemplateConfig{ - MaxStale: helper.TimeToPtr(5 * time.Second), - BlockQueryWaitTime: helper.TimeToPtr(60 * time.Second), + MaxStale: pointer.Of(5 * time.Second), + BlockQueryWaitTime: pointer.Of(60 * time.Second), Wait: waitConfig.Copy(), WaitBounds: &config.WaitConfig{ - Min: helper.TimeToPtr(3 * time.Second), - Max: helper.TimeToPtr(11 * time.Second), + Min: pointer.Of(3 * time.Second), + Max: pointer.Of(11 * time.Second), }, ConsulRetry: retryConfig.Copy(), VaultRetry: retryConfig.Copy(), + NomadRetry: retryConfig.Copy(), }, &TaskTemplateManagerConfig{ ClientConfig: clientConfig, @@ -2064,30 +2362,31 @@ func TestTaskTemplateManager_ClientTemplateConfig_Set(t *testing.T) { Templates: []*structs.Template{ { Wait: &structs.WaitConfig{ - Min: helper.TimeToPtr(2 * time.Second), - Max: helper.TimeToPtr(12 * time.Second), + Min: pointer.Of(2 * time.Second), + Max: pointer.Of(12 * time.Second), }, }, }, }, &config.Config{ TemplateConfig: &config.ClientTemplateConfig{ - MaxStale: helper.TimeToPtr(5 * time.Second), - BlockQueryWaitTime: helper.TimeToPtr(60 * time.Second), + MaxStale: pointer.Of(5 * time.Second), + BlockQueryWaitTime: pointer.Of(60 * time.Second), Wait: waitConfig.Copy(), WaitBounds: &config.WaitConfig{ - Min: helper.TimeToPtr(3 * time.Second), - Max: helper.TimeToPtr(11 * time.Second), + Min: pointer.Of(3 * time.Second), + Max: pointer.Of(11 * time.Second), }, ConsulRetry: retryConfig.Copy(), VaultRetry: retryConfig.Copy(), + NomadRetry: retryConfig.Copy(), }, }, &templateconfig.TemplateConfig{ Wait: &templateconfig.WaitConfig{ - Enabled: helper.BoolToPtr(true), - Min: helper.TimeToPtr(3 * time.Second), - Max: helper.TimeToPtr(11 * time.Second), + Enabled: pointer.Of(true), + Min: pointer.Of(3 * time.Second), + Max: pointer.Of(11 * time.Second), }, }, }, @@ -2121,6 +2420,12 @@ func TestTaskTemplateManager_ClientTemplateConfig_Set(t *testing.T) { require.Equal(t, *_case.ExpectedRunnerConfig.TemplateConfig.VaultRetry.Attempts, *runnerConfig.Vault.Retry.Attempts) require.Equal(t, *_case.ExpectedRunnerConfig.TemplateConfig.VaultRetry.Backoff, *runnerConfig.Vault.Retry.Backoff) require.Equal(t, *_case.ExpectedRunnerConfig.TemplateConfig.VaultRetry.MaxBackoff, *runnerConfig.Vault.Retry.MaxBackoff) + // Nomad Retry + require.NotNil(t, runnerConfig.Nomad) + require.NotNil(t, runnerConfig.Nomad.Retry) + require.Equal(t, *_case.ExpectedRunnerConfig.TemplateConfig.NomadRetry.Attempts, *runnerConfig.Nomad.Retry.Attempts) + require.Equal(t, *_case.ExpectedRunnerConfig.TemplateConfig.NomadRetry.Backoff, *runnerConfig.Nomad.Retry.Backoff) + require.Equal(t, *_case.ExpectedRunnerConfig.TemplateConfig.NomadRetry.MaxBackoff, *runnerConfig.Nomad.Retry.MaxBackoff) // Test that wait_bounds are enforced for _, tmpl := range *runnerConfig.Templates { @@ -2150,8 +2455,8 @@ func TestTaskTemplateManager_Template_Wait_Set(t *testing.T) { Templates: []*structs.Template{ { Wait: 
&structs.WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), }, }, }, @@ -2200,7 +2505,7 @@ func TestTaskTemplateManager_writeToFile_Disabled(t *testing.T) { // Check the file is not there path := filepath.Join(harness.taskDir, file) - _, err := ioutil.ReadFile(path) + _, err := os.ReadFile(path) require.Error(t, err) } @@ -2253,13 +2558,13 @@ func TestTaskTemplateManager_writeToFile(t *testing.T) { // Check the templated file is there path := filepath.Join(harness.taskDir, file) - r, err := ioutil.ReadFile(path) + r, err := os.ReadFile(path) require.NoError(t, err) require.True(t, bytes.HasSuffix(r, []byte("...done\n")), string(r)) // Check that writeToFile was allowed path = filepath.Join(harness.taskDir, "writetofile.out") - r, err = ioutil.ReadFile(path) + r, err = os.ReadFile(path) require.NoError(t, err) require.Equal(t, "hello", string(r)) } diff --git a/client/allocrunner/taskrunner/template_hook.go b/client/allocrunner/taskrunner/template_hook.go index a5ad9f8fd88..20136b03e41 100644 --- a/client/allocrunner/taskrunner/template_hook.go +++ b/client/allocrunner/taskrunner/template_hook.go @@ -54,6 +54,12 @@ type templateHook struct { templateManager *template.TaskTemplateManager managerLock sync.Mutex + // driverHandle is the task driver executor used by the template manager to + // run scripts when the template change mode is set to script. + // + // Must obtain a managerLock before changing. It may be nil. + driverHandle ti.ScriptExecutor + // consulNamespace is the current Consul namespace consulNamespace string @@ -111,6 +117,27 @@ func (h *templateHook) Prestart(ctx context.Context, req *interfaces.TaskPrestar return nil } +func (h *templateHook) Poststart(ctx context.Context, req *interfaces.TaskPoststartRequest, resp *interfaces.TaskPoststartResponse) error { + h.managerLock.Lock() + defer h.managerLock.Unlock() + + if h.templateManager == nil { + return nil + } + + if req.DriverExec != nil { + h.driverHandle = req.DriverExec + h.templateManager.SetDriverHandle(h.driverHandle) + } else { + for _, tmpl := range h.config.templates { + if tmpl.ChangeMode == structs.TemplateChangeModeScript { + return fmt.Errorf("template has change mode set to 'script' but the driver it uses does not provide exec capability") + } + } + } + return nil +} + func (h *templateHook) newManager() (unblock chan struct{}, err error) { unblock = make(chan struct{}) m, err := template.NewTaskTemplateManager(&template.TaskTemplateManagerConfig{ @@ -133,6 +160,9 @@ func (h *templateHook) newManager() (unblock chan struct{}, err error) { } h.templateManager = m + if h.driverHandle != nil { + h.templateManager.SetDriverHandle(h.driverHandle) + } return unblock, nil } @@ -148,12 +178,12 @@ func (h *templateHook) Stop(ctx context.Context, req *interfaces.TaskStopRequest return nil } -// Handle new Vault token +// Update is used to handle updates to vault and/or nomad tokens. 
func (h *templateHook) Update(ctx context.Context, req *interfaces.TaskUpdateRequest, resp *interfaces.TaskUpdateResponse) error { h.managerLock.Lock() defer h.managerLock.Unlock() - // Nothing to do + // no template manager to manage if h.templateManager == nil { return nil } @@ -165,15 +195,15 @@ func (h *templateHook) Update(ctx context.Context, req *interfaces.TaskUpdateReq h.vaultToken = req.VaultToken } - // Shutdown the old template + // shutdown the old template h.templateManager.Stop() h.templateManager = nil - // Create the new template + // create the new template if _, err := h.newManager(); err != nil { - err := fmt.Errorf("failed to build template manager: %v", err) + err = fmt.Errorf("failed to build template manager: %v", err) h.logger.Error("failed to build template manager", "error", err) - h.config.lifecycle.Kill(context.Background(), + _ = h.config.lifecycle.Kill(context.Background(), structs.NewTaskEvent(structs.TaskKilling). SetFailsTask(). SetDisplayMessage(fmt.Sprintf("Template update %v", err))) diff --git a/client/allocrunner/taskrunner/vault_hook.go b/client/allocrunner/taskrunner/vault_hook.go index 8aa33a429dc..87c76653ffd 100644 --- a/client/allocrunner/taskrunner/vault_hook.go +++ b/client/allocrunner/taskrunner/vault_hook.go @@ -3,7 +3,6 @@ package taskrunner import ( "context" "fmt" - "io/ioutil" "os" "path/filepath" "sync" @@ -45,19 +44,19 @@ func (tr *TaskRunner) updatedVaultToken(token string) { } type vaultHookConfig struct { - vaultStanza *structs.Vault - client vaultclient.VaultClient - events ti.EventEmitter - lifecycle ti.TaskLifecycle - updater vaultTokenUpdateHandler - logger log.Logger - alloc *structs.Allocation - task string + vaultBlock *structs.Vault + client vaultclient.VaultClient + events ti.EventEmitter + lifecycle ti.TaskLifecycle + updater vaultTokenUpdateHandler + logger log.Logger + alloc *structs.Allocation + task string } type vaultHook struct { - // vaultStanza is the vault stanza for the task - vaultStanza *structs.Vault + // vaultBlock is the vault block for the task + vaultBlock *structs.Vault // eventEmitter is used to emit events to the task eventEmitter ti.EventEmitter @@ -97,7 +96,7 @@ type vaultHook struct { func newVaultHook(config *vaultHookConfig) *vaultHook { ctx, cancel := context.WithCancel(context.Background()) h := &vaultHook{ - vaultStanza: config.vaultStanza, + vaultBlock: config.vaultBlock, client: config.client, eventEmitter: config.events, lifecycle: config.lifecycle, @@ -130,7 +129,7 @@ func (h *vaultHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRe // directory recoveredToken := "" h.tokenPath = filepath.Join(req.TaskDir.SecretsDir, vaultTokenFile) - data, err := ioutil.ReadFile(h.tokenPath) + data, err := os.ReadFile(h.tokenPath) if err != nil { if !os.IsNotExist(err) { return fmt.Errorf("failed to recover vault token: %v", err) @@ -239,9 +238,9 @@ OUTER: h.future.Set(token) if updatedToken { - switch h.vaultStanza.ChangeMode { + switch h.vaultBlock.ChangeMode { case structs.VaultChangeModeSignal: - s, err := signals.Parse(h.vaultStanza.ChangeSignal) + s, err := signals.Parse(h.vaultBlock.ChangeSignal) if err != nil { h.logger.Error("failed to parse signal", "error", err) h.lifecycle.Kill(h.ctx, @@ -252,7 +251,7 @@ OUTER: } event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetDisplayMessage("Vault: new Vault token acquired") - if err := h.lifecycle.Signal(event, h.vaultStanza.ChangeSignal); err != nil { + if err := h.lifecycle.Signal(event, h.vaultBlock.ChangeSignal); err 
!= nil { h.logger.Error("failed to send signal", "error", err) h.lifecycle.Kill(h.ctx, structs.NewTaskEvent(structs.TaskKilling). @@ -268,7 +267,7 @@ OUTER: case structs.VaultChangeModeNoop: fallthrough default: - h.logger.Error("invalid Vault change mode", "mode", h.vaultStanza.ChangeMode) + h.logger.Error("invalid Vault change mode", "mode", h.vaultBlock.ChangeMode) } // We have handled it @@ -343,7 +342,7 @@ func (h *vaultHook) deriveVaultToken() (token string, exit bool) { // writeToken writes the given token to disk func (h *vaultHook) writeToken(token string) error { - if err := ioutil.WriteFile(h.tokenPath, []byte(token), 0666); err != nil { + if err := os.WriteFile(h.tokenPath, []byte(token), 0666); err != nil { return fmt.Errorf("failed to write vault token: %v", err) } diff --git a/client/allocrunner/taskrunner/volume_hook.go b/client/allocrunner/taskrunner/volume_hook.go index 7d33d74e366..c84b891e1df 100644 --- a/client/allocrunner/taskrunner/volume_hook.go +++ b/client/allocrunner/taskrunner/volume_hook.go @@ -79,9 +79,10 @@ func (h *volumeHook) hostVolumeMountConfigurations(taskMounts []*structs.VolumeM } mcfg := &drivers.MountConfig{ - HostPath: hostVolume.Path, - TaskPath: m.Destination, - Readonly: hostVolume.ReadOnly || req.ReadOnly || m.ReadOnly, + HostPath: hostVolume.Path, + TaskPath: m.Destination, + Readonly: hostVolume.ReadOnly || req.ReadOnly || m.ReadOnly, + PropagationMode: m.PropagationMode, } mounts = append(mounts, mcfg) } @@ -171,9 +172,10 @@ func (h *volumeHook) prepareCSIVolumes(req *interfaces.TaskPrestartRequest, volu for _, m := range mountsForAlias { mcfg := &drivers.MountConfig{ - HostPath: csiMountPoint.Source, - TaskPath: m.Destination, - Readonly: request.ReadOnly || m.ReadOnly, + HostPath: csiMountPoint.Source, + TaskPath: m.Destination, + Readonly: request.ReadOnly || m.ReadOnly, + PropagationMode: m.PropagationMode, } mounts = append(mounts, mcfg) } diff --git a/client/allocrunner/testing.go b/client/allocrunner/testing.go index c369c4fe146..6a9703faac4 100644 --- a/client/allocrunner/testing.go +++ b/client/allocrunner/testing.go @@ -4,9 +4,11 @@ package allocrunner import ( + "fmt" "sync" "testing" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" "github.com/hashicorp/nomad/client/allocwatcher" clientconfig "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/consul" @@ -18,6 +20,7 @@ import ( "github.com/hashicorp/nomad/client/state" "github.com/hashicorp/nomad/client/vaultclient" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" "github.com/stretchr/testify/require" ) @@ -83,7 +86,9 @@ func testAllocRunnerConfig(t *testing.T, alloc *structs.Allocation) (*Config, fu CpusetManager: new(cgutil.NoopCpusetManager), ServersContactedCh: make(chan struct{}), ServiceRegWrapper: wrapper.NewHandlerWrapper(clientConf.Logger, consulRegMock, nomadRegMock), + Getter: getter.TestDefaultGetter(t), } + return conf, cleanup } @@ -97,3 +102,13 @@ func TestAllocRunnerFromAlloc(t *testing.T, alloc *structs.Allocation) (*allocRu return ar, cleanup } + +func WaitForClientState(t *testing.T, ar *allocRunner, state string) { + testutil.WaitForResult(func() (bool, error) { + got := ar.AllocState().ClientStatus + return got == state, + fmt.Errorf("expected alloc runner to be in state %s, got %s", state, got) + }, func(err error) { + require.NoError(t, err) + }) +} diff --git a/client/allocwatcher/alloc_watcher_test.go b/client/allocwatcher/alloc_watcher_test.go index 4aa36433bdb..8d74fee06db 
100644 --- a/client/allocwatcher/alloc_watcher_test.go +++ b/client/allocwatcher/alloc_watcher_test.go @@ -5,7 +5,7 @@ import ( "bytes" "context" "fmt" - "io/ioutil" + "io" "os" "path/filepath" "strings" @@ -208,11 +208,7 @@ func TestPrevAlloc_LocalPrevAlloc_Terminated(t *testing.T) { func TestPrevAlloc_StreamAllocDir_Error(t *testing.T) { ci.Parallel(t) - dest, err := ioutil.TempDir("", "nomadtest-") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(dest) + dest := t.TempDir() // This test only unit tests streamAllocDir so we only need a partially // complete remotePrevAlloc @@ -232,7 +228,7 @@ func TestPrevAlloc_StreamAllocDir_Error(t *testing.T) { ModTime: time.Now(), Typeflag: tar.TypeReg, } - err = tw.WriteHeader(&fooHdr) + err := tw.WriteHeader(&fooHdr) if err != nil { t.Fatalf("error writing file header: %v", err) } @@ -259,7 +255,7 @@ func TestPrevAlloc_StreamAllocDir_Error(t *testing.T) { } // Assert streamAllocDir fails - err = prevAlloc.streamAllocDir(context.Background(), ioutil.NopCloser(tarBuf), dest) + err = prevAlloc.streamAllocDir(context.Background(), io.NopCloser(tarBuf), dest) if err == nil { t.Fatalf("expected an error from streamAllocDir") } diff --git a/client/allocwatcher/alloc_watcher_unix_test.go b/client/allocwatcher/alloc_watcher_unix_test.go index 79f8a2979bb..76a9f752b15 100644 --- a/client/allocwatcher/alloc_watcher_unix_test.go +++ b/client/allocwatcher/alloc_watcher_unix_test.go @@ -9,7 +9,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "os" "path/filepath" "syscall" @@ -26,11 +25,7 @@ func TestPrevAlloc_StreamAllocDir_Ok(t *testing.T) { ci.Parallel(t) ctestutil.RequireRoot(t) - dir, err := ioutil.TempDir("", "") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(dir) + dir := t.TempDir() // Create foo/ fooDir := filepath.Join(dir, "foo") @@ -124,13 +119,9 @@ func TestPrevAlloc_StreamAllocDir_Ok(t *testing.T) { } tw.Close() - dir1, err := ioutil.TempDir("", "nomadtest-") - if err != nil { - t.Fatalf("err: %v", err) - } - defer os.RemoveAll(dir1) + dir1 := t.TempDir() - rc := ioutil.NopCloser(buf) + rc := io.NopCloser(buf) prevAlloc := &remotePrevAlloc{logger: testlog.HCLogger(t)} if err := prevAlloc.streamAllocDir(context.Background(), rc, dir1); err != nil { t.Fatalf("err: %v", err) diff --git a/client/client.go b/client/client.go index 07beb2c11f1..44208333725 100644 --- a/client/client.go +++ b/client/client.go @@ -3,12 +3,10 @@ package client import ( "errors" "fmt" - "io/ioutil" "net" "net/rpc" "os" "path/filepath" - "runtime" "sort" "strconv" "strings" @@ -23,12 +21,14 @@ import ( "github.com/hashicorp/nomad/client/allocrunner" "github.com/hashicorp/nomad/client/allocrunner/interfaces" arstate "github.com/hashicorp/nomad/client/allocrunner/state" + "github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter" "github.com/hashicorp/nomad/client/allocwatcher" "github.com/hashicorp/nomad/client/config" consulApi "github.com/hashicorp/nomad/client/consul" "github.com/hashicorp/nomad/client/devicemanager" "github.com/hashicorp/nomad/client/dynamicplugins" "github.com/hashicorp/nomad/client/fingerprint" + cinterfaces "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/client/lib/cgutil" "github.com/hashicorp/nomad/client/pluginmanager" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" @@ -44,6 +44,7 @@ import ( "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/envoy" + 
"github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/pool" hstats "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/helper/tlsutil" @@ -55,6 +56,7 @@ import ( "github.com/hashicorp/nomad/plugins/drivers" vaultapi "github.com/hashicorp/vault/api" "github.com/shirou/gopsutil/v3/host" + "golang.org/x/exp/maps" ) const ( @@ -133,7 +135,7 @@ type ClientStatsReporter interface { } // AllocRunner is the interface implemented by the core alloc runner. -//TODO Create via factory to allow testing Client with mock AllocRunners. +// TODO Create via factory to allow testing Client with mock AllocRunners. type AllocRunner interface { Alloc() *structs.Allocation AllocState() *arstate.State @@ -156,6 +158,7 @@ type AllocRunner interface { PersistState() error RestartTask(taskName string, taskEvent *structs.TaskEvent) error + RestartRunning(taskEvent *structs.TaskEvent) error RestartAll(taskEvent *structs.TaskEvent) error Reconnect(update *structs.Allocation) error @@ -167,15 +170,22 @@ type AllocRunner interface { // are expected to register as a schedulable node to the servers, and to // run allocations as determined by the servers. type Client struct { - config *config.Config - start time.Time + start time.Time // stateDB is used to efficiently store client state. stateDB state.StateDB - // configCopy is a copy that should be passed to alloc-runners. - configCopy *config.Config - configLock sync.RWMutex + // config must only be accessed with lock held. To update the config, use the + // Client.UpdateConfig() helper. If you need more fine grained control use + // the following pattern: + // + // c.configLock.Lock() + // newConfig := c.config.Copy() + // // + // c.config = newConfig + // c.configLock.Unlock() + config *config.Config + configLock sync.Mutex logger hclog.InterceptLogger rpcLogger hclog.Logger @@ -319,6 +329,9 @@ type Client struct { // EnterpriseClient is used to set and check enterprise features for clients EnterpriseClient *EnterpriseClient + + // getter is an interface for retrieving artifacts. + getter cinterfaces.ArtifactGetter } var ( @@ -376,7 +389,8 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie invalidAllocs: make(map[string]struct{}), serversContactedCh: make(chan struct{}), serversContactedOnce: sync.Once{}, - cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, logger), + cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, cfg.ReservableCores, logger), + getter: getter.NewGetter(logger.Named("artifact_getter"), cfg.Artifact), EnterpriseClient: newEnterpriseClient(logger), } @@ -421,14 +435,8 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie return nil, fmt.Errorf("node setup failed: %v", err) } - // Store the config copy before restoring state but after it has been - // initialized. 
- c.configLock.Lock() - c.configCopy = c.config.Copy() - c.configLock.Unlock() - fingerprintManager := NewFingerprintManager( - c.configCopy.PluginSingletonLoader, c.GetConfig, c.configCopy.Node, + cfg.PluginSingletonLoader, c.GetConfig, cfg.Node, c.shutdownCh, c.updateNodeFromFingerprint, c.logger) c.pluginManagers = pluginmanager.New(c.logger) @@ -457,8 +465,8 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie // Setup the driver manager driverConfig := &drivermanager.Config{ Logger: c.logger, - Loader: c.configCopy.PluginSingletonLoader, - PluginConfig: c.configCopy.NomadPluginConfig(), + Loader: cfg.PluginSingletonLoader, + PluginConfig: cfg.NomadPluginConfig(), Updater: c.batchNodeUpdates.updateNodeFromDriver, EventHandlerFactory: c.GetTaskEventHandler, State: c.stateDB, @@ -472,10 +480,10 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie // Setup the device manager devConfig := &devicemanager.Config{ Logger: c.logger, - Loader: c.configCopy.PluginSingletonLoader, - PluginConfig: c.configCopy.NomadPluginConfig(), + Loader: cfg.PluginSingletonLoader, + PluginConfig: cfg.NomadPluginConfig(), Updater: c.batchNodeUpdates.updateNodeFromDevices, - StatsInterval: c.configCopy.StatsCollectionInterval, + StatsInterval: cfg.StatsCollectionInterval, State: c.stateDB, } devManager := devicemanager.New(devConfig) @@ -501,7 +509,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie go c.heartbeatStop.watch() // Add the stats collector - statsCollector := stats.NewHostStatsCollector(c.logger, c.config.AllocDir, c.devicemanager.AllStats) + statsCollector := stats.NewHostStatsCollector(c.logger, c.GetConfig().AllocDir, c.devicemanager.AllStats) c.hostStatsCollector = statsCollector // Add the garbage collector @@ -517,16 +525,14 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie go c.garbageCollector.Run() // Set the preconfigured list of static servers - c.configLock.RLock() - if len(c.configCopy.Servers) > 0 { - if _, err := c.setServersImpl(c.configCopy.Servers, true); err != nil { + if len(cfg.Servers) > 0 { + if _, err := c.setServersImpl(cfg.Servers, true); err != nil { logger.Warn("none of the configured servers are valid", "error", err) } } - c.configLock.RUnlock() // Setup Consul discovery if enabled - if c.configCopy.ConsulConfig.ClientAutoJoin != nil && *c.configCopy.ConsulConfig.ClientAutoJoin { + if cfg.ConsulConfig.ClientAutoJoin != nil && *cfg.ConsulConfig.ClientAutoJoin { c.shutdownGroup.Go(c.consulDiscovery) if c.servers.NumServers() == 0 { // No configured servers; trigger discovery manually @@ -560,7 +566,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie "The safest way to proceed is to manually stop running task processes "+ "and remove Nomad's state and alloc directories before "+ "restarting. Lost allocations will be rescheduled.", - "state_dir", c.config.StateDir, "alloc_dir", c.config.AllocDir) + "state_dir", cfg.StateDir, "alloc_dir", cfg.AllocDir) logger.Error("Corrupt state is often caused by a bug. Please " + "report as much information as possible to " + "https://github.com/hashicorp/nomad/issues") @@ -594,14 +600,15 @@ func (c *Client) Ready() <-chan struct{} { // needed before we begin starting its various components. 
func (c *Client) init() error { // Ensure the state dir exists if we have one - if c.config.StateDir != "" { - if err := os.MkdirAll(c.config.StateDir, 0700); err != nil { + conf := c.GetConfig() + if conf.StateDir != "" { + if err := os.MkdirAll(conf.StateDir, 0700); err != nil { return fmt.Errorf("failed creating state dir: %s", err) } } else { // Otherwise make a temp directory to use. - p, err := ioutil.TempDir("", "NomadClient") + p, err := os.MkdirTemp("", "NomadClient") if err != nil { return fmt.Errorf("failed creating temporary directory for the StateDir: %v", err) } @@ -611,12 +618,14 @@ func (c *Client) init() error { return fmt.Errorf("failed to find temporary directory for the StateDir: %v", err) } - c.config.StateDir = p + conf = c.UpdateConfig(func(c *config.Config) { + c.StateDir = p + }) } - c.logger.Info("using state directory", "state_dir", c.config.StateDir) + c.logger.Info("using state directory", "state_dir", conf.StateDir) // Open the state database - db, err := c.config.StateDBFactory(c.logger, c.config.StateDir) + db, err := conf.StateDBFactory(c.logger, conf.StateDir) if err != nil { return fmt.Errorf("failed to open state database: %v", err) } @@ -634,13 +643,13 @@ func (c *Client) init() error { c.stateDB = db // Ensure the alloc dir exists if we have one - if c.config.AllocDir != "" { - if err := os.MkdirAll(c.config.AllocDir, 0711); err != nil { + if conf.AllocDir != "" { + if err := os.MkdirAll(conf.AllocDir, 0711); err != nil { return fmt.Errorf("failed creating alloc dir: %s", err) } } else { // Otherwise make a temp directory to use. - p, err := ioutil.TempDir("", "NomadClient") + p, err := os.MkdirTemp("", "NomadClient") if err != nil { return fmt.Errorf("failed creating temporary directory for the AllocDir: %v", err) } @@ -655,38 +664,28 @@ func (c *Client) init() error { return fmt.Errorf("failed to change directory permissions for the AllocDir: %v", err) } - c.config.AllocDir = p + conf = c.UpdateConfig(func(c *config.Config) { + c.AllocDir = p + }) } - c.logger.Info("using alloc directory", "alloc_dir", c.config.AllocDir) + c.logger.Info("using alloc directory", "alloc_dir", conf.AllocDir) reserved := "" - if c.config.Node != nil && c.config.Node.ReservedResources != nil { + if conf.Node != nil && conf.Node.ReservedResources != nil { // Node should always be non-nil due to initialization in the // agent package, but don't risk a panic just for a long line. - reserved = c.config.Node.ReservedResources.Networks.ReservedHostPorts + reserved = conf.Node.ReservedResources.Networks.ReservedHostPorts } c.logger.Info("using dynamic ports", - "min", c.config.MinDynamicPort, - "max", c.config.MaxDynamicPort, + "min", conf.MinDynamicPort, + "max", conf.MaxDynamicPort, "reserved", reserved, ) - // Ensure cgroups are created on linux platform - if runtime.GOOS == "linux" && c.cpusetManager != nil { - // use the client configuration for reservable_cores if set - cores := c.config.ReservableCores - if len(cores) == 0 { - // otherwise lookup the effective cores from the parent cgroup - cores, _ = cgutil.GetCPUsFromCgroup(c.config.CgroupParent) - } - if cpuErr := c.cpusetManager.Init(cores); cpuErr != nil { - // If the client cannot initialize the cgroup then reserved cores will not be reported and the cpuset manager - // will be disabled. this is common when running in dev mode under a non-root user for example. 
- c.logger.Warn("failed to initialize cpuset cgroup subsystem, cpuset management disabled", "error", cpuErr) - c.cpusetManager = new(cgutil.NoopCpusetManager) - } - } + // startup the CPUSet manager + c.cpusetManager.Init() + return nil } @@ -713,9 +712,9 @@ func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error { // Keep the client configuration up to date as we use configuration values to // decide on what type of connections to accept - c.configLock.Lock() - c.config.TLSConfig = newConfig - c.configLock.Unlock() + c.UpdateConfig(func(c *config.Config) { + c.TLSConfig = newConfig + }) c.connPool.ReloadTLS(tlsWrap) @@ -724,7 +723,8 @@ func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error { // Reload allows a client to reload its configuration on the fly func (c *Client) Reload(newConfig *config.Config) error { - shouldReloadTLS, err := tlsutil.ShouldReloadRPCConnections(c.config.TLSConfig, newConfig.TLSConfig) + existing := c.GetConfig() + shouldReloadTLS, err := tlsutil.ShouldReloadRPCConnections(existing.TLSConfig, newConfig.TLSConfig) if err != nil { c.logger.Error("error parsing TLS configuration", "error", err) return err @@ -743,31 +743,50 @@ func (c *Client) Leave() error { return nil } -// GetConfig returns the config of the client +// GetConfig returns the config of the client. Do *not* mutate without first +// calling Copy(). func (c *Client) GetConfig() *config.Config { c.configLock.Lock() defer c.configLock.Unlock() - return c.configCopy + return c.config +} + +// UpdateConfig allows mutating the configuration. The updated configuration is +// returned. +func (c *Client) UpdateConfig(cb func(*config.Config)) *config.Config { + c.configLock.Lock() + defer c.configLock.Unlock() + + // Create a copy of the active config + newConfig := c.config.Copy() + + // Pass the copy to the supplied callback for mutation + cb(newConfig) + + // Set new config struct + c.config = newConfig + + return newConfig } // Datacenter returns the datacenter for the given client func (c *Client) Datacenter() string { - return c.config.Node.Datacenter + return c.GetConfig().Node.Datacenter } // Region returns the region for the given client func (c *Client) Region() string { - return c.config.Region + return c.GetConfig().Region } // NodeID returns the node ID for the given client func (c *Client) NodeID() string { - return c.config.Node.ID + return c.GetConfig().Node.ID } // secretNodeID returns the secret node ID for the given client func (c *Client) secretNodeID() string { - return c.config.Node.SecretID + return c.GetConfig().Node.SecretID } // Shutdown is used to tear down the client @@ -790,7 +809,7 @@ func (c *Client) Shutdown() error { c.garbageCollector.Stop() arGroup := group{} - if c.config.DevMode { + if c.GetConfig().DevMode { // In DevMode destroy all the running allocations. for _, ar := range c.getAllocRunners() { ar.Destroy() @@ -881,27 +900,36 @@ func (c *Client) CollectAllAllocs() { c.garbageCollector.CollectAll() } -func (c *Client) RestartAllocation(allocID, taskName string) error { +func (c *Client) RestartAllocation(allocID, taskName string, allTasks bool) error { + if allTasks && taskName != "" { + return fmt.Errorf("task name cannot be set when restarting all tasks") + } + ar, err := c.getAllocRunner(allocID) if err != nil { return err } - event := structs.NewTaskEvent(structs.TaskRestartSignal). - SetRestartReason("User requested restart") - if taskName != "" { + event := structs.NewTaskEvent(structs.TaskRestartSignal). 
+ SetRestartReason("User requested task to restart") return ar.RestartTask(taskName, event) } - return ar.RestartAll(event) + if allTasks { + event := structs.NewTaskEvent(structs.TaskRestartSignal). + SetRestartReason("User requested all tasks to restart") + return ar.RestartAll(event) + } + + event := structs.NewTaskEvent(structs.TaskRestartSignal). + SetRestartReason("User requested running tasks to restart") + return ar.RestartRunning(event) } // Node returns the locally registered node func (c *Client) Node() *structs.Node { - c.configLock.RLock() - defer c.configLock.RUnlock() - return c.configCopy.Node + return c.GetConfig().Node } // getAllocRunner returns an AllocRunner or an UnknownAllocation error if the @@ -997,11 +1025,12 @@ func (c *Client) computeAllocatedDeviceGroupStats(devices []*structs.AllocatedDe // allocation, and has been created by a trusted party that has privileged // knowledge of the client's secret identifier func (c *Client) ValidateMigrateToken(allocID, migrateToken string) bool { - if !c.config.ACLEnabled { + conf := c.GetConfig() + if !conf.ACLEnabled { return true } - return structs.CompareMigrateToken(allocID, c.secretNodeID(), migrateToken) + return structs.CompareMigrateToken(allocID, conf.Node.SecretID, migrateToken) } // GetAllocFS returns the AllocFS interface for the alloc dir of an allocation @@ -1104,7 +1133,8 @@ func (c *Client) setServersImpl(in []string, force bool) (int, error) { // If there are errors restoring a specific allocation it is marked // as failed whenever possible. func (c *Client) restoreState() error { - if c.config.DevMode { + conf := c.GetConfig() + if conf.DevMode { return nil } @@ -1148,11 +1178,10 @@ func (c *Client) restoreState() error { prevAllocWatcher := allocwatcher.NoopPrevAlloc{} prevAllocMigrator := allocwatcher.NoopPrevAlloc{} - c.configLock.RLock() arConf := &allocrunner.Config{ Alloc: alloc, Logger: c.logger, - ClientConfig: c.configCopy, + ClientConfig: conf, StateDB: c.stateDB, StateUpdater: c, DeviceStatsReporter: c, @@ -1170,8 +1199,8 @@ func (c *Client) restoreState() error { ServersContactedCh: c.serversContactedCh, ServiceRegWrapper: c.serviceRegWrapper, RPCClient: c, + Getter: c.getter, } - c.configLock.RUnlock() ar, err := allocrunner.NewAllocRunner(arConf) if err != nil { @@ -1231,8 +1260,8 @@ func (c *Client) restoreState() error { // wait until it gets allocs from server to launch them. // // See: -// * https://github.com/hashicorp/nomad/pull/6207 -// * https://github.com/hashicorp/nomad/issues/5984 +// - https://github.com/hashicorp/nomad/pull/6207 +// - https://github.com/hashicorp/nomad/issues/5984 // // COMPAT(0.12): remove once upgrading from 0.9.5 is no longer supported func (c *Client) hasLocalState(alloc *structs.Allocation) bool { @@ -1315,13 +1344,13 @@ func (c *Client) NumAllocs() int { return n } -// nodeID restores, or generates if necessary, a unique node ID and SecretID. -// The node ID is, if available, a persistent unique ID. The secret ID is a -// high-entropy random UUID. -func (c *Client) nodeID() (id, secret string, err error) { +// ensureNodeID restores, or generates if necessary, a unique node ID and +// SecretID. The node ID is, if available, a persistent unique ID. The secret +// ID is a high-entropy random UUID. 
+func ensureNodeID(conf *config.Config) (id, secret string, err error) { var hostID string hostInfo, err := host.Info() - if !c.config.NoHostUUID && err == nil { + if !conf.NoHostUUID && err == nil { if hashed, ok := helper.HashUUID(hostInfo.HostID); ok { hostID = hashed } @@ -1334,20 +1363,20 @@ func (c *Client) nodeID() (id, secret string, err error) { } // Do not persist in dev mode - if c.config.DevMode { + if conf.DevMode { return hostID, uuid.Generate(), nil } // Attempt to read existing ID - idPath := filepath.Join(c.config.StateDir, "client-id") - idBuf, err := ioutil.ReadFile(idPath) + idPath := filepath.Join(conf.StateDir, "client-id") + idBuf, err := os.ReadFile(idPath) if err != nil && !os.IsNotExist(err) { return "", "", err } // Attempt to read existing secret ID - secretPath := filepath.Join(c.config.StateDir, "secret-id") - secretBuf, err := ioutil.ReadFile(secretPath) + secretPath := filepath.Join(conf.StateDir, "secret-id") + secretBuf, err := os.ReadFile(secretPath) if err != nil && !os.IsNotExist(err) { return "", "", err } @@ -1359,7 +1388,7 @@ func (c *Client) nodeID() (id, secret string, err error) { id = hostID // Persist the ID - if err := ioutil.WriteFile(idPath, []byte(id), 0700); err != nil { + if err := os.WriteFile(idPath, []byte(id), 0700); err != nil { return "", "", err } } @@ -1371,7 +1400,7 @@ func (c *Client) nodeID() (id, secret string, err error) { secret = uuid.Generate() // Persist the ID - if err := ioutil.WriteFile(secretPath, []byte(secret), 0700); err != nil { + if err := os.WriteFile(secretPath, []byte(secret), 0700); err != nil { return "", "", err } } @@ -1381,13 +1410,18 @@ func (c *Client) nodeID() (id, secret string, err error) { // setupNode is used to setup the initial node func (c *Client) setupNode() error { - node := c.config.Node + c.configLock.Lock() + defer c.configLock.Unlock() + + newConfig := c.config.Copy() + node := newConfig.Node if node == nil { node = &structs.Node{} - c.config.Node = node + newConfig.Node = node } + // Generate an ID and secret for the node - id, secretID, err := c.nodeID() + id, secretID, err := ensureNodeID(newConfig) if err != nil { return fmt.Errorf("node ID setup failed: %v", err) } @@ -1414,8 +1448,8 @@ func (c *Client) setupNode() error { } if node.NodeResources == nil { node.NodeResources = &structs.NodeResources{} - node.NodeResources.MinDynamicPort = c.config.MinDynamicPort - node.NodeResources.MaxDynamicPort = c.config.MaxDynamicPort + node.NodeResources.MinDynamicPort = newConfig.MinDynamicPort + node.NodeResources.MaxDynamicPort = newConfig.MaxDynamicPort } if node.ReservedResources == nil { node.ReservedResources = &structs.NodeReservedResources{} @@ -1432,11 +1466,11 @@ func (c *Client) setupNode() error { if node.Name == "" { node.Name, _ = os.Hostname() } - node.CgroupParent = c.config.CgroupParent + node.CgroupParent = newConfig.CgroupParent if node.HostVolumes == nil { - if l := len(c.config.HostVolumes); l != 0 { + if l := len(newConfig.HostVolumes); l != 0 { node.HostVolumes = make(map[string]*structs.ClientHostVolumeConfig, l) - for k, v := range c.config.HostVolumes { + for k, v := range newConfig.HostVolumes { if _, err := os.Stat(v.Path); err != nil { return fmt.Errorf("failed to validate volume %s, err: %v", v.Name, err) } @@ -1445,9 +1479,9 @@ func (c *Client) setupNode() error { } } if node.HostNetworks == nil { - if l := len(c.config.HostNetworks); l != 0 { + if l := len(newConfig.HostNetworks); l != 0 { node.HostNetworks = make(map[string]*structs.ClientHostNetworkConfig, l) - 
for k, v := range c.config.HostNetworks { + for k, v := range newConfig.HostNetworks { node.HostNetworks[k] = v.Copy() } } @@ -1472,6 +1506,7 @@ func (c *Client) setupNode() error { node.Meta["connect.proxy_concurrency"] = defaultConnectProxyConcurrency } + c.config = newConfig return nil } @@ -1482,34 +1517,35 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp defer c.configLock.Unlock() nodeHasChanged := false + newConfig := c.config.Copy() for name, newVal := range response.Attributes { - oldVal := c.config.Node.Attributes[name] + oldVal := newConfig.Node.Attributes[name] if oldVal == newVal { continue } nodeHasChanged = true if newVal == "" { - delete(c.config.Node.Attributes, name) + delete(newConfig.Node.Attributes, name) } else { - c.config.Node.Attributes[name] = newVal + newConfig.Node.Attributes[name] = newVal } } // update node links and resources from the diff created from // fingerprinting for name, newVal := range response.Links { - oldVal := c.config.Node.Links[name] + oldVal := newConfig.Node.Links[name] if oldVal == newVal { continue } nodeHasChanged = true if newVal == "" { - delete(c.config.Node.Links, name) + delete(newConfig.Node.Links, name) } else { - c.config.Node.Links[name] = newVal + newConfig.Node.Links[name] = newVal } } @@ -1519,9 +1555,9 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp if response.Resources != nil { response.Resources.Networks = updateNetworks( response.Resources.Networks, - c.config) - if !c.config.Node.Resources.Equals(response.Resources) { - c.config.Node.Resources.Merge(response.Resources) + newConfig) + if !newConfig.Node.Resources.Equals(response.Resources) { + newConfig.Node.Resources.Merge(response.Resources) nodeHasChanged = true } } @@ -1531,26 +1567,27 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp if response.NodeResources != nil { response.NodeResources.Networks = updateNetworks( response.NodeResources.Networks, - c.config) - if !c.config.Node.NodeResources.Equals(response.NodeResources) { - c.config.Node.NodeResources.Merge(response.NodeResources) + newConfig) + if !newConfig.Node.NodeResources.Equals(response.NodeResources) { + newConfig.Node.NodeResources.Merge(response.NodeResources) nodeHasChanged = true } - response.NodeResources.MinDynamicPort = c.config.MinDynamicPort - response.NodeResources.MaxDynamicPort = c.config.MaxDynamicPort - if c.config.Node.NodeResources.MinDynamicPort != response.NodeResources.MinDynamicPort || - c.config.Node.NodeResources.MaxDynamicPort != response.NodeResources.MaxDynamicPort { + response.NodeResources.MinDynamicPort = newConfig.MinDynamicPort + response.NodeResources.MaxDynamicPort = newConfig.MaxDynamicPort + if newConfig.Node.NodeResources.MinDynamicPort != response.NodeResources.MinDynamicPort || + newConfig.Node.NodeResources.MaxDynamicPort != response.NodeResources.MaxDynamicPort { nodeHasChanged = true } } if nodeHasChanged { - c.updateNodeLocked() + c.config = newConfig + c.updateNode() } - return c.configCopy.Node + return newConfig.Node } // updateNetworks filters and overrides network speed of host networks based @@ -1591,7 +1628,7 @@ func updateNetworks(up structs.Networks, c *config.Config) structs.Networks { // retryIntv calculates a retry interval value given the base func (c *Client) retryIntv(base time.Duration) time.Duration { - if c.config.DevMode { + if c.GetConfig().DevMode { return devModeRetryIntv } return base + helper.RandomStagger(base) @@ -1613,7 +1650,7 @@ 
func (c *Client) registerAndHeartbeat() { // we want to do this quickly. We want to do it extra quickly // in development mode. var heartbeat <-chan time.Time - if c.config.DevMode { + if c.GetConfig().DevMode { heartbeat = time.After(0) } else { heartbeat = time.After(helper.RandomStagger(initialHeartbeatStagger)) @@ -1657,7 +1694,7 @@ func (c *Client) lastHeartbeat() time.Time { // getHeartbeatRetryIntv is used to retrieve the time to wait before attempting // another heartbeat. func (c *Client) getHeartbeatRetryIntv(err error) time.Duration { - if c.config.DevMode { + if c.GetConfig().DevMode { return devModeRetryIntv } @@ -1825,7 +1862,7 @@ func (c *Client) retryRegisterNode() { } retryIntv := registerRetryIntv - if err == noServersErr { + if err == noServersErr || structs.IsErrNoRegionPath(err) { c.logger.Debug("registration waiting on servers") c.triggerDiscovery() retryIntv = noServerRetryIntv @@ -1843,9 +1880,8 @@ func (c *Client) retryRegisterNode() { // registerNode is used to register the node or update the registration func (c *Client) registerNode() error { - node := c.Node() req := structs.NodeRegisterRequest{ - Node: node, + Node: c.Node(), WriteRequest: structs.WriteRequest{Region: c.Region()}, } var resp structs.NodeUpdateResponse @@ -1853,11 +1889,15 @@ func (c *Client) registerNode() error { return err } + err := c.handleNodeUpdateResponse(resp) + if err != nil { + return err + } + // Update the node status to ready after we register. - c.configLock.Lock() - node.Status = structs.NodeStatusReady - c.config.Node.Status = structs.NodeStatusReady - c.configLock.Unlock() + c.UpdateConfig(func(c *config.Config) { + c.Node.Status = structs.NodeStatusReady + }) c.logger.Info("node registration complete") if len(resp.EvalIDs) != 0 { @@ -1868,6 +1908,7 @@ func (c *Client) registerNode() error { defer c.heartbeatLock.Unlock() c.heartbeatStop.setLastOk(time.Now()) c.heartbeatTTL = resp.HeartbeatTTL + return nil } @@ -1911,6 +1952,30 @@ func (c *Client) updateNodeStatus() error { } } + // Check heartbeat response for information about the server-side scheduling + // state of this node + c.UpdateConfig(func(c *config.Config) { + if resp.SchedulingEligibility != "" { + c.Node.SchedulingEligibility = resp.SchedulingEligibility + } + }) + + err := c.handleNodeUpdateResponse(resp) + if err != nil { + return fmt.Errorf("heartbeat response returned no valid servers") + } + + // If there's no Leader in the response we may be talking to a partitioned + // server. Redo discovery to ensure our server list is up to date. + if resp.LeaderRPCAddr == "" { + c.triggerDiscovery() + } + + c.EnterpriseClient.SetFeatures(resp.Features) + return nil +} + +func (c *Client) handleNodeUpdateResponse(resp structs.NodeUpdateResponse) error { // Update the number of nodes in the cluster so we can adjust our server // rebalance rate. c.servers.SetNumNodes(resp.NumNodes) @@ -1927,20 +1992,9 @@ func (c *Client) updateNodeStatus() error { nomadServers = append(nomadServers, e) } if len(nomadServers) == 0 { - return fmt.Errorf("heartbeat response returned no valid servers") + return noServersErr } c.servers.SetServers(nomadServers) - - // Begin polling Consul if there is no Nomad leader. We could be - // heartbeating to a Nomad server that is in the minority of a - // partition of the Nomad server quorum, but this Nomad Agent still - // has connectivity to the existing majority of Nomad Servers, but - // only if it queries Consul. 
- if resp.LeaderRPCAddr == "" { - c.triggerDiscovery() - } - - c.EnterpriseClient.SetFeatures(resp.Features) return nil } @@ -2240,14 +2294,9 @@ OUTER: } } -// updateNode updates the Node copy and triggers the client to send the updated -// Node to the server. This should be done while the caller holds the -// configLock lock. -func (c *Client) updateNodeLocked() { - // Update the config copy. - node := c.config.Node.Copy() - c.configCopy.Node = node - +// updateNode signals the client to send the updated +// Node to the server. +func (c *Client) updateNode() { select { case c.triggerNodeUpdate <- struct{}{}: // Node update goroutine was released to execute @@ -2365,7 +2414,7 @@ func makeFailedAlloc(add *structs.Allocation, err error) *structs.Allocation { stripped.DeploymentStatus = add.DeploymentStatus.Copy() } else { stripped.DeploymentStatus = &structs.AllocDeploymentStatus{ - Healthy: helper.BoolToPtr(false), + Healthy: pointer.Of(false), Timestamp: failTime, } } @@ -2426,8 +2475,11 @@ func (c *Client) updateAlloc(update *structs.Allocation) { return } - // Reconnect unknown allocations - if update.ClientStatus == structs.AllocClientStatusUnknown && update.AllocModifyIndex > ar.Alloc().AllocModifyIndex { + // Reconnect unknown allocations if they were updated and are not terminal. + reconnect := update.ClientStatus == structs.AllocClientStatusUnknown && + update.AllocModifyIndex > ar.Alloc().AllocModifyIndex && + !update.ServerTerminalStatus() + if reconnect { err = ar.Reconnect(update) if err != nil { c.logger.Error("error reconnecting alloc", "alloc_id", update.ID, "alloc_modify_index", update.AllocModifyIndex, "err", err) @@ -2477,20 +2529,16 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error PreviousRunner: c.allocs[alloc.PreviousAllocation], PreemptedRunners: preemptedAllocs, RPC: c, - Config: c.configCopy, + Config: c.GetConfig(), MigrateToken: migrateToken, Logger: c.logger, } prevAllocWatcher, prevAllocMigrator := allocwatcher.NewAllocWatcher(watcherConfig) - // Copy the config since the node can be swapped out as it is being updated. - // The long term fix is to pass in the config and node separately and then - // we don't have to do a copy. - c.configLock.RLock() arConf := &allocrunner.Config{ Alloc: alloc, Logger: c.logger, - ClientConfig: c.configCopy, + ClientConfig: c.GetConfig(), StateDB: c.stateDB, Consul: c.consulService, ConsulProxies: c.consulProxies, @@ -2507,8 +2555,8 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error DriverManager: c.drivermanager, ServiceRegWrapper: c.serviceRegWrapper, RPCClient: c, + Getter: c.getter, } - c.configLock.RUnlock() ar, err := allocrunner.NewAllocRunner(arConf) if err != nil { @@ -2537,7 +2585,7 @@ func (c *Client) setupConsulTokenClient() error { // with vault. 
func (c *Client) setupVaultClient() error { var err error - c.vaultClient, err = vaultclient.NewVaultClient(c.config.VaultConfig, c.logger, c.deriveToken) + c.vaultClient, err = vaultclient.NewVaultClient(c.GetConfig().VaultConfig, c.logger, c.deriveToken) if err != nil { return err } @@ -2558,7 +2606,7 @@ func (c *Client) setupVaultClient() error { func (c *Client) setupNomadServiceRegistrationHandler() { cfg := nsd.ServiceRegistrationHandlerCfg{ Datacenter: c.Datacenter(), - Enabled: c.config.NomadServiceDiscovery, + Enabled: c.GetConfig().NomadServiceDiscovery, NodeID: c.NodeID(), NodeSecret: c.secretNodeID(), Region: c.Region(), @@ -2586,8 +2634,9 @@ func (c *Client) deriveToken(alloc *structs.Allocation, taskNames []string, vcli AllocID: alloc.ID, Tasks: verifiedTasks, QueryOptions: structs.QueryOptions{ - Region: c.Region(), - AllowStale: false, + Region: c.Region(), + AllowStale: false, + MinQueryIndex: alloc.CreateIndex, }, } @@ -2692,7 +2741,7 @@ func (c *Client) deriveSIToken(alloc *structs.Allocation, taskNames []string) (m // https://www.consul.io/api/acl/tokens.html#read-a-token // https://www.consul.io/docs/internals/security.html - m := helper.CopyMapStringString(resp.Tokens) + m := maps.Clone(resp.Tokens) return m, nil } @@ -2737,7 +2786,8 @@ func taskIsPresent(taskName string, tasks []*structs.Task) bool { // triggerDiscovery causes a Consul discovery to begin (if one hasn't already) func (c *Client) triggerDiscovery() { - if c.configCopy.ConsulConfig.ClientAutoJoin != nil && *c.configCopy.ConsulConfig.ClientAutoJoin { + config := c.GetConfig() + if config.ConsulConfig.ClientAutoJoin != nil && *config.ConsulConfig.ClientAutoJoin { select { case c.triggerDiscoveryCh <- struct{}{}: // Discovery goroutine was released to execute @@ -2779,18 +2829,10 @@ func (c *Client) consulDiscoveryImpl() error { // datacenterQueryLimit, the next heartbeat will pick // a new set of servers so it's okay. 
shuffleStrings(dcs[1:]) - dcs = dcs[0:helper.MinInt(len(dcs), datacenterQueryLimit)] + dcs = dcs[0:helper.Min(len(dcs), datacenterQueryLimit)] } - // Query for servers in this client's region only - region := c.Region() - rpcargs := structs.GenericRequest{ - QueryOptions: structs.QueryOptions{ - Region: region, - }, - } - - serviceName := c.configCopy.ConsulConfig.ServerServiceName + serviceName := c.GetConfig().ConsulConfig.ServerServiceName var mErr multierror.Error var nomadServers servers.Servers consulLogger.Debug("bootstrap contacting Consul DCs", "consul_dcs", dcs) @@ -2819,26 +2861,15 @@ DISCOLOOP: mErr.Errors = append(mErr.Errors, err) continue } - var peers []string - if err := c.connPool.RPC(region, addr, "Status.Peers", rpcargs, &peers); err != nil { - mErr.Errors = append(mErr.Errors, err) - continue - } - // Successfully received the Server peers list of the correct - // region - for _, p := range peers { - addr, err := net.ResolveTCPAddr("tcp", p) - if err != nil { - mErr.Errors = append(mErr.Errors, err) - } - srv := &servers.Server{Addr: addr} - nomadServers = append(nomadServers, srv) - } - if len(nomadServers) > 0 { - break DISCOLOOP - } + srv := &servers.Server{Addr: addr} + nomadServers = append(nomadServers, srv) } + + if len(nomadServers) > 0 { + break DISCOLOOP + } + } if len(nomadServers) == 0 { if len(mErr.Errors) > 0 { @@ -2884,13 +2915,14 @@ func (c *Client) emitStats() { next := time.NewTimer(0) defer next.Stop() for { + config := c.GetConfig() select { case <-next.C: err := c.hostStatsCollector.Collect() - next.Reset(c.config.StatsCollectionInterval) + next.Reset(config.StatsCollectionInterval) if err != nil { c.logger.Warn("error fetching host resource usage stats", "error", err) - } else if c.config.PublishNodeMetrics { + } else if config.PublishNodeMetrics { // Publish Node metrics if operator has opted in c.emitHostStats() } @@ -2951,9 +2983,7 @@ func (c *Client) setGaugeForDiskStats(nodeID string, hStats *stats.HostStats, ba // setGaugeForAllocationStats proxies metrics for allocation specific statistics func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics.Label) { - c.configLock.RLock() - node := c.configCopy.Node - c.configLock.RUnlock() + node := c.GetConfig().Node total := node.NodeResources res := node.ReservedResources allocated := c.getAllocatedResources(node) @@ -3052,14 +3082,11 @@ func (c *Client) emitClientMetrics() { // labels takes the base labels and appends the node state func (c *Client) labels() []metrics.Label { - c.configLock.RLock() - nodeStatus := c.configCopy.Node.Status - nodeEligibility := c.configCopy.Node.SchedulingEligibility - c.configLock.RUnlock() + node := c.Node() return append(c.baseLabels, - metrics.Label{Name: "node_status", Value: nodeStatus}, - metrics.Label{Name: "node_scheduling_eligibility", Value: nodeEligibility}, + metrics.Label{Name: "node_status", Value: node.Status}, + metrics.Label{Name: "node_scheduling_eligibility", Value: node.SchedulingEligibility}, ) } diff --git a/client/client_test.go b/client/client_test.go index 8750b9531e9..c9b4b6fb98b 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -2,7 +2,6 @@ package client import ( "fmt" - "io/ioutil" "net" "os" "path/filepath" @@ -16,6 +15,7 @@ import ( trstate "github.com/hashicorp/nomad/client/allocrunner/taskrunner/state" "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/client/fingerprint" + "github.com/hashicorp/nomad/client/lib/cgutil" regMock 
"github.com/hashicorp/nomad/client/serviceregistration/mock" cstate "github.com/hashicorp/nomad/client/state" "github.com/hashicorp/nomad/command/agent/consul" @@ -192,63 +192,55 @@ func TestClient_Fingerprint_Periodic(t *testing.T) { }) defer cleanup() - node := c1.config.Node - { - // Ensure the mock driver is registered on the client - testutil.WaitForResult(func() (bool, error) { - c1.configLock.Lock() - defer c1.configLock.Unlock() + // Ensure the mock driver is registered on the client + testutil.WaitForResult(func() (bool, error) { + node := c1.Node() - // assert that the driver is set on the node attributes - mockDriverInfoAttr := node.Attributes["driver.mock_driver"] - if mockDriverInfoAttr == "" { - return false, fmt.Errorf("mock driver is empty when it should be set on the node attributes") - } + // assert that the driver is set on the node attributes + mockDriverInfoAttr := node.Attributes["driver.mock_driver"] + if mockDriverInfoAttr == "" { + return false, fmt.Errorf("mock driver is empty when it should be set on the node attributes") + } - mockDriverInfo := node.Drivers["mock_driver"] + mockDriverInfo := node.Drivers["mock_driver"] - // assert that the Driver information for the node is also set correctly - if mockDriverInfo == nil { - return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers") - } - if !mockDriverInfo.Detected { - return false, fmt.Errorf("mock driver should be set as detected") - } - if !mockDriverInfo.Healthy { - return false, fmt.Errorf("mock driver should be set as healthy") - } - if mockDriverInfo.HealthDescription == "" { - return false, fmt.Errorf("mock driver description should not be empty") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - } + // assert that the Driver information for the node is also set correctly + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers") + } + if !mockDriverInfo.Detected { + return false, fmt.Errorf("mock driver should be set as detected") + } + if !mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver should be set as healthy") + } + if mockDriverInfo.HealthDescription == "" { + return false, fmt.Errorf("mock driver description should not be empty") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) - { - testutil.WaitForResult(func() (bool, error) { - c1.configLock.Lock() - defer c1.configLock.Unlock() - mockDriverInfo := node.Drivers["mock_driver"] - // assert that the Driver information for the node is also set correctly - if mockDriverInfo == nil { - return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers") - } - if mockDriverInfo.Detected { - return false, fmt.Errorf("mock driver should not be set as detected") - } - if mockDriverInfo.Healthy { - return false, fmt.Errorf("mock driver should not be set as healthy") - } - if mockDriverInfo.HealthDescription == "" { - return false, fmt.Errorf("mock driver description should not be empty") - } - return true, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - } + testutil.WaitForResult(func() (bool, error) { + mockDriverInfo := c1.Node().Drivers["mock_driver"] + // assert that the Driver information for the node is also set correctly + if mockDriverInfo == nil { + return false, fmt.Errorf("mock driver is nil when it should be set on node Drivers") + } + if mockDriverInfo.Detected { + return false, fmt.Errorf("mock driver should not be set as detected") + } + if 
mockDriverInfo.Healthy { + return false, fmt.Errorf("mock driver should not be set as healthy") + } + if mockDriverInfo.HealthDescription == "" { + return false, fmt.Errorf("mock driver description should not be empty") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) } // TestClient_MixedTLS asserts that when a server is running with TLS enabled @@ -508,7 +500,7 @@ func TestClient_WatchAllocs(t *testing.T) { }) // Delete one allocation - if err := state.DeleteEval(103, nil, []string{alloc1.ID}); err != nil { + if err := state.DeleteEval(103, nil, []string{alloc1.ID}, false); err != nil { t.Fatalf("err: %v", err) } @@ -734,11 +726,8 @@ func TestClient_AddAllocError(t *testing.T) { func TestClient_Init(t *testing.T) { ci.Parallel(t) - dir, err := ioutil.TempDir("", "nomad") - if err != nil { - t.Fatalf("err: %s", err) - } - defer os.RemoveAll(dir) + dir := t.TempDir() + allocDir := filepath.Join(dir, "alloc") config := config.DefaultConfig() @@ -749,8 +738,9 @@ func TestClient_Init(t *testing.T) { config.Node = mock.Node() client := &Client{ - config: config, - logger: testlog.HCLogger(t), + config: config, + logger: testlog.HCLogger(t), + cpusetManager: new(cgutil.NoopCpusetManager), } if err := client.init(); err != nil { @@ -1120,17 +1110,18 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { }) // initial check + conf := client.GetConfig() expectedResources := &structs.NodeResources{ // computed through test client initialization - Networks: client.configCopy.Node.NodeResources.Networks, - NodeNetworks: client.configCopy.Node.NodeResources.NodeNetworks, - Disk: client.configCopy.Node.NodeResources.Disk, + Networks: conf.Node.NodeResources.Networks, + NodeNetworks: conf.Node.NodeResources.NodeNetworks, + Disk: conf.Node.NodeResources.Disk, // injected Cpu: structs.NodeCpuResources{ CpuShares: 123, - ReservableCpuCores: client.configCopy.Node.NodeResources.Cpu.ReservableCpuCores, - TotalCpuCores: client.configCopy.Node.NodeResources.Cpu.TotalCpuCores, + ReservableCpuCores: conf.Node.NodeResources.Cpu.ReservableCpuCores, + TotalCpuCores: conf.Node.NodeResources.Cpu.TotalCpuCores, }, Memory: structs.NodeMemoryResources{MemoryMB: 1024}, Devices: []*structs.NodeDeviceResource{ @@ -1141,7 +1132,7 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { }, } - assert.EqualValues(t, expectedResources, client.configCopy.Node.NodeResources) + assert.EqualValues(t, expectedResources, conf.Node.NodeResources) // overrides of values @@ -1162,17 +1153,19 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { }, }) + conf = client.GetConfig() + expectedResources2 := &structs.NodeResources{ // computed through test client initialization - Networks: client.configCopy.Node.NodeResources.Networks, - NodeNetworks: client.configCopy.Node.NodeResources.NodeNetworks, - Disk: client.configCopy.Node.NodeResources.Disk, + Networks: conf.Node.NodeResources.Networks, + NodeNetworks: conf.Node.NodeResources.NodeNetworks, + Disk: conf.Node.NodeResources.Disk, // injected Cpu: structs.NodeCpuResources{ CpuShares: 123, - ReservableCpuCores: client.configCopy.Node.NodeResources.Cpu.ReservableCpuCores, - TotalCpuCores: client.configCopy.Node.NodeResources.Cpu.TotalCpuCores, + ReservableCpuCores: conf.Node.NodeResources.Cpu.ReservableCpuCores, + TotalCpuCores: conf.Node.NodeResources.Cpu.TotalCpuCores, }, Memory: structs.NodeMemoryResources{MemoryMB: 2048}, Devices: []*structs.NodeDeviceResource{ @@ -1187,7 +1180,7 @@ func 
TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { }, } - assert.EqualValues(t, expectedResources2, client.configCopy.Node.NodeResources) + assert.EqualValues(t, expectedResources2, conf.Node.NodeResources) } diff --git a/client/config/artifact.go b/client/config/artifact.go new file mode 100644 index 00000000000..3b1a6a94ad4 --- /dev/null +++ b/client/config/artifact.go @@ -0,0 +1,86 @@ +package config + +import ( + "fmt" + "time" + + "github.com/dustin/go-humanize" + "github.com/hashicorp/nomad/nomad/structs/config" +) + +// ArtifactConfig is the internal readonly copy of the client agent's +// ArtifactConfig. +type ArtifactConfig struct { + HTTPReadTimeout time.Duration + HTTPMaxBytes int64 + + GCSTimeout time.Duration + GitTimeout time.Duration + HgTimeout time.Duration + S3Timeout time.Duration + + DecompressionLimitSize int64 + DecompressionLimitFileCount int +} + +// ArtifactConfigFromAgent creates a new internal readonly copy of the client +// agent's ArtifactConfig. The config should have already been validated. +func ArtifactConfigFromAgent(c *config.ArtifactConfig) (*ArtifactConfig, error) { + newConfig := &ArtifactConfig{} + + t, err := time.ParseDuration(*c.HTTPReadTimeout) + if err != nil { + return nil, fmt.Errorf("error parsing HTTPReadTimeout: %w", err) + } + newConfig.HTTPReadTimeout = t + + s, err := humanize.ParseBytes(*c.HTTPMaxSize) + if err != nil { + return nil, fmt.Errorf("error parsing HTTPMaxSize: %w", err) + } + newConfig.HTTPMaxBytes = int64(s) + + t, err = time.ParseDuration(*c.GCSTimeout) + if err != nil { + return nil, fmt.Errorf("error parsing GCSTimeout: %w", err) + } + newConfig.GCSTimeout = t + + t, err = time.ParseDuration(*c.GitTimeout) + if err != nil { + return nil, fmt.Errorf("error parsing GitTimeout: %w", err) + } + newConfig.GitTimeout = t + + t, err = time.ParseDuration(*c.HgTimeout) + if err != nil { + return nil, fmt.Errorf("error parsing HgTimeout: %w", err) + } + newConfig.HgTimeout = t + + t, err = time.ParseDuration(*c.S3Timeout) + if err != nil { + return nil, fmt.Errorf("error parsing S3Timeout: %w", err) + } + newConfig.S3Timeout = t + + s, err = humanize.ParseBytes(*c.DecompressionSizeLimit) + if err != nil { + return nil, fmt.Errorf("error parsing DecompressionLimitSize: %w", err) + } + newConfig.DecompressionLimitSize = int64(s) + + // no parsing its just an int + newConfig.DecompressionLimitFileCount = *c.DecompressionFileCountLimit + + return newConfig, nil +} + +func (a *ArtifactConfig) Copy() *ArtifactConfig { + if a == nil { + return nil + } + + newCopy := *a + return &newCopy +} diff --git a/client/config/artifact_test.go b/client/config/artifact_test.go new file mode 100644 index 00000000000..a79b4b2b72f --- /dev/null +++ b/client/config/artifact_test.go @@ -0,0 +1,155 @@ +package config + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/pointer" + "github.com/hashicorp/nomad/nomad/structs/config" + "github.com/stretchr/testify/require" +) + +func TestArtifactConfigFromAgent(t *testing.T) { + ci.Parallel(t) + + testCases := []struct { + name string + config *config.ArtifactConfig + expected *ArtifactConfig + expectedError string + }{ + { + name: "from default", + config: config.DefaultArtifactConfig(), + expected: &ArtifactConfig{ + HTTPReadTimeout: 30 * time.Minute, + HTTPMaxBytes: 100_000_000_000, + GCSTimeout: 30 * time.Minute, + GitTimeout: 30 * time.Minute, + HgTimeout: 30 * time.Minute, + S3Timeout: 30 * time.Minute, + }, + }, + { + name: "invalid http read 
timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: pointer.Of("invalid"), + HTTPMaxSize: pointer.Of("100GB"), + GCSTimeout: pointer.Of("30m"), + GitTimeout: pointer.Of("30m"), + HgTimeout: pointer.Of("30m"), + S3Timeout: pointer.Of("30m"), + }, + expectedError: "error parsing HTTPReadTimeout", + }, + { + name: "invalid http max size", + config: &config.ArtifactConfig{ + HTTPReadTimeout: pointer.Of("30m"), + HTTPMaxSize: pointer.Of("invalid"), + GCSTimeout: pointer.Of("30m"), + GitTimeout: pointer.Of("30m"), + HgTimeout: pointer.Of("30m"), + S3Timeout: pointer.Of("30m"), + }, + expectedError: "error parsing HTTPMaxSize", + }, + { + name: "invalid gcs timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: pointer.Of("30m"), + HTTPMaxSize: pointer.Of("100GB"), + GCSTimeout: pointer.Of("invalid"), + GitTimeout: pointer.Of("30m"), + HgTimeout: pointer.Of("30m"), + S3Timeout: pointer.Of("30m"), + }, + expectedError: "error parsing GCSTimeout", + }, + { + name: "invalid git timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: pointer.Of("30m"), + HTTPMaxSize: pointer.Of("100GB"), + GCSTimeout: pointer.Of("30m"), + GitTimeout: pointer.Of("invalid"), + HgTimeout: pointer.Of("30m"), + S3Timeout: pointer.Of("30m"), + }, + expectedError: "error parsing GitTimeout", + }, + { + name: "invalid hg timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: pointer.Of("30m"), + HTTPMaxSize: pointer.Of("100GB"), + GCSTimeout: pointer.Of("30m"), + GitTimeout: pointer.Of("30m"), + HgTimeout: pointer.Of("invalid"), + S3Timeout: pointer.Of("30m"), + }, + expectedError: "error parsing HgTimeout", + }, + { + name: "invalid s3 timeout", + config: &config.ArtifactConfig{ + HTTPReadTimeout: pointer.Of("30m"), + HTTPMaxSize: pointer.Of("100GB"), + GCSTimeout: pointer.Of("30m"), + GitTimeout: pointer.Of("30m"), + HgTimeout: pointer.Of("30m"), + S3Timeout: pointer.Of("invalid"), + }, + expectedError: "error parsing S3Timeout", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got, err := ArtifactConfigFromAgent(tc.config) + + if tc.expectedError != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tc.expectedError) + } else { + require.NoError(t, err) + require.Equal(t, tc.expected, got) + } + }) + } +} + +func TestArtifactConfig_Copy(t *testing.T) { + ci.Parallel(t) + + config := &ArtifactConfig{ + HTTPReadTimeout: time.Minute, + HTTPMaxBytes: 1000, + GCSTimeout: 2 * time.Minute, + GitTimeout: time.Second, + HgTimeout: time.Hour, + S3Timeout: 5 * time.Minute, + } + + // make sure values are copied. + configCopy := config.Copy() + require.Equal(t, config, configCopy) + + // modify copy and make sure original doesn't change. 
+ configCopy.HTTPReadTimeout = 5 * time.Minute + configCopy.HTTPMaxBytes = 2000 + configCopy.GCSTimeout = 5 * time.Second + configCopy.GitTimeout = 3 * time.Second + configCopy.HgTimeout = 2 * time.Hour + configCopy.S3Timeout = 10 * time.Minute + + require.Equal(t, &ArtifactConfig{ + HTTPReadTimeout: time.Minute, + HTTPMaxBytes: 1000, + GCSTimeout: 2 * time.Minute, + GitTimeout: time.Second, + HgTimeout: time.Hour, + S3Timeout: 5 * time.Minute, + }, config) +} diff --git a/client/config/config.go b/client/config/config.go index c1d59ea6b9e..4c6a9fc9fff 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -13,12 +13,14 @@ import ( "github.com/hashicorp/consul-template/config" "github.com/hashicorp/nomad/client/lib/cgutil" "github.com/hashicorp/nomad/command/agent/host" + "golang.org/x/exp/maps" + "golang.org/x/exp/slices" log "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/state" - "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/bufconndialer" "github.com/hashicorp/nomad/helper/pluginutils/loader" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/structs" structsc "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/plugins/base" @@ -64,7 +66,7 @@ var ( "/run/systemd/resolve": "/run/systemd/resolve", } - DefaultTemplateMaxStale = 5 * time.Second + DefaultTemplateMaxStale = 87600 * time.Hour DefaultTemplateFunctionDenylist = []string{"plugin", "writeToFile"} ) @@ -290,6 +292,9 @@ type Config struct { // TemplateDialer is our custom HTTP dialer for consul-template. This is // used for template functions which require access to the Nomad API. TemplateDialer *bufconndialer.BufConnWrapper + + // Artifact configuration from the agent's config file. + Artifact *ArtifactConfig } // ClientTemplateConfig is configuration on the client specific to template @@ -355,6 +360,13 @@ type ClientTemplateConfig struct { // to wait for the cluster to become available, as is customary in distributed // systems. VaultRetry *RetryConfig `hcl:"vault_retry,optional"` + + // This controls the retry behavior when an error is returned from Nomad. + // Consul Template is highly fault tolerant, meaning it does not exit in the + // face of failure. Instead, it uses exponential back-off and retry functions + // to wait for the cluster to become available, as is customary in distributed + // systems. 
+ NomadRetry *RetryConfig `hcl:"nomad_retry,optional"` } // Copy returns a deep copy of a ClientTemplateConfig @@ -367,7 +379,7 @@ func (c *ClientTemplateConfig) Copy() *ClientTemplateConfig { *nc = *c if len(c.FunctionDenylist) > 0 { - nc.FunctionDenylist = helper.CopySliceString(nc.FunctionDenylist) + nc.FunctionDenylist = slices.Clone(nc.FunctionDenylist) } else if c.FunctionDenylist != nil { // Explicitly no functions denied (which is different than nil) nc.FunctionDenylist = []string{} @@ -393,6 +405,10 @@ func (c *ClientTemplateConfig) Copy() *ClientTemplateConfig { nc.VaultRetry = c.VaultRetry.Copy() } + if c.NomadRetry != nil { + nc.NomadRetry = c.NomadRetry.Copy() + } + return nc } @@ -410,7 +426,8 @@ func (c *ClientTemplateConfig) IsEmpty() bool { c.MaxStaleHCL == "" && c.Wait.IsEmpty() && c.ConsulRetry.IsEmpty() && - c.VaultRetry.IsEmpty() + c.VaultRetry.IsEmpty() && + c.NomadRetry.IsEmpty() } // WaitConfig is mirrored from templateconfig.WaitConfig because we need to handle @@ -521,7 +538,7 @@ func (wc *WaitConfig) ToConsulTemplate() (*config.WaitConfig, error) { return nil, err } - result := &config.WaitConfig{Enabled: helper.BoolToPtr(true)} + result := &config.WaitConfig{Enabled: pointer.Of(true)} if wc.Min != nil { result.Min = wc.Min @@ -664,7 +681,7 @@ func (rc *RetryConfig) ToConsulTemplate() (*config.RetryConfig, error) { return nil, err } - result := &config.RetryConfig{Enabled: helper.BoolToPtr(true)} + result := &config.RetryConfig{Enabled: pointer.Of(true)} if rc.Attempts != nil { result.Attempts = rc.Attempts @@ -682,20 +699,21 @@ func (rc *RetryConfig) ToConsulTemplate() (*config.RetryConfig, error) { } func (c *Config) Copy() *Config { - nc := new(Config) - *nc = *c + if c == nil { + return nil + } + + nc := *c nc.Node = nc.Node.Copy() - nc.Servers = helper.CopySliceString(nc.Servers) - nc.Options = helper.CopyMapStringString(nc.Options) + nc.Servers = slices.Clone(nc.Servers) + nc.Options = maps.Clone(nc.Options) nc.HostVolumes = structs.CopyMapStringClientHostVolumeConfig(nc.HostVolumes) nc.ConsulConfig = c.ConsulConfig.Copy() nc.VaultConfig = c.VaultConfig.Copy() nc.TemplateConfig = c.TemplateConfig.Copy() - if c.ReservableCores != nil { - nc.ReservableCores = make([]uint16, len(c.ReservableCores)) - copy(nc.ReservableCores, c.ReservableCores) - } - return nc + nc.ReservableCores = slices.Clone(c.ReservableCores) + nc.Artifact = c.Artifact.Copy() + return &nc } // DefaultConfig returns the default configuration @@ -717,8 +735,23 @@ func DefaultConfig() *Config { NoHostUUID: true, DisableRemoteExec: false, TemplateConfig: &ClientTemplateConfig{ - FunctionDenylist: DefaultTemplateFunctionDenylist, - DisableSandbox: false, + FunctionDenylist: DefaultTemplateFunctionDenylist, + DisableSandbox: false, + BlockQueryWaitTime: pointer.Of(5 * time.Minute), // match Consul default + MaxStale: pointer.Of(DefaultTemplateMaxStale), // match Consul default + Wait: &WaitConfig{ + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(4 * time.Minute), + }, + ConsulRetry: &RetryConfig{ + Attempts: pointer.Of(0), // unlimited + }, + VaultRetry: &RetryConfig{ + Attempts: pointer.Of(0), // unlimited + }, + NomadRetry: &RetryConfig{ + Attempts: pointer.Of(0), // unlimited + }, }, RPCHoldTimeout: 5 * time.Second, CNIPath: "/opt/cni/bin", diff --git a/client/config/config_test.go b/client/config/config_test.go index 88f5bd1b741..fada640e25b 100644 --- a/client/config/config_test.go +++ b/client/config/config_test.go @@ -6,7 +6,7 @@ import ( "github.com/hashicorp/consul-template/config" 
"github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/stretchr/testify/require" ) @@ -49,8 +49,8 @@ func TestConfigReadDefault(t *testing.T) { func mockWaitConfig() *WaitConfig { return &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), } } @@ -66,26 +66,26 @@ func TestWaitConfig_Copy(t *testing.T) { "fully-populated", mockWaitConfig(), &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), }, }, { "min-only", &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), + Min: pointer.Of(5 * time.Second), }, &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), + Min: pointer.Of(5 * time.Second), }, }, { "max-only", &WaitConfig{ - Max: helper.TimeToPtr(5 * time.Second), + Max: pointer.Of(5 * time.Second), }, &WaitConfig{ - Max: helper.TimeToPtr(5 * time.Second), + Max: pointer.Of(5 * time.Second), }, }, } @@ -122,7 +122,7 @@ func TestWaitConfig_IsEmpty(t *testing.T) { { "is-not-empty", &WaitConfig{ - Min: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(10 * time.Second), }, false, }, @@ -148,8 +148,8 @@ func TestWaitConfig_IsEqual(t *testing.T) { "are-equal", mockWaitConfig(), &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), }, true, }, @@ -157,8 +157,8 @@ func TestWaitConfig_IsEqual(t *testing.T) { "min-different", mockWaitConfig(), &WaitConfig{ - Min: helper.TimeToPtr(4 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(4 * time.Second), + Max: pointer.Of(10 * time.Second), }, false, }, @@ -166,8 +166,8 @@ func TestWaitConfig_IsEqual(t *testing.T) { "max-different", mockWaitConfig(), &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(9 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(9 * time.Second), }, false, }, @@ -191,8 +191,8 @@ func TestWaitConfig_IsValid(t *testing.T) { { "is-valid", &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), }, "", }, @@ -209,15 +209,15 @@ func TestWaitConfig_IsValid(t *testing.T) { { "min-greater-than-max", &WaitConfig{ - Min: helper.TimeToPtr(10 * time.Second), - Max: helper.TimeToPtr(5 * time.Second), + Min: pointer.Of(10 * time.Second), + Max: pointer.Of(5 * time.Second), }, "greater than", }, { "max-not-set", &WaitConfig{ - Min: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(10 * time.Second), }, "", }, @@ -248,36 +248,36 @@ func TestWaitConfig_Merge(t *testing.T) { "all-fields", mockWaitConfig(), &WaitConfig{ - Min: helper.TimeToPtr(4 * time.Second), - Max: helper.TimeToPtr(9 * time.Second), + Min: pointer.Of(4 * time.Second), + Max: pointer.Of(9 * time.Second), }, &WaitConfig{ - Min: helper.TimeToPtr(4 * time.Second), - Max: helper.TimeToPtr(9 * time.Second), + Min: pointer.Of(4 * time.Second), + Max: pointer.Of(9 * time.Second), }, }, { "min-only", mockWaitConfig(), &WaitConfig{ - Min: helper.TimeToPtr(4 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(4 * time.Second), + Max: pointer.Of(10 * time.Second), }, &WaitConfig{ - Min: helper.TimeToPtr(4 * time.Second), - Max: helper.TimeToPtr(10 * 
time.Second), + Min: pointer.Of(4 * time.Second), + Max: pointer.Of(10 * time.Second), }, }, { "max-only", mockWaitConfig(), &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(9 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(9 * time.Second), }, &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(9 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(9 * time.Second), }, }, } @@ -298,14 +298,14 @@ func TestWaitConfig_ToConsulTemplate(t *testing.T) { ci.Parallel(t) expected := config.WaitConfig{ - Enabled: helper.BoolToPtr(true), - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Enabled: pointer.Of(true), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), } clientWaitConfig := &WaitConfig{ - Min: helper.TimeToPtr(5 * time.Second), - Max: helper.TimeToPtr(10 * time.Second), + Min: pointer.Of(5 * time.Second), + Max: pointer.Of(10 * time.Second), } actual, err := clientWaitConfig.ToConsulTemplate() @@ -316,10 +316,10 @@ func TestWaitConfig_ToConsulTemplate(t *testing.T) { func mockRetryConfig() *RetryConfig { return &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), MaxBackoffHCL: "10s", } } @@ -335,29 +335,29 @@ func TestRetryConfig_Copy(t *testing.T) { "fully-populated", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), MaxBackoffHCL: "10s", }, }, { "attempts-only", &RetryConfig{ - Attempts: helper.IntToPtr(5), + Attempts: pointer.Of(5), }, &RetryConfig{ - Attempts: helper.IntToPtr(5), + Attempts: pointer.Of(5), }, }, { "backoff-only", &RetryConfig{ - Backoff: helper.TimeToPtr(5 * time.Second), + Backoff: pointer.Of(5 * time.Second), }, &RetryConfig{ - Backoff: helper.TimeToPtr(5 * time.Second), + Backoff: pointer.Of(5 * time.Second), }, }, { @@ -372,10 +372,10 @@ func TestRetryConfig_Copy(t *testing.T) { { "max-backoff-only", &RetryConfig{ - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), }, &RetryConfig{ - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), }, }, { @@ -421,7 +421,7 @@ func TestRetryConfig_IsEmpty(t *testing.T) { { "is-not-empty", &RetryConfig{ - Attempts: helper.IntToPtr(12), + Attempts: pointer.Of(12), }, false, }, @@ -447,10 +447,10 @@ func TestRetryConfig_IsEqual(t *testing.T) { "are-equal", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), MaxBackoffHCL: "10s", }, true, @@ -459,10 +459,10 @@ func TestRetryConfig_IsEqual(t *testing.T) { "attempts-different", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(4), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(4), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), 
MaxBackoffHCL: "10s", }, false, @@ -471,10 +471,10 @@ func TestRetryConfig_IsEqual(t *testing.T) { "backoff-different", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(4 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(4 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), MaxBackoffHCL: "10s", }, false, @@ -483,10 +483,10 @@ func TestRetryConfig_IsEqual(t *testing.T) { "backoff-hcl-different", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "4s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), MaxBackoffHCL: "10s", }, false, @@ -495,10 +495,10 @@ func TestRetryConfig_IsEqual(t *testing.T) { "max-backoff-different", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(9 * time.Second), + MaxBackoff: pointer.Of(9 * time.Second), MaxBackoffHCL: "10s", }, false, @@ -507,10 +507,10 @@ func TestRetryConfig_IsEqual(t *testing.T) { "max-backoff-hcl-different", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), MaxBackoffHCL: "9s", }, false, @@ -535,8 +535,8 @@ func TestRetryConfig_IsValid(t *testing.T) { { "is-valid", &RetryConfig{ - Backoff: helper.TimeToPtr(5 * time.Second), - MaxBackoff: helper.TimeToPtr(10 * time.Second), + Backoff: pointer.Of(5 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), }, "", }, @@ -553,30 +553,30 @@ func TestRetryConfig_IsValid(t *testing.T) { { "backoff-greater-than-max-backoff", &RetryConfig{ - Backoff: helper.TimeToPtr(10 * time.Second), - MaxBackoff: helper.TimeToPtr(5 * time.Second), + Backoff: pointer.Of(10 * time.Second), + MaxBackoff: pointer.Of(5 * time.Second), }, "greater than max_backoff", }, { "backoff-not-set", &RetryConfig{ - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), }, "", }, { "max-backoff-not-set", &RetryConfig{ - Backoff: helper.TimeToPtr(2 * time.Minute), + Backoff: pointer.Of(2 * time.Minute), }, "greater than default", }, { "max-backoff-unbounded", &RetryConfig{ - Backoff: helper.TimeToPtr(10 * time.Second), - MaxBackoff: helper.TimeToPtr(0 * time.Second), + Backoff: pointer.Of(10 * time.Second), + MaxBackoff: pointer.Of(0 * time.Second), }, "", }, @@ -607,17 +607,17 @@ func TestRetryConfig_Merge(t *testing.T) { "all-fields", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(4), - Backoff: helper.TimeToPtr(4 * time.Second), + Attempts: pointer.Of(4), + Backoff: pointer.Of(4 * time.Second), BackoffHCL: "4s", - MaxBackoff: helper.TimeToPtr(9 * time.Second), + MaxBackoff: pointer.Of(9 * time.Second), MaxBackoffHCL: "9s", }, &RetryConfig{ - Attempts: helper.IntToPtr(4), - Backoff: helper.TimeToPtr(4 * time.Second), + Attempts: pointer.Of(4), + Backoff: pointer.Of(4 * time.Second), BackoffHCL: "4s", - MaxBackoff: helper.TimeToPtr(9 * time.Second), + MaxBackoff: pointer.Of(9 * time.Second), MaxBackoffHCL: "9s", }, }, @@ -625,17 +625,17 @@ func 
TestRetryConfig_Merge(t *testing.T) { "attempts-only", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(4), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(4), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), MaxBackoffHCL: "10s", }, &RetryConfig{ - Attempts: helper.IntToPtr(4), - Backoff: helper.TimeToPtr(5 * time.Second), + Attempts: pointer.Of(4), + Backoff: pointer.Of(5 * time.Second), BackoffHCL: "5s", - MaxBackoff: helper.TimeToPtr(10 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), MaxBackoffHCL: "10s", }, }, @@ -643,17 +643,17 @@ func TestRetryConfig_Merge(t *testing.T) { "multi-field", mockRetryConfig(), &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(4 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(4 * time.Second), BackoffHCL: "4s", - MaxBackoff: helper.TimeToPtr(9 * time.Second), + MaxBackoff: pointer.Of(9 * time.Second), MaxBackoffHCL: "9s", }, &RetryConfig{ - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(4 * time.Second), + Attempts: pointer.Of(5), + Backoff: pointer.Of(4 * time.Second), BackoffHCL: "4s", - MaxBackoff: helper.TimeToPtr(9 * time.Second), + MaxBackoff: pointer.Of(9 * time.Second), MaxBackoffHCL: "9s", }, }, @@ -675,10 +675,10 @@ func TestRetryConfig_ToConsulTemplate(t *testing.T) { ci.Parallel(t) expected := config.RetryConfig{ - Enabled: helper.BoolToPtr(true), - Attempts: helper.IntToPtr(5), - Backoff: helper.TimeToPtr(5 * time.Second), - MaxBackoff: helper.TimeToPtr(10 * time.Second), + Enabled: pointer.Of(true), + Attempts: pointer.Of(5), + Backoff: pointer.Of(5 * time.Second), + MaxBackoff: pointer.Of(10 * time.Second), } actual := mockRetryConfig() diff --git a/client/config/testing.go b/client/config/testing.go index a326cfbd6eb..f06fbe748e3 100644 --- a/client/config/testing.go +++ b/client/config/testing.go @@ -1,12 +1,12 @@ package config import ( - "io/ioutil" "os" "path/filepath" + "time" "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" testing "github.com/mitchellh/go-testing-interface" @@ -30,7 +30,7 @@ func TestClientConfig(t testing.T) (*Config, func()) { tmpDir = filepath.Clean(tmpDir) // Create a tempdir to hold state and alloc subdirs - parent, err := ioutil.TempDir(tmpDir, "nomadtest") + parent, err := os.MkdirTemp(tmpDir, "nomadtest") if err != nil { t.Fatalf("error creating client dir: %v", err) } @@ -58,11 +58,15 @@ func TestClientConfig(t testing.T) (*Config, func()) { // Helps make sure we are respecting configured parent conf.CgroupParent = "testing.slice" - conf.VaultConfig.Enabled = helper.BoolToPtr(false) + conf.VaultConfig.Enabled = pointer.Of(false) conf.DevMode = true // Loosen GC threshold conf.GCDiskUsageThreshold = 98.0 conf.GCInodeUsageThreshold = 98.0 + + // Same as default; necessary for task Event messages + conf.MaxKillTimeout = 30 * time.Second + return conf, cleanup } diff --git a/client/devicemanager/manager_test.go b/client/devicemanager/manager_test.go index c3da419e204..337f5c79525 100644 --- a/client/devicemanager/manager_test.go +++ b/client/devicemanager/manager_test.go @@ -11,8 +11,8 @@ import ( plugin "github.com/hashicorp/go-plugin" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/state" - "github.com/hashicorp/nomad/helper" 
"github.com/hashicorp/nomad/helper/pluginutils/loader" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/structs" @@ -42,7 +42,7 @@ var ( }, Attributes: map[string]*psstructs.Attribute{ "memory": { - Int: helper.Int64ToPtr(4), + Int: pointer.Of(int64(4)), Unit: "GB", }, }, @@ -61,7 +61,7 @@ var ( }, Attributes: map[string]*psstructs.Attribute{ "memory": { - Int: helper.Int64ToPtr(2), + Int: pointer.Of(int64(2)), Unit: "GB", }, }, @@ -74,14 +74,14 @@ var ( InstanceStats: map[string]*device.DeviceStats{ nvidiaDevice0ID: { Summary: &psstructs.StatValue{ - IntNumeratorVal: helper.Int64ToPtr(212), + IntNumeratorVal: pointer.Of(int64(212)), Unit: "F", Desc: "Temperature", }, }, nvidiaDevice1ID: { Summary: &psstructs.StatValue{ - IntNumeratorVal: helper.Int64ToPtr(218), + IntNumeratorVal: pointer.Of(int64(218)), Unit: "F", Desc: "Temperature", }, @@ -96,7 +96,7 @@ var ( InstanceStats: map[string]*device.DeviceStats{ intelDeviceID: { Summary: &psstructs.StatValue{ - IntNumeratorVal: helper.Int64ToPtr(220), + IntNumeratorVal: pointer.Of(int64(220)), Unit: "F", Desc: "Temperature", }, diff --git a/client/driver_manager_test.go b/client/driver_manager_test.go index 8a930b75eba..996b64ff1e6 100644 --- a/client/driver_manager_test.go +++ b/client/driver_manager_test.go @@ -22,10 +22,11 @@ func TestDriverManager_Fingerprint_Run(t *testing.T) { testClient, cleanup := TestClient(t, nil) defer cleanup() + conf := testClient.GetConfig() dm := drivermanager.New(&drivermanager.Config{ Logger: testClient.logger, - Loader: testClient.config.PluginSingletonLoader, - PluginConfig: testClient.configCopy.NomadPluginConfig(), + Loader: conf.PluginSingletonLoader, + PluginConfig: conf.NomadPluginConfig(), Updater: testClient.updateNodeFromDriver, EventHandlerFactory: testClient.GetTaskEventHandler, State: testClient.stateDB, @@ -35,7 +36,7 @@ func TestDriverManager_Fingerprint_Run(t *testing.T) { defer dm.Shutdown() testutil.WaitForResult(func() (bool, error) { - node := testClient.configCopy.Node + node := testClient.Node() d, ok := node.Drivers["mock_driver"] if !ok { @@ -73,10 +74,11 @@ func TestDriverManager_Fingerprint_Periodic(t *testing.T) { }) defer cleanup() + conf := testClient.GetConfig() dm := drivermanager.New(&drivermanager.Config{ Logger: testClient.logger, - Loader: testClient.config.PluginSingletonLoader, - PluginConfig: testClient.configCopy.NomadPluginConfig(), + Loader: conf.PluginSingletonLoader, + PluginConfig: conf.NomadPluginConfig(), Updater: testClient.updateNodeFromDriver, EventHandlerFactory: testClient.GetTaskEventHandler, State: testClient.stateDB, @@ -134,10 +136,11 @@ func TestDriverManager_NodeAttributes_Run(t *testing.T) { }) defer cleanup() + conf := testClient.GetConfig() dm := drivermanager.New(&drivermanager.Config{ Logger: testClient.logger, - Loader: testClient.config.PluginSingletonLoader, - PluginConfig: testClient.configCopy.NomadPluginConfig(), + Loader: conf.PluginSingletonLoader, + PluginConfig: conf.NomadPluginConfig(), Updater: testClient.updateNodeFromDriver, EventHandlerFactory: testClient.GetTaskEventHandler, State: testClient.stateDB, diff --git a/client/dynamicplugins/registry.go b/client/dynamicplugins/registry.go index 65f8c355cbc..4515c64fb68 100644 --- a/client/dynamicplugins/registry.go +++ b/client/dynamicplugins/registry.go @@ -98,7 +98,8 @@ type PluginInfo struct { // PluginConnectionInfo is the data required to connect to the plugin. 
// note: We currently only support Unix Domain Sockets, but this may be expanded -// to support other connection modes in the future. +// +// to support other connection modes in the future. type PluginConnectionInfo struct { // SocketPath is the path to the plugins api socket. SocketPath string @@ -268,12 +269,14 @@ func (d *dynamicRegistry) DeregisterPlugin(ptype, name, allocID string) error { } } - broadcaster := d.broadcasterForPluginType(ptype) - event := &PluginUpdateEvent{ - EventType: EventTypeDeregistered, - Info: info, + if info != nil { + broadcaster := d.broadcasterForPluginType(ptype) + event := &PluginUpdateEvent{ + EventType: EventTypeDeregistered, + Info: info, + } + broadcaster.broadcast(event) } - broadcaster.broadcast(event) return d.sync() } diff --git a/client/fingerprint/bridge_linux_test.go b/client/fingerprint/bridge_linux_test.go index 739ef73f475..ce1f8f355a7 100644 --- a/client/fingerprint/bridge_linux_test.go +++ b/client/fingerprint/bridge_linux_test.go @@ -3,7 +3,6 @@ package fingerprint import ( "fmt" "io" - "io/ioutil" "os" "strings" "testing" @@ -25,7 +24,7 @@ func TestBridgeFingerprint_detect(t *testing.T) { } func writeFile(t *testing.T, prefix, content string) string { - f, err := ioutil.TempFile("", "bridge-fp-") + f, err := os.CreateTemp("", "bridge-fp-") require.NoError(t, err) _, err = io.Copy(f, strings.NewReader(content)) diff --git a/client/fingerprint/cni.go b/client/fingerprint/cni.go index b4bfff69597..351d7b3d240 100644 --- a/client/fingerprint/cni.go +++ b/client/fingerprint/cni.go @@ -6,16 +6,18 @@ import ( "strings" "github.com/containernetworking/cni/libcni" - log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" ) +// CNIFingerprint creates a fingerprint of the CNI configuration(s) on the +// Nomad client. type CNIFingerprint struct { StaticFingerprinter - logger log.Logger + logger hclog.Logger } -func NewCNIFingerprint(logger log.Logger) Fingerprint { +func NewCNIFingerprint(logger hclog.Logger) Fingerprint { return &CNIFingerprint{logger: logger} } diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go index f562672332b..3922f25dbd2 100644 --- a/client/fingerprint/consul.go +++ b/client/fingerprint/consul.go @@ -3,10 +3,12 @@ package fingerprint import ( "fmt" "strconv" + "strings" "time" consulapi "github.com/hashicorp/consul/api" log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-version" agentconsul "github.com/hashicorp/nomad/command/agent/consul" ) @@ -15,6 +17,14 @@ const ( consulUnavailable = "unavailable" ) +var ( + // consulGRPCPortChangeVersion is the Consul version which made a breaking + // change to the way gRPC API listeners are created. This means Nomad must + // perform different fingerprinting depending on which version of Consul it + // is communicating with. 
+ consulGRPCPortChangeVersion = version.Must(version.NewVersion("1.14.0")) +) + // ConsulFingerprint is used to fingerprint for Consul type ConsulFingerprint struct { logger log.Logger @@ -105,7 +115,7 @@ func (f *ConsulFingerprint) initialize(req *FingerprintRequest) error { "consul.datacenter": f.dc, "consul.segment": f.segment, "consul.connect": f.connect, - "consul.grpc": f.grpc, + "consul.grpc": f.grpc(consulConfig.Scheme), "consul.ft.namespaces": f.namespaces, } } @@ -183,11 +193,50 @@ func (f *ConsulFingerprint) connect(info agentconsul.Self) (string, bool) { return strconv.FormatBool(c), ok } -func (f *ConsulFingerprint) grpc(info agentconsul.Self) (string, bool) { +func (f *ConsulFingerprint) grpc(scheme string) func(info agentconsul.Self) (string, bool) { + return func(info agentconsul.Self) (string, bool) { + + // The version is needed in order to understand which config object to + // query. This is because Consul 1.14.0 added a new gRPC port which + // broke the previous behaviour. + v, ok := info["Config"]["Version"].(string) + if !ok { + return "", false + } + + consulVersion, err := version.NewVersion(v) + if err != nil { + return "", false + } + + // If the Consul agent being fingerprinted is running a version less + // than 1.14.0 we use the original single gRPC port. + if consulVersion.Core().LessThan(consulGRPCPortChangeVersion.Core()) { + return f.grpcPort(info) + } + + // Now that we know we are querying a Consul agent running v1.14.0 or + // greater, we need to select the correct port parameter from the + // config depending on whether we have been asked to speak TLS or not. + switch strings.ToLower(scheme) { + case "https": + return f.grpcTLSPort(info) + default: + return f.grpcPort(info) + } + } +} + +func (f *ConsulFingerprint) grpcPort(info agentconsul.Self) (string, bool) { p, ok := info["DebugConfig"]["GRPCPort"].(float64) return fmt.Sprintf("%d", int(p)), ok } +func (f *ConsulFingerprint) grpcTLSPort(info agentconsul.Self) (string, bool) { + p, ok := info["DebugConfig"]["GRPCTLSPort"].(float64) + return fmt.Sprintf("%d", int(p)), ok +} + func (f *ConsulFingerprint) namespaces(info agentconsul.Self) (string, bool) { return strconv.FormatBool(agentconsul.Namespaces(info)), true } diff --git a/client/fingerprint/consul_test.go b/client/fingerprint/consul_test.go index 4b3887478d7..f2237496267 100644 --- a/client/fingerprint/consul_test.go +++ b/client/fingerprint/consul_test.go @@ -2,9 +2,9 @@ package fingerprint import ( "io" - "io/ioutil" "net/http" "net/http/httptest" + "os" "strings" "testing" @@ -38,7 +38,7 @@ func fakeConsul(payload string) (*httptest.Server, *config.Config) { } func fakeConsulPayload(t *testing.T, filename string) string { - b, err := ioutil.ReadFile(filename) + b, err := os.ReadFile(filename) require.NoError(t, err) return string(b) } @@ -296,29 +296,91 @@ func TestConsulFingerprint_grpc(t *testing.T) { fp := newConsulFingerPrint(t) - t.Run("grpc set", func(t *testing.T) { - s, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc set pre-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats }) require.True(t, ok) require.Equal(t, "8502", s) }) - t.Run("grpc disabled", func(t *testing.T) { - s, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc disabled pre-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats 
}) require.True(t, ok) require.Equal(t, "-1", s) }) - t.Run("grpc missing", func(t *testing.T) { - _, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc set pre-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, + "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8502", s) + }) + + t.Run("grpc disabled pre-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, + "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc set post-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8502", s) + }) + + t.Run("grpc disabled post-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc disabled post-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCTLSPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc set post-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCTLSPort": 8503.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8503", s) + }) + + t.Run("grpc missing http", func(t *testing.T) { + _, ok := fp.grpc("http")(agentconsul.Self{ "DebugConfig": {}, }) require.False(t, ok) }) + t.Run("grpc missing https", func(t *testing.T) { + _, ok := fp.grpc("https")(agentconsul.Self{ + "DebugConfig": {}, + }) + require.False(t, ok) + }) } func TestConsulFingerprint_namespaces(t *testing.T) { diff --git a/client/fingerprint/env_aws_cpu.go b/client/fingerprint/env_aws_cpu.go index 5f7a0d94bd4..5c188458a0e 100644 --- a/client/fingerprint/env_aws_cpu.go +++ b/client/fingerprint/env_aws_cpu.go @@ -146,7 +146,34 @@ var instanceTypeCPU = map[string]CPU{ "c6i.large": newCPU(2, 3.5), "c6i.metal": newCPU(128, 3.5), "c6i.xlarge": newCPU(4, 3.5), - "cc2.8xlarge": newCPU(32, 2.6), + "c6id.12xlarge": newCPU(48, 3.5), + "c6id.16xlarge": newCPU(64, 3.5), + "c6id.24xlarge": newCPU(96, 3.5), + "c6id.2xlarge": newCPU(8, 3.5), + "c6id.32xlarge": newCPU(128, 3.5), + "c6id.4xlarge": newCPU(16, 3.5), + "c6id.8xlarge": newCPU(32, 3.5), + "c6id.large": newCPU(2, 3.5), + "c6id.metal": newCPU(128, 3.5), + "c6id.xlarge": newCPU(4, 3.5), + "c6in.12xlarge": newCPU(48, 3.5), + "c6in.16xlarge": newCPU(64, 3.5), + "c6in.24xlarge": newCPU(96, 3.5), + "c6in.2xlarge": newCPU(8, 3.5), + "c6in.32xlarge": newCPU(128, 3.5), + "c6in.4xlarge": newCPU(16, 3.5), + "c6in.8xlarge": newCPU(32, 3.5), + "c6in.large": newCPU(2, 3.5), + "c6in.xlarge": newCPU(4, 3.5), + "c7g.12xlarge": newCPU(48, 2.6), + "c7g.16xlarge": newCPU(64, 2.6), + "c7g.2xlarge": newCPU(8, 2.6), + "c7g.4xlarge": newCPU(16, 2.6), + "c7g.8xlarge": newCPU(32, 2.6), + "c7g.large": newCPU(2, 2.6), + "c7g.medium": newCPU(1, 2.6), + "c7g.metal": newCPU(64, 2.6), + "c7g.xlarge": newCPU(4, 2.6), "d2.2xlarge": newCPU(8, 2.4), "d2.4xlarge": newCPU(16, 2.4), "d2.8xlarge": newCPU(36, 2.4), @@ -202,6 +229,7 @@ var 
instanceTypeCPU = map[string]CPU{ "h1.4xlarge": newCPU(16, 2.3), "h1.8xlarge": newCPU(32, 2.3), "hpc6a.48xlarge": newCPU(96, 3.6), + "hpc6id.32xlarge": newCPU(64, 3.5), "i2.2xlarge": newCPU(8, 2.5), "i2.4xlarge": newCPU(16, 2.5), "i2.8xlarge": newCPU(32, 2.5), @@ -221,6 +249,14 @@ var instanceTypeCPU = map[string]CPU{ "i3en.large": newCPU(2, 3.1), "i3en.metal": newCPU(96, 3.1), "i3en.xlarge": newCPU(4, 3.1), + "i4i.16xlarge": newCPU(64, 3.5), + "i4i.2xlarge": newCPU(8, 3.5), + "i4i.32xlarge": newCPU(128, 3.5), + "i4i.4xlarge": newCPU(16, 3.5), + "i4i.8xlarge": newCPU(32, 3.5), + "i4i.large": newCPU(2, 3.5), + "i4i.metal": newCPU(128, 3.5), + "i4i.xlarge": newCPU(4, 3.5), "im4gn.16xlarge": newCPU(64, 2.5), "im4gn.2xlarge": newCPU(8, 2.5), "im4gn.4xlarge": newCPU(16, 2.5), @@ -345,7 +381,45 @@ var instanceTypeCPU = map[string]CPU{ "m6i.large": newCPU(2, 3.5), "m6i.metal": newCPU(128, 3.5), "m6i.xlarge": newCPU(4, 3.5), + "m6id.12xlarge": newCPU(48, 3.5), + "m6id.16xlarge": newCPU(64, 3.5), + "m6id.24xlarge": newCPU(96, 3.5), + "m6id.2xlarge": newCPU(8, 3.5), + "m6id.32xlarge": newCPU(128, 3.5), + "m6id.4xlarge": newCPU(16, 3.5), + "m6id.8xlarge": newCPU(32, 3.5), + "m6id.large": newCPU(2, 3.5), + "m6id.metal": newCPU(128, 3.5), + "m6id.xlarge": newCPU(4, 3.5), + "m6idn.12xlarge": newCPU(48, 3.5), + "m6idn.16xlarge": newCPU(64, 3.5), + "m6idn.24xlarge": newCPU(96, 3.5), + "m6idn.2xlarge": newCPU(8, 3.5), + "m6idn.32xlarge": newCPU(128, 3.5), + "m6idn.4xlarge": newCPU(16, 3.5), + "m6idn.8xlarge": newCPU(32, 3.5), + "m6idn.large": newCPU(2, 3.5), + "m6idn.xlarge": newCPU(4, 3.5), + "m6in.12xlarge": newCPU(48, 3.5), + "m6in.16xlarge": newCPU(64, 3.5), + "m6in.24xlarge": newCPU(96, 3.5), + "m6in.2xlarge": newCPU(8, 3.5), + "m6in.32xlarge": newCPU(128, 3.5), + "m6in.4xlarge": newCPU(16, 3.5), + "m6in.8xlarge": newCPU(32, 3.5), + "m6in.large": newCPU(2, 3.5), + "m6in.xlarge": newCPU(4, 3.5), + "m7g.12xlarge": newCPU(48, 2.6), + "m7g.16xlarge": newCPU(64, 2.6), + "m7g.2xlarge": newCPU(8, 2.6), + "m7g.4xlarge": newCPU(16, 2.6), + "m7g.8xlarge": newCPU(32, 2.6), + "m7g.large": newCPU(2, 2.6), + "m7g.medium": newCPU(1, 2.6), + "m7g.metal": newCPU(64, 2.6), + "m7g.xlarge": newCPU(4, 2.6), "mac1.metal": newCPU(12, 3.2), + "mac2.metal": newCPU(8, 3.2), "p2.16xlarge": newCPU(64, 2.3), "p2.8xlarge": newCPU(32, 2.7), "p2.xlarge": newCPU(4, 2.7), @@ -426,6 +500,17 @@ var instanceTypeCPU = map[string]CPU{ "r5n.large": newCPU(2, 3.1), "r5n.metal": newCPU(96, 3.1), "r5n.xlarge": newCPU(4, 3.1), + "r6a.12xlarge": newCPU(48, 3.6), + "r6a.16xlarge": newCPU(64, 3.6), + "r6a.24xlarge": newCPU(96, 3.6), + "r6a.2xlarge": newCPU(8, 3.6), + "r6a.32xlarge": newCPU(128, 3.6), + "r6a.48xlarge": newCPU(192, 3.6), + "r6a.4xlarge": newCPU(16, 3.6), + "r6a.8xlarge": newCPU(32, 3.6), + "r6a.large": newCPU(2, 3.6), + "r6a.metal": newCPU(192, 3.6), + "r6a.xlarge": newCPU(4, 3.6), "r6g.12xlarge": newCPU(48, 2.5), "r6g.16xlarge": newCPU(64, 2.5), "r6g.2xlarge": newCPU(8, 2.5), @@ -454,6 +539,43 @@ var instanceTypeCPU = map[string]CPU{ "r6i.large": newCPU(2, 3.5), "r6i.metal": newCPU(128, 3.5), "r6i.xlarge": newCPU(4, 3.5), + "r6id.12xlarge": newCPU(48, 3.5), + "r6id.16xlarge": newCPU(64, 3.5), + "r6id.24xlarge": newCPU(96, 3.5), + "r6id.2xlarge": newCPU(8, 3.5), + "r6id.32xlarge": newCPU(128, 3.5), + "r6id.4xlarge": newCPU(16, 3.5), + "r6id.8xlarge": newCPU(32, 3.5), + "r6id.large": newCPU(2, 3.5), + "r6id.metal": newCPU(128, 3.5), + "r6id.xlarge": newCPU(4, 3.5), + "r6idn.12xlarge": newCPU(48, 3.5), + "r6idn.16xlarge": newCPU(64, 
3.5), + "r6idn.24xlarge": newCPU(96, 3.5), + "r6idn.2xlarge": newCPU(8, 3.5), + "r6idn.32xlarge": newCPU(128, 3.5), + "r6idn.4xlarge": newCPU(16, 3.5), + "r6idn.8xlarge": newCPU(32, 3.5), + "r6idn.large": newCPU(2, 3.5), + "r6idn.xlarge": newCPU(4, 3.5), + "r6in.12xlarge": newCPU(48, 3.5), + "r6in.16xlarge": newCPU(64, 3.5), + "r6in.24xlarge": newCPU(96, 3.5), + "r6in.2xlarge": newCPU(8, 3.5), + "r6in.32xlarge": newCPU(128, 3.5), + "r6in.4xlarge": newCPU(16, 3.5), + "r6in.8xlarge": newCPU(32, 3.5), + "r6in.large": newCPU(2, 3.5), + "r6in.xlarge": newCPU(4, 3.5), + "r7g.12xlarge": newCPU(48, 2.6), + "r7g.16xlarge": newCPU(64, 2.6), + "r7g.2xlarge": newCPU(8, 2.6), + "r7g.4xlarge": newCPU(16, 2.6), + "r7g.8xlarge": newCPU(32, 2.6), + "r7g.large": newCPU(2, 2.6), + "r7g.medium": newCPU(1, 2.6), + "r7g.metal": newCPU(64, 2.6), + "r7g.xlarge": newCPU(4, 2.6), "t2.2xlarge": newCPU(8, 2.3), "t2.large": newCPU(2, 2.3), "t2.medium": newCPU(2, 2.3), @@ -482,7 +604,11 @@ var instanceTypeCPU = map[string]CPU{ "t4g.nano": newCPU(2, 2.5), "t4g.small": newCPU(2, 2.5), "t4g.xlarge": newCPU(4, 2.5), + "trn1.2xlarge": newCPU(8, 3.5), + "trn1.32xlarge": newCPU(128, 3.5), "u-12tb1.112xlarge": newCPU(448, 2.1), + "u-18tb1.112xlarge": newCPU(448, 2.7), + "u-24tb1.112xlarge": newCPU(448, 2.7), "u-3tb1.56xlarge": newCPU(224, 2.1), "u-6tb1.112xlarge": newCPU(448, 2.1), "u-6tb1.56xlarge": newCPU(224, 2.1), @@ -510,12 +636,14 @@ var instanceTypeCPU = map[string]CPU{ "x2idn.16xlarge": newCPU(64, 3.5), "x2idn.24xlarge": newCPU(96, 3.5), "x2idn.32xlarge": newCPU(128, 3.5), + "x2idn.metal": newCPU(128, 3.5), "x2iedn.16xlarge": newCPU(64, 3.5), "x2iedn.24xlarge": newCPU(96, 3.5), "x2iedn.2xlarge": newCPU(8, 3.5), "x2iedn.32xlarge": newCPU(128, 3.5), "x2iedn.4xlarge": newCPU(16, 3.5), "x2iedn.8xlarge": newCPU(32, 3.5), + "x2iedn.metal": newCPU(128, 3.5), "x2iedn.xlarge": newCPU(4, 3.5), "x2iezn.12xlarge": newCPU(48, 4.5), "x2iezn.2xlarge": newCPU(8, 4.5), diff --git a/client/fingerprint/env_azure.go b/client/fingerprint/env_azure.go index b440ee90672..c49b6cfd7ed 100644 --- a/client/fingerprint/env_azure.go +++ b/client/fingerprint/env_azure.go @@ -3,7 +3,7 @@ package fingerprint import ( "encoding/json" "fmt" - "io/ioutil" + "io" "net/http" "net/url" "os" @@ -96,7 +96,7 @@ func (f *EnvAzureFingerprint) Get(attribute string, format string) (string, erro return "", err } - resp, err := ioutil.ReadAll(res.Body) + resp, err := io.ReadAll(res.Body) res.Body.Close() if err != nil { f.logger.Error("error reading response body for Azure attribute", "attribute", attribute, "error", err) diff --git a/client/fingerprint/env_digitalocean.go b/client/fingerprint/env_digitalocean.go index f899ac6a833..2028dc12a84 100644 --- a/client/fingerprint/env_digitalocean.go +++ b/client/fingerprint/env_digitalocean.go @@ -2,7 +2,7 @@ package fingerprint import ( "fmt" - "io/ioutil" + "io" "net/http" "net/url" "os" @@ -82,7 +82,7 @@ func (f *EnvDigitalOceanFingerprint) Get(attribute string, format string) (strin return "", err } - body, err := ioutil.ReadAll(res.Body) + body, err := io.ReadAll(res.Body) res.Body.Close() if err != nil { f.logger.Error("failed to read metadata", "attribute", attribute, "error", err, "resp_code", res.StatusCode) diff --git a/client/fingerprint/env_gce.go b/client/fingerprint/env_gce.go index 3849431b66d..9a4900093f2 100644 --- a/client/fingerprint/env_gce.go +++ b/client/fingerprint/env_gce.go @@ -3,7 +3,7 @@ package fingerprint import ( "encoding/json" "fmt" - "io/ioutil" + "io" "net/http" "net/url" "os" @@ -111,7 
+111,7 @@ func (f *EnvGCEFingerprint) Get(attribute string, recursive bool) (string, error return "", err } - resp, err := ioutil.ReadAll(res.Body) + resp, err := io.ReadAll(res.Body) res.Body.Close() if err != nil { f.logger.Error("error reading response body for GCE attribute", "attribute", attribute, "error", err) diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index a12ea98f442..39c8dcba964 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -29,17 +29,18 @@ var ( // hostFingerprinters contains the host fingerprints which are available for a // given platform. hostFingerprinters = map[string]Factory{ - "arch": NewArchFingerprint, - "consul": NewConsulFingerprint, - "cni": NewCNIFingerprint, - "cpu": NewCPUFingerprint, - "host": NewHostFingerprint, - "memory": NewMemoryFingerprint, - "network": NewNetworkFingerprint, - "nomad": NewNomadFingerprint, - "signal": NewSignalFingerprint, - "storage": NewStorageFingerprint, - "vault": NewVaultFingerprint, + "arch": NewArchFingerprint, + "consul": NewConsulFingerprint, + "cni": NewCNIFingerprint, // networks + "cpu": NewCPUFingerprint, + "host": NewHostFingerprint, + "memory": NewMemoryFingerprint, + "network": NewNetworkFingerprint, + "nomad": NewNomadFingerprint, + "plugins_cni": NewPluginsCNIFingerprint, + "signal": NewSignalFingerprint, + "storage": NewStorageFingerprint, + "vault": NewVaultFingerprint, } // envFingerprinters contains the fingerprints that are environment specific. diff --git a/client/fingerprint/host.go b/client/fingerprint/host.go index f7b82982dea..4d37d091d07 100644 --- a/client/fingerprint/host.go +++ b/client/fingerprint/host.go @@ -30,6 +30,7 @@ func (f *HostFingerprint) Fingerprint(req *FingerprintRequest, resp *Fingerprint resp.AddAttribute("os.version", hostInfo.PlatformVersion) resp.AddAttribute("kernel.name", runtime.GOOS) + resp.AddAttribute("kernel.arch", hostInfo.KernelArch) resp.AddAttribute("kernel.version", hostInfo.KernelVersion) resp.AddAttribute("unique.hostname", hostInfo.Hostname) diff --git a/client/fingerprint/network_linux.go b/client/fingerprint/network_linux.go index 6aa53571e49..a44221be095 100644 --- a/client/fingerprint/network_linux.go +++ b/client/fingerprint/network_linux.go @@ -2,7 +2,7 @@ package fingerprint import ( "fmt" - "io/ioutil" + "os" "os/exec" "regexp" "strconv" @@ -14,7 +14,7 @@ func (f *NetworkFingerprint) linkSpeedSys(device string) int { path := fmt.Sprintf("/sys/class/net/%s/speed", device) // Read contents of the device/speed file - content, err := ioutil.ReadFile(path) + content, err := os.ReadFile(path) if err != nil { f.logger.Debug("unable to read link speed", "path", path, "device", device) return 0 diff --git a/client/fingerprint/plugins_cni.go b/client/fingerprint/plugins_cni.go new file mode 100644 index 00000000000..e1eb89d3e8f --- /dev/null +++ b/client/fingerprint/plugins_cni.go @@ -0,0 +1,114 @@ +package fingerprint + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-version" +) + +const ( + cniPluginAttribute = "plugins.cni.version" +) + +// PluginsCNIFingerprint creates a fingerprint of the CNI plugins present on the +// CNI plugin path specified for the Nomad client. 
+type PluginsCNIFingerprint struct { + StaticFingerprinter + logger hclog.Logger + lister func(string) ([]os.DirEntry, error) +} + +func NewPluginsCNIFingerprint(logger hclog.Logger) Fingerprint { + return &PluginsCNIFingerprint{ + logger: logger.Named("cni_plugins"), + lister: os.ReadDir, + } +} + +func (f *PluginsCNIFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error { + cniPath := req.Config.CNIPath + if cniPath == "" { + // this will be set to default by client; if empty then lets just do + // nothing rather than re-assume a default of our own + return nil + } + + // list the cni_path directory + entries, err := f.lister(cniPath) + switch { + case err != nil: + f.logger.Warn("failed to read CNI plugins directory", "cni_path", cniPath, "error", err) + resp.Detected = false + return nil + case len(entries) == 0: + f.logger.Debug("no CNI plugins found", "cni_path", cniPath) + resp.Detected = true + return nil + } + + // for each file in cni_path, detect executables and try to get their version + for _, entry := range entries { + v, ok := f.detectOne(cniPath, entry) + if ok { + resp.AddAttribute(f.attribute(entry.Name()), v) + } + } + + // detection complete, regardless of results + resp.Detected = true + return nil +} + +func (f *PluginsCNIFingerprint) attribute(filename string) string { + return fmt.Sprintf("%s.%s", cniPluginAttribute, filename) +} + +func (f *PluginsCNIFingerprint) detectOne(cniPath string, entry os.DirEntry) (string, bool) { + fi, err := entry.Info() + if err != nil { + f.logger.Debug("failed to read cni directory entry", "error", err) + return "", false + } + + if fi.Mode()&0o111 == 0 { + f.logger.Debug("unexpected non-executable in cni plugin directory", "name", fi.Name()) + return "", false // not executable + } + + exePath := filepath.Join(cniPath, fi.Name()) + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + // best effort attempt to get a version from the executable, otherwise + // the version will be "unknown" + // execute with no args; at least container-networking plugins respond with + // version string in this case, which makes Windows support simpler + cmd := exec.CommandContext(ctx, exePath) + output, err := cmd.CombinedOutput() + if err != nil { + f.logger.Debug("failed to detect CNI plugin version", "name", fi.Name(), "error", err) + return "unknown", false + } + + // try to find semantic versioning string + // e.g. 
+ // /opt/cni/bin/bridge + // CNI bridge plugin v1.0.0 + tokens := strings.Fields(string(output)) + for i := len(tokens) - 1; i >= 0; i-- { + token := tokens[i] + if _, parseErr := version.NewSemver(token); parseErr == nil { + return token, true + } + } + + f.logger.Debug("failed to parse CNI plugin version", "name", fi.Name()) + return "unknown", false +} diff --git a/client/fingerprint/plugins_cni_test.go b/client/fingerprint/plugins_cni_test.go new file mode 100644 index 00000000000..4a03baec0a0 --- /dev/null +++ b/client/fingerprint/plugins_cni_test.go @@ -0,0 +1,87 @@ +package fingerprint + +import ( + "os" + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test/must" +) + +func TestPluginsCNIFingerprint_Fingerprint_present(t *testing.T) { + ci.Parallel(t) + + f := NewPluginsCNIFingerprint(testlog.HCLogger(t)) + request := &FingerprintRequest{ + Config: &config.Config{ + CNIPath: "./test_fixtures/cni", + }, + } + response := new(FingerprintResponse) + + err := f.Fingerprint(request, response) + must.NoError(t, err) + must.True(t, response.Detected) + attrCustom := f.(*PluginsCNIFingerprint).attribute("custom") + attrBridge := f.(*PluginsCNIFingerprint).attribute("bridge") + must.Eq(t, "v1.2.3", response.Attributes[attrCustom]) + must.Eq(t, "v1.0.2", response.Attributes[attrBridge]) +} + +func TestPluginsCNIFingerprint_Fingerprint_absent(t *testing.T) { + ci.Parallel(t) + + f := NewPluginsCNIFingerprint(testlog.HCLogger(t)) + request := &FingerprintRequest{ + Config: &config.Config{ + CNIPath: "/does/not/exist", + }, + } + response := new(FingerprintResponse) + + err := f.Fingerprint(request, response) + must.NoError(t, err) + must.False(t, response.Detected) + attrCustom := f.(*PluginsCNIFingerprint).attribute("custom") + attrBridge := f.(*PluginsCNIFingerprint).attribute("bridge") + must.MapNotContainsKeys(t, response.Attributes, []string{attrCustom, attrBridge}) +} + +func TestPluginsCNIFingerprint_Fingerprint_empty(t *testing.T) { + ci.Parallel(t) + + lister := func(string) ([]os.DirEntry, error) { + // return an empty slice of directory entries + // i.e. 
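The new `plugins_cni` fingerprint shown above runs each executable in `cni_path` with no arguments and scans the output from the last token backwards for the first thing that parses as a semantic version, publishing it as `plugins.cni.version.<binary>`. A small sketch of just that token scan, under the assumption that output looks like the `CNI bridge plugin v1.0.2` fixture:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/hashicorp/go-version"
)

// semverToken returns the last whitespace-separated token of a plugin's
// no-argument output that parses as a semantic version, mirroring the
// detection strategy in the fingerprint; "unknown" is the fallback.
func semverToken(output string) (string, bool) {
	tokens := strings.Fields(output)
	for i := len(tokens) - 1; i >= 0; i-- {
		if _, err := version.NewSemver(tokens[i]); err == nil {
			return tokens[i], true
		}
	}
	return "unknown", false
}

func main() {
	v, ok := semverToken("CNI bridge plugin v1.0.2")
	fmt.Println(v, ok) // v1.0.2 true

	v, ok = semverToken("no version advertised here")
	fmt.Println(v, ok) // unknown false
}
```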
no plugins present + return nil, nil + } + + f := NewPluginsCNIFingerprint(testlog.HCLogger(t)) + f.(*PluginsCNIFingerprint).lister = lister + request := &FingerprintRequest{ + Config: &config.Config{ + CNIPath: "./test_fixtures/cni", + }, + } + response := new(FingerprintResponse) + + err := f.Fingerprint(request, response) + must.NoError(t, err) + must.True(t, response.Detected) +} + +func TestPluginsCNIFingerprint_Fingerprint_unset(t *testing.T) { + ci.Parallel(t) + + f := NewPluginsCNIFingerprint(testlog.HCLogger(t)) + request := &FingerprintRequest{ + Config: new(config.Config), + } + response := new(FingerprintResponse) + + err := f.Fingerprint(request, response) + must.NoError(t, err) + must.False(t, response.Detected) +} diff --git a/client/fingerprint/test_fixtures/cni/bridge b/client/fingerprint/test_fixtures/cni/bridge new file mode 100755 index 00000000000..0b7f14f7f50 --- /dev/null +++ b/client/fingerprint/test_fixtures/cni/bridge @@ -0,0 +1,3 @@ +#!/bin/sh + +echo "CNI bridge plugin v1.0.2" diff --git a/client/fingerprint/test_fixtures/cni/custom b/client/fingerprint/test_fixtures/cni/custom new file mode 100755 index 00000000000..d2beee878c9 --- /dev/null +++ b/client/fingerprint/test_fixtures/cni/custom @@ -0,0 +1,4 @@ +#!/bin/sh + +echo "Custom v1.2.3 Plugin" + diff --git a/client/fs_endpoint.go b/client/fs_endpoint.go index 2796c45c2b3..3d988520ac4 100644 --- a/client/fs_endpoint.go +++ b/client/fs_endpoint.go @@ -23,7 +23,7 @@ import ( "github.com/hashicorp/nomad/client/allocdir" sframer "github.com/hashicorp/nomad/client/lib/streamframer" cstructs "github.com/hashicorp/nomad/client/structs" - "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/nomad/structs" ) @@ -166,32 +166,32 @@ func (f *FileSystem) stream(conn io.ReadWriteCloser) { encoder := codec.NewEncoder(conn, structs.MsgpackHandle) if err := decoder.Decode(&req); err != nil { - handleStreamResultError(err, helper.Int64ToPtr(500), encoder) + handleStreamResultError(err, pointer.Of(int64(500)), encoder) return } if req.AllocID == "" { - handleStreamResultError(allocIDNotPresentErr, helper.Int64ToPtr(400), encoder) + handleStreamResultError(allocIDNotPresentErr, pointer.Of(int64(400)), encoder) return } alloc, err := f.c.GetAlloc(req.AllocID) if err != nil { - handleStreamResultError(structs.NewErrUnknownAllocation(req.AllocID), helper.Int64ToPtr(404), encoder) + handleStreamResultError(structs.NewErrUnknownAllocation(req.AllocID), pointer.Of(int64(404)), encoder) return } // Check read permissions if aclObj, err := f.c.ResolveToken(req.QueryOptions.AuthToken); err != nil { - handleStreamResultError(err, helper.Int64ToPtr(403), encoder) + handleStreamResultError(err, pointer.Of(int64(403)), encoder) return } else if aclObj != nil && !aclObj.AllowNsOp(alloc.Namespace, acl.NamespaceCapabilityReadFS) { - handleStreamResultError(structs.ErrPermissionDenied, helper.Int64ToPtr(403), encoder) + handleStreamResultError(structs.ErrPermissionDenied, pointer.Of(int64(403)), encoder) return } // Validate the arguments if req.Path == "" { - handleStreamResultError(pathNotPresentErr, helper.Int64ToPtr(400), encoder) + handleStreamResultError(pathNotPresentErr, pointer.Of(int64(400)), encoder) return } switch req.Origin { @@ -199,15 +199,15 @@ func (f *FileSystem) stream(conn io.ReadWriteCloser) { case "": req.Origin = "start" default: - handleStreamResultError(invalidOrigin, helper.Int64ToPtr(400), encoder) + handleStreamResultError(invalidOrigin, 
pointer.Of(int64(400)), encoder) return } fs, err := f.c.GetAllocFS(req.AllocID) if err != nil { - code := helper.Int64ToPtr(500) + code := pointer.Of(int64(500)) if structs.IsErrUnknownAllocation(err) { - code = helper.Int64ToPtr(404) + code = pointer.Of(int64(404)) } handleStreamResultError(err, code, encoder) @@ -217,13 +217,13 @@ func (f *FileSystem) stream(conn io.ReadWriteCloser) { // Calculate the offset fileInfo, err := fs.Stat(req.Path) if err != nil { - handleStreamResultError(err, helper.Int64ToPtr(400), encoder) + handleStreamResultError(err, pointer.Of(int64(400)), encoder) return } if fileInfo.IsDir { handleStreamResultError( fmt.Errorf("file %q is a directory", req.Path), - helper.Int64ToPtr(400), encoder) + pointer.Of(int64(400)), encoder) return } @@ -325,7 +325,7 @@ OUTER: } if streamErr != nil { - handleStreamResultError(streamErr, helper.Int64ToPtr(500), encoder) + handleStreamResultError(streamErr, pointer.Of(int64(500)), encoder) return } } @@ -341,17 +341,17 @@ func (f *FileSystem) logs(conn io.ReadWriteCloser) { encoder := codec.NewEncoder(conn, structs.MsgpackHandle) if err := decoder.Decode(&req); err != nil { - handleStreamResultError(err, helper.Int64ToPtr(500), encoder) + handleStreamResultError(err, pointer.Of(int64(500)), encoder) return } if req.AllocID == "" { - handleStreamResultError(allocIDNotPresentErr, helper.Int64ToPtr(400), encoder) + handleStreamResultError(allocIDNotPresentErr, pointer.Of(int64(400)), encoder) return } alloc, err := f.c.GetAlloc(req.AllocID) if err != nil { - handleStreamResultError(structs.NewErrUnknownAllocation(req.AllocID), helper.Int64ToPtr(404), encoder) + handleStreamResultError(structs.NewErrUnknownAllocation(req.AllocID), pointer.Of(int64(404)), encoder) return } @@ -370,13 +370,13 @@ func (f *FileSystem) logs(conn io.ReadWriteCloser) { // Validate the arguments if req.Task == "" { - handleStreamResultError(taskNotPresentErr, helper.Int64ToPtr(400), encoder) + handleStreamResultError(taskNotPresentErr, pointer.Of(int64(400)), encoder) return } switch req.LogType { case "stdout", "stderr": default: - handleStreamResultError(logTypeNotPresentErr, helper.Int64ToPtr(400), encoder) + handleStreamResultError(logTypeNotPresentErr, pointer.Of(int64(400)), encoder) return } switch req.Origin { @@ -384,15 +384,15 @@ func (f *FileSystem) logs(conn io.ReadWriteCloser) { case "": req.Origin = "start" default: - handleStreamResultError(invalidOrigin, helper.Int64ToPtr(400), encoder) + handleStreamResultError(invalidOrigin, pointer.Of(int64(400)), encoder) return } fs, err := f.c.GetAllocFS(req.AllocID) if err != nil { - code := helper.Int64ToPtr(500) + code := pointer.Of(int64(500)) if structs.IsErrUnknownAllocation(err) { - code = helper.Int64ToPtr(404) + code = pointer.Of(int64(404)) } handleStreamResultError(err, code, encoder) @@ -401,9 +401,9 @@ func (f *FileSystem) logs(conn io.ReadWriteCloser) { allocState, err := f.c.GetAllocState(req.AllocID) if err != nil { - code := helper.Int64ToPtr(500) + code := pointer.Of(int64(500)) if structs.IsErrUnknownAllocation(err) { - code = helper.Int64ToPtr(404) + code = pointer.Of(int64(404)) } handleStreamResultError(err, code, encoder) @@ -415,7 +415,7 @@ func (f *FileSystem) logs(conn io.ReadWriteCloser) { if taskState == nil { handleStreamResultError( fmt.Errorf("unknown task name %q", req.Task), - helper.Int64ToPtr(400), + pointer.Of(int64(400)), encoder) return } @@ -423,7 +423,7 @@ func (f *FileSystem) logs(conn io.ReadWriteCloser) { if taskState.StartedAt.IsZero() { 
handleStreamResultError( fmt.Errorf("task %q not started yet. No logs available", req.Task), - helper.Int64ToPtr(404), + pointer.Of(int64(404)), encoder) return } diff --git a/client/fs_endpoint_test.go b/client/fs_endpoint_test.go index 4f5a40eddc4..63c04d91d72 100644 --- a/client/fs_endpoint_test.go +++ b/client/fs_endpoint_test.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "math" "net" "os" @@ -1854,7 +1853,7 @@ func TestFS_logsImpl_NoFollow(t *testing.T) { for i := 0; i < 3; i++ { logFile := fmt.Sprintf("%s.%s.%d", task, logType, i) logFilePath := filepath.Join(logDir, logFile) - err := ioutil.WriteFile(logFilePath, expected[i:i+1], 0777) + err := os.WriteFile(logFilePath, expected[i:i+1], 0777) if err != nil { t.Fatalf("Failed to create file: %v", err) } @@ -1928,7 +1927,7 @@ func TestFS_logsImpl_Follow(t *testing.T) { } writeToFile := func(index int, data []byte) { logFilePath := filePath(index) - err := ioutil.WriteFile(logFilePath, data, 0777) + err := os.WriteFile(logFilePath, data, 0777) if err != nil { t.Fatalf("Failed to create file: %v", err) } diff --git a/client/interfaces/client.go b/client/interfaces/client.go index 35f28c321fa..f3fc4a5a804 100644 --- a/client/interfaces/client.go +++ b/client/interfaces/client.go @@ -24,3 +24,15 @@ type AllocStateHandler interface { type DeviceStatsReporter interface { LatestDeviceResourceStats([]*structs.AllocatedDeviceResource) []*device.DeviceGroupStats } + +// EnvReplacer is an interface which can interpolate environment variables and +// is usually satisfied by taskenv.TaskEnv. +type EnvReplacer interface { + ReplaceEnv(string) string + ClientPath(string, bool) (string, bool) +} + +// ArtifactGetter is an interface satisfied by the helper/getter package. +type ArtifactGetter interface { + GetArtifact(taskEnv EnvReplacer, artifact *structs.TaskArtifact) error +} diff --git a/client/lib/cgutil/cgutil_linux.go b/client/lib/cgutil/cgutil_linux.go index 1333d0cc13b..ae8f6478bef 100644 --- a/client/lib/cgutil/cgutil_linux.go +++ b/client/lib/cgutil/cgutil_linux.go @@ -18,25 +18,43 @@ import ( // cgroups.v1 // // This is a read-only value. -var UseV2 = cgroups.IsCgroup2UnifiedMode() +var UseV2 = safelyDetectUnifiedMode() + +// Currently it is possible for the runc utility function to panic +// https://github.com/opencontainers/runc/pull/3745 +func safelyDetectUnifiedMode() (result bool) { + defer func() { + if r := recover(); r != nil { + result = false + } + }() + result = cgroups.IsCgroup2UnifiedMode() + return +} // GetCgroupParent returns the mount point under the root cgroup in which Nomad // will create cgroups. If parent is not set, an appropriate name for the version // of cgroups will be used. func GetCgroupParent(parent string) string { - if UseV2 { - return getParentV2(parent) + switch { + case parent != "": + return parent + case UseV2: + return DefaultCgroupParentV2 + default: + return DefaultCgroupV1Parent } - return getParentV1(parent) } // CreateCPUSetManager creates a V1 or V2 CpusetManager depending on system configuration. 
-func CreateCPUSetManager(parent string, logger hclog.Logger) CpusetManager { +func CreateCPUSetManager(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { parent = GetCgroupParent(parent) // use appropriate default parent if not set in client config - if UseV2 { - return NewCpusetManagerV2(parent, logger.Named("cpuset.v2")) + switch { + case UseV2: + return NewCpusetManagerV2(parent, reservable, logger.Named("cpuset.v2")) + default: + return NewCpusetManagerV1(parent, reservable, logger.Named("cpuset.v1")) } - return NewCpusetManagerV1(parent, logger.Named("cpuset.v1")) } // GetCPUsFromCgroup gets the effective cpuset value for the given cgroup. diff --git a/client/lib/cgutil/cgutil_linux_test.go b/client/lib/cgutil/cgutil_linux_test.go index ed3ae87bd85..b57f79cfeb1 100644 --- a/client/lib/cgutil/cgutil_linux_test.go +++ b/client/lib/cgutil/cgutil_linux_test.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/helper/uuid" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs2" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -58,19 +59,21 @@ func TestUtil_CreateCPUSetManager(t *testing.T) { t.Run("v1", func(t *testing.T) { testutil.CgroupsCompatibleV1(t) parent := "/" + uuid.Short() - manager := CreateCPUSetManager(parent, logger) - err := manager.Init([]uint16{0}) - require.NoError(t, err) - require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) + manager := CreateCPUSetManager(parent, []uint16{0}, logger) + manager.Init() + _, ok := manager.(*cpusetManagerV1) + must.True(t, ok) + must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) }) t.Run("v2", func(t *testing.T) { testutil.CgroupsCompatibleV2(t) parent := uuid.Short() + ".slice" - manager := CreateCPUSetManager(parent, logger) - err := manager.Init([]uint16{0}) - require.NoError(t, err) - require.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) + manager := CreateCPUSetManager(parent, []uint16{0}, logger) + manager.Init() + _, ok := manager.(*cpusetManagerV2) + must.True(t, ok) + must.NoError(t, cgroups.RemovePath(filepath.Join(CgroupRoot, parent))) }) } diff --git a/client/lib/cgutil/cgutil_noop.go b/client/lib/cgutil/cgutil_noop.go index 91a35b14492..89c86c10984 100644 --- a/client/lib/cgutil/cgutil_noop.go +++ b/client/lib/cgutil/cgutil_noop.go @@ -17,7 +17,7 @@ const ( var UseV2 = false // CreateCPUSetManager creates a no-op CpusetManager for non-Linux operating systems. -func CreateCPUSetManager(string, hclog.Logger) CpusetManager { +func CreateCPUSetManager(string, []uint16, hclog.Logger) CpusetManager { return new(NoopCpusetManager) } diff --git a/client/lib/cgutil/cpuset_manager.go b/client/lib/cgutil/cpuset_manager.go index 7d16c752f84..b3e56fa6050 100644 --- a/client/lib/cgutil/cpuset_manager.go +++ b/client/lib/cgutil/cpuset_manager.go @@ -18,10 +18,9 @@ const ( // CpusetManager is used to setup cpuset cgroups for each task. type CpusetManager interface { - // Init should be called with the initial set of reservable cores before any - // allocations are managed. Ensures the parent cgroup exists and proper permissions - // are available for managing cgroups. - Init([]uint16) error + // Init should be called before the client starts running allocations. This + // is where the cpuset manager should start doing background operations. 
+ Init() // AddAlloc adds an allocation to the manager AddAlloc(alloc *structs.Allocation) @@ -36,8 +35,7 @@ type CpusetManager interface { type NoopCpusetManager struct{} -func (n NoopCpusetManager) Init([]uint16) error { - return nil +func (n NoopCpusetManager) Init() { } func (n NoopCpusetManager) AddAlloc(alloc *structs.Allocation) { diff --git a/client/lib/cgutil/cpuset_manager_v1.go b/client/lib/cgutil/cpuset_manager_v1.go index f0fa3252746..1a316324ae6 100644 --- a/client/lib/cgutil/cpuset_manager_v1.go +++ b/client/lib/cgutil/cpuset_manager_v1.go @@ -4,8 +4,8 @@ package cgutil import ( "context" + "errors" "fmt" - "io/ioutil" "os" "path/filepath" "strings" @@ -29,14 +29,54 @@ const ( ) // NewCpusetManagerV1 creates a CpusetManager compatible with cgroups.v1 -func NewCpusetManagerV1(cgroupParent string, logger hclog.Logger) CpusetManager { +func NewCpusetManagerV1(cgroupParent string, _ []uint16, logger hclog.Logger) CpusetManager { if cgroupParent == "" { cgroupParent = DefaultCgroupV1Parent } + + cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", cgroupParent) + if err != nil { + logger.Warn("failed to get cgroup path; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + // ensures that shared cpuset exists and that the cpuset values are copied from the parent if created + if err = cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil { + logger.Warn("failed to ensure cgroup parent exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath) + if err != nil { + logger.Warn("failed to detect parent cpuset settings; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + parentCpuset, err := cpuset.Parse(parentCpus) + if err != nil { + logger.Warn("failed to parse parent cpuset.cpus setting; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + + // ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup + if err = os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err != nil { + if !errors.Is(err, os.ErrExist) { + logger.Warn("failed to ensure reserved cpuset.cpus interface exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + } + + if err = cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil { + logger.Warn("failed to ensure reserved cpuset.mems interface exists; disable cpuset management", "error", err) + return new(NoopCpusetManager) + } + return &cpusetManagerV1{ - cgroupParent: cgroupParent, - cgroupInfo: map[string]allocTaskCgroupInfo{}, - logger: logger, + parentCpuset: parentCpuset, + cgroupParent: cgroupParent, + cgroupParentPath: cgroupParentPath, + cgroupInfo: map[string]allocTaskCgroupInfo{}, + logger: logger, } } @@ -140,48 +180,11 @@ type allocTaskCgroupInfo map[string]*TaskCgroupInfo // Init checks that the cgroup parent and expected child cgroups have been created // If the cgroup parent is set to /nomad then this will ensure that the /nomad/shared // cgroup is initialized. 
-func (c *cpusetManagerV1) Init(_ []uint16) error { - cgroupParentPath, err := GetCgroupPathHelperV1("cpuset", c.cgroupParent) - if err != nil { - return err - } - c.cgroupParentPath = cgroupParentPath - - // ensures that shared cpuset exists and that the cpuset values are copied from the parent if created - if err := cpusetEnsureParentV1(filepath.Join(cgroupParentPath, SharedCpusetCgroupName)); err != nil { - return err - } - - parentCpus, parentMems, err := getCpusetSubsystemSettingsV1(cgroupParentPath) - if err != nil { - return fmt.Errorf("failed to detect parent cpuset settings: %v", err) - } - c.parentCpuset, err = cpuset.Parse(parentCpus) - if err != nil { - return fmt.Errorf("failed to parse parent cpuset.cpus setting: %v", err) - } - - // ensure the reserved cpuset exists, but only copy the mems from the parent if creating the cgroup - if err := os.Mkdir(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), 0755); err == nil { - // cgroup created, leave cpuset.cpus empty but copy cpuset.mems from parent - if err != nil { - return err - } - } else if !os.IsExist(err) { - return err - } - - if err := cgroups.WriteFile(filepath.Join(cgroupParentPath, ReservedCpusetCgroupName), "cpuset.mems", parentMems); err != nil { - return err - } - +func (c *cpusetManagerV1) Init() { c.doneCh = make(chan struct{}) c.signalCh = make(chan struct{}) - c.logger.Info("initialized cpuset cgroup manager", "parent", c.cgroupParent, "cpuset", c.parentCpuset.String()) - go c.reconcileLoop() - return nil } func (c *cpusetManagerV1) reconcileLoop() { @@ -223,7 +226,7 @@ func (c *cpusetManagerV1) reconcileCpusets() { } // look for reserved cpusets which we don't know about and remove - files, err := ioutil.ReadDir(c.reservedCpusetPath()) + files, err := os.ReadDir(c.reservedCpusetPath()) if err != nil { c.logger.Error("failed to list files in reserved cgroup path during reconciliation", "path", c.reservedCpusetPath(), "error", err) } @@ -340,13 +343,6 @@ func getCPUsFromCgroupV1(group string) ([]uint16, error) { return stats.CPUSetStats.CPUs, nil } -func getParentV1(parent string) string { - if parent == "" { - return DefaultCgroupV1Parent - } - return parent -} - // cpusetEnsureParentV1 makes sure that the parent directories of current // are created and populated with the proper cpus and mems files copied // from their respective parent. 
It does that recursively, starting from diff --git a/client/lib/cgutil/cpuset_manager_v1_test.go b/client/lib/cgutil/cpuset_manager_v1_test.go index 9537f2f87a8..f342c270450 100644 --- a/client/lib/cgutil/cpuset_manager_v1_test.go +++ b/client/lib/cgutil/cpuset_manager_v1_test.go @@ -3,7 +3,7 @@ package cgutil import ( - "io/ioutil" + "os" "path/filepath" "testing" @@ -16,7 +16,7 @@ import ( "github.com/stretchr/testify/require" ) -func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func()) { +func tmpCpusetManagerV1(t *testing.T) (*cpusetManagerV1, func()) { mount, err := FindCgroupMountpointDir() if err != nil || mount == "" { t.Skipf("Failed to find cgroup mount: %v %v", mount, err) @@ -25,15 +25,10 @@ func tmpCpusetManagerV1(t *testing.T) (manager *cpusetManagerV1, cleanup func()) parent := "/gotest-" + uuid.Short() require.NoError(t, cpusetEnsureParentV1(parent)) - manager = &cpusetManagerV1{ - cgroupParent: parent, - cgroupInfo: map[string]allocTaskCgroupInfo{}, - logger: testlog.HCLogger(t), - } - parentPath, err := GetCgroupPathHelperV1("cpuset", parent) require.NoError(t, err) + manager := NewCpusetManagerV1(parent, nil, testlog.HCLogger(t)).(*cpusetManagerV1) return manager, func() { require.NoError(t, cgroups.RemovePaths(map[string]string{"cpuset": parentPath})) } } @@ -42,11 +37,11 @@ func TestCpusetManager_V1_Init(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName)) require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) - sharedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) + sharedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) sharedCpus, err := cpuset.Parse(string(sharedCpusRaw)) require.NoError(t, err) @@ -59,7 +54,7 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() alloc := mock.Alloc() // reserve just one core (the 0th core, which probably exists) @@ -73,7 +68,7 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) { // actual contents of shared group depends on machine core count require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName)) require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) - sharedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) + sharedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) sharedCpus, err := cpuset.Parse(string(sharedCpusRaw)) require.NoError(t, err) @@ -82,7 +77,7 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) { // check that the 0th core is allocated to reserved cgroup require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName)) - reservedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) + reservedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) reservedCpus, err := cpuset.Parse(string(reservedCpusRaw)) require.NoError(t, err) @@ -96,7 
+91,7 @@ func TestCpusetManager_V1_AddAlloc_single(t *testing.T) { require.True(t, ok) require.DirExists(t, taskInfo.CgroupPath) - taskCpusRaw, err := ioutil.ReadFile(filepath.Join(taskInfo.CgroupPath, "cpuset.cpus")) + taskCpusRaw, err := os.ReadFile(filepath.Join(taskInfo.CgroupPath, "cpuset.cpus")) require.NoError(t, err) taskCpus, err := cpuset.Parse(string(taskCpusRaw)) require.NoError(t, err) @@ -114,7 +109,7 @@ func TestCpusetManager_V1_RemoveAlloc(t *testing.T) { manager, cleanup := tmpCpusetManagerV1(t) defer cleanup() - require.NoError(t, manager.Init(nil)) + manager.Init() alloc1 := mock.Alloc() alloc1Cpuset := cpuset.New(manager.parentCpuset.ToSlice()[0]) @@ -130,14 +125,14 @@ func TestCpusetManager_V1_RemoveAlloc(t *testing.T) { manager.reconcileCpusets() // shared cpuset should not include any expected cores - sharedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) + sharedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) sharedCpus, err := cpuset.Parse(string(sharedCpusRaw)) require.NoError(t, err) require.False(t, sharedCpus.ContainsAny(alloc1Cpuset.Union(alloc2Cpuset))) // reserved cpuset should equal the expected cpus - reservedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) + reservedCpusRaw, err := os.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) reservedCpus, err := cpuset.Parse(string(reservedCpusRaw)) require.NoError(t, err) @@ -152,7 +147,7 @@ func TestCpusetManager_V1_RemoveAlloc(t *testing.T) { require.NoDirExists(t, alloc1TaskPath) // shared cpuset should now include alloc1's cores - sharedCpusRaw, err = ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) + sharedCpusRaw, err = os.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) sharedCpus, err = cpuset.Parse(string(sharedCpusRaw)) require.NoError(t, err) @@ -160,7 +155,7 @@ func TestCpusetManager_V1_RemoveAlloc(t *testing.T) { require.True(t, sharedCpus.IsSupersetOf(alloc1Cpuset)) // reserved cpuset should only include alloc2's cores - reservedCpusRaw, err = ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) + reservedCpusRaw, err = os.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) reservedCpus, err = cpuset.Parse(string(reservedCpusRaw)) require.NoError(t, err) diff --git a/client/lib/cgutil/cpuset_manager_v2.go b/client/lib/cgutil/cpuset_manager_v2.go index 74a8a4f4f74..5f562e9bf35 100644 --- a/client/lib/cgutil/cpuset_manager_v2.go +++ b/client/lib/cgutil/cpuset_manager_v2.go @@ -11,6 +11,7 @@ import ( "time" "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-set" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/lib/cpuset" "github.com/hashicorp/nomad/nomad/structs" @@ -56,24 +57,67 @@ type cpusetManagerV2 struct { isolating map[identity]cpuset.CPUSet // isolating tasks using cores from the pool + reserved cores } -func NewCpusetManagerV2(parent string, logger hclog.Logger) CpusetManager { +func NewCpusetManagerV2(parent string, reservable []uint16, logger hclog.Logger) CpusetManager { + if err := minimumRootControllers(); err != nil { + logger.Error("failed to enable minimum set of cgroup
controllers; disabling cpuset management", "error", err) + return new(NoopCpusetManager) + } + + parentAbs := filepath.Join(CgroupRoot, parent) + if err := os.MkdirAll(parentAbs, 0o755); err != nil { + logger.Error("failed to ensure nomad parent cgroup exists; disabling cpuset management", "error", err) + return new(NoopCpusetManager) + } + + if len(reservable) == 0 { + // read from the parent cgroup + if cpus, err := GetCPUsFromCgroup(parent); err != nil { + logger.Error("failed to lookup cpus from parent cgroup; disabling cpuset management", "error", err) + return new(NoopCpusetManager) + } else { + reservable = cpus + } + } + return &cpusetManagerV2{ + initial: cpuset.New(reservable...), parent: parent, - parentAbs: filepath.Join(CgroupRoot, parent), + parentAbs: parentAbs, logger: logger, sharing: make(map[identity]nothing), isolating: make(map[identity]cpuset.CPUSet), } } -func (c *cpusetManagerV2) Init(cores []uint16) error { - c.logger.Debug("initializing with", "cores", cores) - if err := c.ensureParent(); err != nil { - c.logger.Error("failed to init cpuset manager", "err", err) +// minimumRootControllers sets the minimum set of required controllers on the +// /sys/fs/cgroup/cgroup.subtree_control file - ensuring [cpuset, cpu, io, memory, pids] +// are enabled. +func minimumRootControllers() error { + e := new(editor) + s, err := e.read("cgroup.subtree_control") + if err != nil { return err } - c.initial = cpuset.New(cores...) - return nil + + required := set.From[string]([]string{"cpuset", "cpu", "io", "memory", "pids"}) + enabled := set.From[string](strings.Fields(s)) + needed := required.Difference(enabled) + + if needed.Size() == 0 { + return nil // already sufficient + } + + sb := new(strings.Builder) + for _, controller := range needed.List() { + sb.WriteString("+" + controller + " ") + } + + activation := strings.TrimSpace(sb.String()) + return e.write("cgroup.subtree_control", activation) +} + +func (c *cpusetManagerV2) Init() { + c.logger.Debug("initializing with", "cores", c.initial) } func (c *cpusetManagerV2) AddAlloc(alloc *structs.Allocation) { @@ -229,7 +273,7 @@ func (c *cpusetManagerV2) cleanup() { } } -//pathOf returns the absolute path to a task with identity id. +// pathOf returns the absolute path to a task with identity id. func (c *cpusetManagerV2) pathOf(id identity) string { return filepath.Join(c.parentAbs, makeScope(id)) } @@ -285,22 +329,6 @@ func (c *cpusetManagerV2) write(id identity, set cpuset.CPUSet) { } } -// ensureParentCgroup will create parent cgroup for the manager if it does not -// exist yet. No PIDs are added to any cgroup yet. -func (c *cpusetManagerV2) ensureParent() error { - mgr, err := fs2.NewManager(nil, c.parentAbs, rootless) - if err != nil { - return err - } - - if err = mgr.Apply(CreationPID); err != nil { - return err - } - - c.logger.Trace("establish cgroup hierarchy", "parent", c.parent) - return nil -} - // fromRoot returns the joined filepath of group on the CgroupRoot func fromRoot(group string) string { return filepath.Join(CgroupRoot, group) @@ -320,12 +348,3 @@ func getCPUsFromCgroupV2(group string) ([]uint16, error) { } return set.ToSlice(), nil } - -// getParentV2 returns parent if set, otherwise the default name of Nomad's -// parent cgroup (i.e. nomad.slice).
-func getParentV2(parent string) string { - if parent == "" { - return DefaultCgroupParentV2 - } - return parent -} diff --git a/client/lib/cgutil/cpuset_manager_v2_test.go b/client/lib/cgutil/cpuset_manager_v2_test.go index a6acc50e76f..ac5489aa01c 100644 --- a/client/lib/cgutil/cpuset_manager_v2_test.go +++ b/client/lib/cgutil/cpuset_manager_v2_test.go @@ -32,8 +32,8 @@ func TestCpusetManager_V2_AddAlloc(t *testing.T) { cleanup(t, parent) // setup the cpuset manager - manager := NewCpusetManagerV2(parent, logger) - require.NoError(t, manager.Init(systemCores)) + manager := NewCpusetManagerV2(parent, systemCores, logger) + manager.Init() // add our first alloc, isolating 1 core t.Run("first", func(t *testing.T) { @@ -72,8 +72,8 @@ func TestCpusetManager_V2_RemoveAlloc(t *testing.T) { cleanup(t, parent) // setup the cpuset manager - manager := NewCpusetManagerV2(parent, logger) - require.NoError(t, manager.Init(systemCores)) + manager := NewCpusetManagerV2(parent, systemCores, logger) + manager.Init() // alloc1 gets core 0 alloc1 := mock.Alloc() diff --git a/client/lib/cgutil/editor.go b/client/lib/cgutil/editor.go new file mode 100644 index 00000000000..4f354b98eee --- /dev/null +++ b/client/lib/cgutil/editor.go @@ -0,0 +1,27 @@ +//go:build linux + +package cgutil + +import ( + "os" + "path/filepath" + "strings" +) + +// editor provides a simple mechanism for reading and writing cgroup files. +type editor struct { + fromRoot string +} + +func (e *editor) path(file string) string { + return filepath.Join(CgroupRoot, e.fromRoot, file) +} + +func (e *editor) write(file, content string) error { + return os.WriteFile(e.path(file), []byte(content), 0o644) +} + +func (e *editor) read(file string) (string, error) { + b, err := os.ReadFile(e.path(file)) + return strings.TrimSpace(string(b)), err +} diff --git a/client/lib/cgutil/editor_test.go b/client/lib/cgutil/editor_test.go new file mode 100644 index 00000000000..ad9a5c3194c --- /dev/null +++ b/client/lib/cgutil/editor_test.go @@ -0,0 +1,39 @@ +//go:build linux + +package cgutil + +import ( + "os" + "path/filepath" + "testing" + + "github.com/hashicorp/nomad/client/testutil" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/shoenig/test/must" +) + +func createCG(t *testing.T) (string, func()) { + name := uuid.Short() + ".scope" + path := filepath.Join(CgroupRoot, name) + err := os.Mkdir(path, 0o755) + must.NoError(t, err) + + return name, func() { + _ = os.Remove(path) + } +} + +func TestCG_editor(t *testing.T) { + testutil.CgroupsCompatibleV2(t) + + cg, rm := createCG(t) + t.Cleanup(rm) + + edits := &editor{cg} + writeErr := edits.write("cpu.weight.nice", "13") + must.NoError(t, writeErr) + + b, readErr := edits.read("cpu.weight.nice") + must.NoError(t, readErr) + must.Eq(t, "13", b) +} diff --git a/client/lib/cgutil/group_killer.go b/client/lib/cgutil/group_killer.go index 9eeae7fefee..cb72c440aae 100644 --- a/client/lib/cgutil/group_killer.go +++ b/client/lib/cgutil/group_killer.go @@ -6,13 +6,12 @@ import ( "errors" "fmt" "os" - "path/filepath" + "strconv" "time" "github.com/hashicorp/go-hclog" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/opencontainers/runc/libcontainer/cgroups/fs2" "github.com/opencontainers/runc/libcontainer/configs" ) @@ -96,55 +95,25 @@ func (d *killer) v1(cgroup *configs.Cgroup) error { } func (d *killer) v2(cgroup *configs.Cgroup) error { - if cgroup == nil { + if cgroup == nil || cgroup.Path == "" { return errors.New("missing 
cgroup") } - path := filepath.Join(CgroupRoot, cgroup.Path) - - existingPIDs, err := cgroups.GetPids(path) - if err != nil { - return fmt.Errorf("failed to determine pids in cgroup: %w", err) - } - - d.logger.Trace("killing processes", "cgroup_path", path, "cgroup_version", "v2", "executor_pid", d.pid, "existing_pids", existingPIDs) - - mgr, err := fs2.NewManager(cgroup, "", rootless) - if err != nil { - return fmt.Errorf("failed to create v2 cgroup manager: %w", err) - } - - // move executor PID into the root init.scope so we can kill the task pids - // without killing the executor (which is the process running this code, doing - // the killing) - init, err := fs2.NewManager(nil, filepath.Join(CgroupRoot, "init.scope"), rootless) - if err != nil { - return fmt.Errorf("failed to create v2 init cgroup manager: %w", err) - } - if err = init.Apply(d.pid); err != nil { - return fmt.Errorf("failed to move executor pid into init.scope cgroup: %w", err) - } - - d.logger.Trace("move of executor pid into init.scope complete", "pid", d.pid) - - // ability to freeze the cgroup - freeze := func() { - _ = mgr.Freeze(configs.Frozen) - } - - // ability to thaw the cgroup - thaw := func() { - _ = mgr.Freeze(configs.Thawed) + // move executor (d.PID) into init.scope + editSelf := &editor{"init.scope"} + if err := editSelf.write("cgroup.procs", strconv.Itoa(d.pid)); err != nil { + return err } - // do the common kill logic - - if err = d.kill(path, freeze, thaw); err != nil { + // write "1" to cgroup.kill + editTask := &editor{cgroup.Path} + if err := editTask.write("cgroup.kill", "1"); err != nil { return err } - // remove the cgroup from disk - return mgr.Destroy() + // note: do NOT remove the cgroup from disk; leave that to the Client, at + // least until #14375 is implemented. + return nil } // kill is used to SIGKILL all processes in cgroup diff --git a/client/lib/fifo/fifo_test.go b/client/lib/fifo/fifo_test.go index bd397f7fa15..5c4ced17759 100644 --- a/client/lib/fifo/fifo_test.go +++ b/client/lib/fifo/fifo_test.go @@ -3,8 +3,6 @@ package fifo import ( "bytes" "io" - "io/ioutil" - "os" "path/filepath" "runtime" "sync" @@ -24,11 +22,7 @@ func TestFIFO(t *testing.T) { if runtime.GOOS == "windows" { path = "//./pipe/fifo" } else { - dir, err := ioutil.TempDir("", "") - require.NoError(err) - defer os.RemoveAll(dir) - - path = filepath.Join(dir, "fifo") + path = filepath.Join(t.TempDir(), "fifo") } readerOpenFn, err := CreateAndRead(path) @@ -88,11 +82,7 @@ func TestWriteClose(t *testing.T) { if runtime.GOOS == "windows" { path = "//./pipe/" + uuid.Generate()[:4] } else { - dir, err := ioutil.TempDir("", "") - require.NoError(err) - defer os.RemoveAll(dir) - - path = filepath.Join(dir, "fifo") + path = filepath.Join(t.TempDir(), "fifo") } readerOpenFn, err := CreateAndRead(path) diff --git a/client/logmon/logging/rotator.go b/client/logmon/logging/rotator.go index 010e316e212..2ddc96e47ca 100644 --- a/client/logmon/logging/rotator.go +++ b/client/logmon/logging/rotator.go @@ -4,7 +4,6 @@ import ( "bufio" "bytes" "fmt" - "io/ioutil" "os" "path/filepath" "sort" @@ -189,7 +188,7 @@ func (f *FileRotator) nextFile() error { // lastFile finds out the rotated file with the largest index in a path. 
func (f *FileRotator) lastFile() error { - finfos, err := ioutil.ReadDir(f.path) + finfos, err := os.ReadDir(f.path) if err != nil { return err } @@ -275,7 +274,7 @@ func (f *FileRotator) purgeOldFiles() { select { case <-f.purgeCh: var fIndexes []int - files, err := ioutil.ReadDir(f.path) + files, err := os.ReadDir(f.path) if err != nil { f.logger.Error("error getting directory listing", "err", err) return diff --git a/client/logmon/logging/rotator_test.go b/client/logmon/logging/rotator_test.go index da80925b8b3..0c9b41903f4 100644 --- a/client/logmon/logging/rotator_test.go +++ b/client/logmon/logging/rotator_test.go @@ -2,7 +2,6 @@ package logging import ( "fmt" - "io/ioutil" "math/rand" "os" "path/filepath" @@ -15,7 +14,6 @@ import ( ) var ( - pathPrefix = "logrotator" baseFileName = "redis.stdout" ) @@ -30,9 +28,7 @@ func TestFileRotator_IncorrectPath(t *testing.T) { func TestFileRotator_CreateNewFile(t *testing.T) { defer goleak.VerifyNone(t) - path, err := ioutil.TempDir("", pathPrefix) - require.NoError(t, err) - defer os.RemoveAll(path) + path := t.TempDir() fr, err := NewFileRotator(path, baseFileName, 10, 10, testlog.HCLogger(t)) require.NoError(t, err) @@ -45,9 +41,7 @@ func TestFileRotator_CreateNewFile(t *testing.T) { func TestFileRotator_OpenLastFile(t *testing.T) { defer goleak.VerifyNone(t) - path, err := ioutil.TempDir("", pathPrefix) - require.NoError(t, err) - defer os.RemoveAll(path) + path := t.TempDir() fname1 := filepath.Join(path, "redis.stdout.0") fname2 := filepath.Join(path, "redis.stdout.2") @@ -70,9 +64,7 @@ func TestFileRotator_OpenLastFile(t *testing.T) { func TestFileRotator_WriteToCurrentFile(t *testing.T) { defer goleak.VerifyNone(t) - path, err := ioutil.TempDir("", pathPrefix) - require.NoError(t, err) - defer os.RemoveAll(path) + path := t.TempDir() fname1 := filepath.Join(path, "redis.stdout.0") f1, err := os.Create(fname1) @@ -104,9 +96,7 @@ func TestFileRotator_WriteToCurrentFile(t *testing.T) { func TestFileRotator_RotateFiles(t *testing.T) { defer goleak.VerifyNone(t) - path, err := ioutil.TempDir("", pathPrefix) - require.NoError(t, err) - defer os.RemoveAll(path) + path := t.TempDir() fr, err := NewFileRotator(path, baseFileName, 10, 5, testlog.HCLogger(t)) require.NoError(t, err) @@ -149,9 +139,7 @@ func TestFileRotator_RotateFiles(t *testing.T) { func TestFileRotator_RotateFiles_Boundary(t *testing.T) { defer goleak.VerifyNone(t) - path, err := ioutil.TempDir("", pathPrefix) - require.NoError(t, err) - defer os.RemoveAll(path) + path := t.TempDir() fr, err := NewFileRotator(path, baseFileName, 10, 5, testlog.HCLogger(t)) require.NoError(t, err) @@ -197,12 +185,10 @@ func TestFileRotator_RotateFiles_Boundary(t *testing.T) { func TestFileRotator_WriteRemaining(t *testing.T) { defer goleak.VerifyNone(t) - path, err := ioutil.TempDir("", pathPrefix) - require.NoError(t, err) - defer os.RemoveAll(path) + path := t.TempDir() fname1 := filepath.Join(path, "redis.stdout.0") - err = ioutil.WriteFile(fname1, []byte("abcd"), 0600) + err := os.WriteFile(fname1, []byte("abcd"), 0600) require.NoError(t, err) fr, err := NewFileRotator(path, baseFileName, 10, 5, testlog.HCLogger(t)) @@ -259,9 +245,7 @@ func TestFileRotator_WriteRemaining(t *testing.T) { func TestFileRotator_PurgeOldFiles(t *testing.T) { defer goleak.VerifyNone(t) - path, err := ioutil.TempDir("", pathPrefix) - require.NoError(t, err) - defer os.RemoveAll(path) + path := t.TempDir() fr, err := NewFileRotator(path, baseFileName, 2, 2, testlog.HCLogger(t)) require.NoError(t, err) @@ -273,7 
+257,7 @@ func TestFileRotator_PurgeOldFiles(t *testing.T) { require.Equal(t, len(str), nw) testutil.WaitForResult(func() (bool, error) { - f, err := ioutil.ReadDir(path) + f, err := os.ReadDir(path) if err != nil { return false, fmt.Errorf("failed to read dir %v: %w", path, err) } @@ -298,9 +282,7 @@ func BenchmarkRotator(b *testing.B) { } func benchmarkRotatorWithInputSize(size int, b *testing.B) { - path, err := ioutil.TempDir("", pathPrefix) - require.NoError(b, err) - defer os.RemoveAll(path) + path := b.TempDir() fr, err := NewFileRotator(path, baseFileName, 5, 1024*1024, testlog.HCLogger(b)) require.NoError(b, err) diff --git a/client/logmon/logmon_test.go b/client/logmon/logmon_test.go index d8481cb7e06..05be035c325 100644 --- a/client/logmon/logmon_test.go +++ b/client/logmon/logmon_test.go @@ -3,7 +3,6 @@ package logmon import ( "crypto/rand" "fmt" - "io/ioutil" "os" "path/filepath" "runtime" @@ -23,9 +22,7 @@ func TestLogmon_Start_rotate(t *testing.T) { require := require.New(t) var stdoutFifoPath, stderrFifoPath string - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(err) - defer os.RemoveAll(dir) + dir := t.TempDir() if runtime.GOOS == "windows" { stdoutFifoPath = "//./pipe/test-rotate.stdout" @@ -89,9 +86,7 @@ func TestLogmon_Start_restart_flusheslogs(t *testing.T) { require := require.New(t) var stdoutFifoPath, stderrFifoPath string - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(err) - defer os.RemoveAll(dir) + dir := t.TempDir() if runtime.GOOS == "windows" { stdoutFifoPath = "//./pipe/test-restart.stdout" @@ -126,7 +121,7 @@ func TestLogmon_Start_restart_flusheslogs(t *testing.T) { require.NoError(err) testutil.WaitForResult(func() (bool, error) { - raw, err := ioutil.ReadFile(filepath.Join(dir, "stdout.0")) + raw, err := os.ReadFile(filepath.Join(dir, "stdout.0")) if err != nil { return false, err } @@ -155,7 +150,7 @@ func TestLogmon_Start_restart_flusheslogs(t *testing.T) { require.NoError(err) testutil.WaitForResult(func() (bool, error) { - raw, err := ioutil.ReadFile(filepath.Join(dir, "stdout.0")) + raw, err := os.ReadFile(filepath.Join(dir, "stdout.0")) if err != nil { return false, err } @@ -175,7 +170,7 @@ func TestLogmon_Start_restart_flusheslogs(t *testing.T) { _, err = stdout.Write([]byte("st\n")) require.NoError(err) testutil.WaitForResult(func() (bool, error) { - raw, err := ioutil.ReadFile(filepath.Join(dir, "stdout.0")) + raw, err := os.ReadFile(filepath.Join(dir, "stdout.0")) if err != nil { return false, err } @@ -194,9 +189,7 @@ func TestLogmon_Start_restart(t *testing.T) { require := require.New(t) var stdoutFifoPath, stderrFifoPath string - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(err) - defer os.RemoveAll(dir) + dir := t.TempDir() if runtime.GOOS == "windows" { stdoutFifoPath = "//./pipe/test-restart.stdout" @@ -220,6 +213,9 @@ func TestLogmon_Start_restart(t *testing.T) { impl, ok := lm.(*logmonImpl) require.True(ok) require.NoError(lm.Start(cfg)) + t.Cleanup(func() { + require.NoError(lm.Stop()) + }) stdout, err := fifo.OpenWriter(stdoutFifoPath) require.NoError(err) @@ -231,7 +227,7 @@ func TestLogmon_Start_restart(t *testing.T) { require.NoError(err) testutil.WaitForResult(func() (bool, error) { - raw, err := ioutil.ReadFile(filepath.Join(dir, "stdout.0")) + raw, err := os.ReadFile(filepath.Join(dir, "stdout.0")) if err != nil { return false, err } @@ -256,13 +252,20 @@ func TestLogmon_Start_restart(t *testing.T) { stdout, err = fifo.OpenWriter(stdoutFifoPath) require.NoError(err) + 
t.Cleanup(func() { + require.NoError(stdout.Close()) + }) + stderr, err = fifo.OpenWriter(stderrFifoPath) require.NoError(err) + t.Cleanup(func() { + require.NoError(stderr.Close()) + }) _, err = stdout.Write([]byte("test\n")) require.NoError(err) testutil.WaitForResult(func() (bool, error) { - raw, err := ioutil.ReadFile(filepath.Join(dir, "stdout.0")) + raw, err := os.ReadFile(filepath.Join(dir, "stdout.0")) if err != nil { return false, err } diff --git a/client/logmon/plugin.go b/client/logmon/plugin.go index 5d97b751d10..a21777357cb 100644 --- a/client/logmon/plugin.go +++ b/client/logmon/plugin.go @@ -5,22 +5,27 @@ import ( "os" "os/exec" - hclog "github.com/hashicorp/go-hclog" - plugin "github.com/hashicorp/go-plugin" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-plugin" "github.com/hashicorp/nomad/client/logmon/proto" "github.com/hashicorp/nomad/plugins/base" "google.golang.org/grpc" ) +var bin = getBin() + +func getBin() string { + b, err := os.Executable() + if err != nil { + panic(err) + } + return b +} + // LaunchLogMon launches a new logmon or reattaches to an existing one. // TODO: Integrate with base plugin loader func LaunchLogMon(logger hclog.Logger, reattachConfig *plugin.ReattachConfig) (LogMon, *plugin.Client, error) { logger = logger.Named("logmon") - bin, err := os.Executable() - if err != nil { - return nil, nil, err - } - conf := &plugin.ClientConfig{ HandshakeConfig: base.Handshake, Plugins: map[string]plugin.Plugin{ diff --git a/client/node_updater.go b/client/node_updater.go index 1be3e15d418..1b341ee7c01 100644 --- a/client/node_updater.go +++ b/client/node_updater.go @@ -41,18 +41,20 @@ SEND_BATCH: c.configLock.Lock() defer c.configLock.Unlock() + newConfig := c.config.Copy() + // csi updates var csiChanged bool c.batchNodeUpdates.batchCSIUpdates(func(name string, info *structs.CSIInfo) { - if c.updateNodeFromCSIControllerLocked(name, info) { - if c.config.Node.CSIControllerPlugins[name].UpdateTime.IsZero() { - c.config.Node.CSIControllerPlugins[name].UpdateTime = time.Now() + if c.updateNodeFromCSIControllerLocked(name, info, newConfig.Node) { + if newConfig.Node.CSIControllerPlugins[name].UpdateTime.IsZero() { + newConfig.Node.CSIControllerPlugins[name].UpdateTime = time.Now() } csiChanged = true } - if c.updateNodeFromCSINodeLocked(name, info) { - if c.config.Node.CSINodePlugins[name].UpdateTime.IsZero() { - c.config.Node.CSINodePlugins[name].UpdateTime = time.Now() + if c.updateNodeFromCSINodeLocked(name, info, newConfig.Node) { + if newConfig.Node.CSINodePlugins[name].UpdateTime.IsZero() { + newConfig.Node.CSINodePlugins[name].UpdateTime = time.Now() } csiChanged = true } @@ -61,10 +63,10 @@ SEND_BATCH: // driver node updates var driverChanged bool c.batchNodeUpdates.batchDriverUpdates(func(driver string, info *structs.DriverInfo) { - if c.updateNodeFromDriverLocked(driver, info) { - c.config.Node.Drivers[driver] = info - if c.config.Node.Drivers[driver].UpdateTime.IsZero() { - c.config.Node.Drivers[driver].UpdateTime = time.Now() + if c.applyNodeUpdatesFromDriver(driver, info, newConfig.Node) { + newConfig.Node.Drivers[driver] = info + if newConfig.Node.Drivers[driver].UpdateTime.IsZero() { + newConfig.Node.Drivers[driver].UpdateTime = time.Now() } driverChanged = true } @@ -74,13 +76,15 @@ SEND_BATCH: var devicesChanged bool c.batchNodeUpdates.batchDevicesUpdates(func(devices []*structs.NodeDeviceResource) { if c.updateNodeFromDevicesLocked(devices) { + newConfig.Node.NodeResources.Devices = devices devicesChanged = true } }) // only 
update the node if changes occurred if driverChanged || devicesChanged || csiChanged { - c.updateNodeLocked() + c.config = newConfig + c.updateNode() } close(c.fpInitialized) @@ -92,24 +96,27 @@ func (c *Client) updateNodeFromCSI(name string, info *structs.CSIInfo) { c.configLock.Lock() defer c.configLock.Unlock() + newConfig := c.config.Copy() + changed := false - if c.updateNodeFromCSIControllerLocked(name, info) { - if c.config.Node.CSIControllerPlugins[name].UpdateTime.IsZero() { - c.config.Node.CSIControllerPlugins[name].UpdateTime = time.Now() + if c.updateNodeFromCSIControllerLocked(name, info, newConfig.Node) { + if newConfig.Node.CSIControllerPlugins[name].UpdateTime.IsZero() { + newConfig.Node.CSIControllerPlugins[name].UpdateTime = time.Now() } changed = true } - if c.updateNodeFromCSINodeLocked(name, info) { - if c.config.Node.CSINodePlugins[name].UpdateTime.IsZero() { - c.config.Node.CSINodePlugins[name].UpdateTime = time.Now() + if c.updateNodeFromCSINodeLocked(name, info, newConfig.Node) { + if newConfig.Node.CSINodePlugins[name].UpdateTime.IsZero() { + newConfig.Node.CSINodePlugins[name].UpdateTime = time.Now() } changed = true } if changed { - c.updateNodeLocked() + c.config = newConfig + c.updateNode() } } @@ -119,7 +126,7 @@ func (c *Client) updateNodeFromCSI(name string, info *structs.CSIInfo) { // // It is safe to call for all CSI Updates, but will only perform changes when // a ControllerInfo field is present. -func (c *Client) updateNodeFromCSIControllerLocked(name string, info *structs.CSIInfo) bool { +func (c *Client) updateNodeFromCSIControllerLocked(name string, info *structs.CSIInfo, node *structs.Node) bool { var changed bool if info.ControllerInfo == nil { return false @@ -127,15 +134,15 @@ func (c *Client) updateNodeFromCSIControllerLocked(name string, info *structs.CS i := info.Copy() i.NodeInfo = nil - oldController, hadController := c.config.Node.CSIControllerPlugins[name] + oldController, hadController := node.CSIControllerPlugins[name] if !hadController { // If the controller info has not yet been set, do that here changed = true - c.config.Node.CSIControllerPlugins[name] = i + node.CSIControllerPlugins[name] = i } else { // The controller info has already been set, fix it up if !oldController.Equal(i) { - c.config.Node.CSIControllerPlugins[name] = i + node.CSIControllerPlugins[name] = i changed = true } @@ -162,7 +169,7 @@ func (c *Client) updateNodeFromCSIControllerLocked(name string, info *structs.CS // // It is safe to call for all CSI Updates, but will only perform changes when // a NodeInfo field is present. 
-func (c *Client) updateNodeFromCSINodeLocked(name string, info *structs.CSIInfo) bool { +func (c *Client) updateNodeFromCSINodeLocked(name string, info *structs.CSIInfo, node *structs.Node) bool { var changed bool if info.NodeInfo == nil { return false @@ -170,15 +177,15 @@ func (c *Client) updateNodeFromCSINodeLocked(name string, info *structs.CSIInfo) i := info.Copy() i.ControllerInfo = nil - oldNode, hadNode := c.config.Node.CSINodePlugins[name] + oldNode, hadNode := node.CSINodePlugins[name] if !hadNode { // If the Node info has not yet been set, do that here changed = true - c.config.Node.CSINodePlugins[name] = i + node.CSINodePlugins[name] = i } else { // The node info has already been set, fix it up if !oldNode.Equal(info) { - c.config.Node.CSINodePlugins[name] = i + node.CSINodePlugins[name] = i changed = true } @@ -205,30 +212,33 @@ func (c *Client) updateNodeFromDriver(name string, info *structs.DriverInfo) { c.configLock.Lock() defer c.configLock.Unlock() - if c.updateNodeFromDriverLocked(name, info) { - c.config.Node.Drivers[name] = info - if c.config.Node.Drivers[name].UpdateTime.IsZero() { - c.config.Node.Drivers[name].UpdateTime = time.Now() + newConfig := c.config.Copy() + + if c.applyNodeUpdatesFromDriver(name, info, newConfig.Node) { + newConfig.Node.Drivers[name] = info + if newConfig.Node.Drivers[name].UpdateTime.IsZero() { + newConfig.Node.Drivers[name].UpdateTime = time.Now() } - c.updateNodeLocked() + + c.config = newConfig + c.updateNode() } } -// updateNodeFromDriverLocked makes the changes to the node from a driver update -// but does not send the update to the server. c.configLock must be held before -// calling this func -func (c *Client) updateNodeFromDriverLocked(name string, info *structs.DriverInfo) bool { +// applyNodeUpdatesFromDriver applies changes to the passed in node. true is +// returned if the node has changed. 
+func (c *Client) applyNodeUpdatesFromDriver(name string, info *structs.DriverInfo, node *structs.Node) bool { var hasChanged bool - hadDriver := c.config.Node.Drivers[name] != nil + hadDriver := node.Drivers[name] != nil if !hadDriver { // If the driver info has not yet been set, do that here hasChanged = true for attrName, newVal := range info.Attributes { - c.config.Node.Attributes[attrName] = newVal + node.Attributes[attrName] = newVal } } else { - oldVal := c.config.Node.Drivers[name] + oldVal := node.Drivers[name] // The driver info has already been set, fix it up if oldVal.Detected != info.Detected { hasChanged = true @@ -247,16 +257,16 @@ func (c *Client) updateNodeFromDriverLocked(name string, info *structs.DriverInf } for attrName, newVal := range info.Attributes { - oldVal := c.config.Node.Drivers[name].Attributes[attrName] + oldVal := node.Drivers[name].Attributes[attrName] if oldVal == newVal { continue } hasChanged = true if newVal == "" { - delete(c.config.Node.Attributes, attrName) + delete(node.Attributes, attrName) } else { - c.config.Node.Attributes[attrName] = newVal + node.Attributes[attrName] = newVal } } } @@ -266,16 +276,14 @@ func (c *Client) updateNodeFromDriverLocked(name string, info *structs.DriverInf // their attributes as DriverInfo driverName := fmt.Sprintf("driver.%s", name) if info.Detected { - c.config.Node.Attributes[driverName] = "1" + node.Attributes[driverName] = "1" } else { - delete(c.config.Node.Attributes, driverName) + delete(node.Attributes, driverName) } return hasChanged } -// updateNodeFromFingerprint updates the node with the result of -// fingerprinting the node from the diff that was created func (c *Client) updateNodeFromDevices(devices []*structs.NodeDeviceResource) { c.configLock.Lock() defer c.configLock.Unlock() @@ -284,7 +292,7 @@ func (c *Client) updateNodeFromDevices(devices []*structs.NodeDeviceResource) { // dispatched task resources and not appropriate for expressing // node available device resources if c.updateNodeFromDevicesLocked(devices) { - c.updateNodeLocked() + c.updateNode() } } @@ -294,7 +302,9 @@ func (c *Client) updateNodeFromDevices(devices []*structs.NodeDeviceResource) { func (c *Client) updateNodeFromDevicesLocked(devices []*structs.NodeDeviceResource) bool { if !structs.DevicesEquals(c.config.Node.NodeResources.Devices, devices) { c.logger.Debug("new devices detected", "devices", len(devices)) - c.config.Node.NodeResources.Devices = devices + newConfig := c.config.Copy() + newConfig.Node.NodeResources.Devices = devices + c.config = newConfig return true } diff --git a/client/pluginmanager/csimanager/doc.go b/client/pluginmanager/csimanager/doc.go index 42400a09241..298bf18bf7a 100644 --- a/client/pluginmanager/csimanager/doc.go +++ b/client/pluginmanager/csimanager/doc.go @@ -1,15 +1,16 @@ -/** +/* +* csimanager manages locally running CSI Plugins on a Nomad host, and provides a few different interfaces. 
It provides: -- a pluginmanager.PluginManager implementation that is used to fingerprint and - heartbeat local node plugins -- (TODO) a csimanager.AttachmentWaiter implementation that can be used to wait for an - external CSIVolume to be attached to the node before returning -- (TODO) a csimanager.NodeController implementation that is used to manage the node-local - portions of the CSI specification, and encompassess volume staging/publishing -- (TODO) a csimanager.VolumeChecker implementation that can be used by hooks to ensure - their volumes are healthy(ish) + - a pluginmanager.PluginManager implementation that is used to fingerprint and + heartbeat local node plugins + - (TODO) a csimanager.AttachmentWaiter implementation that can be used to wait for an + external CSIVolume to be attached to the node before returning + - (TODO) a csimanager.NodeController implementation that is used to manage the node-local + portions of the CSI specification, and encompasses volume staging/publishing + - (TODO) a csimanager.VolumeChecker implementation that can be used by hooks to ensure + their volumes are healthy(ish) */ package csimanager diff --git a/client/pluginmanager/csimanager/fingerprint.go b/client/pluginmanager/csimanager/fingerprint.go index 981bb63c675..db10aac98ac 100644 --- a/client/pluginmanager/csimanager/fingerprint.go +++ b/client/pluginmanager/csimanager/fingerprint.go @@ -6,9 +6,9 @@ import ( "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/dynamicplugins" - "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/csi" + "golang.org/x/exp/maps" ) type pluginFingerprinter struct { @@ -181,6 +181,6 @@ func structCSITopologyFromCSITopology(a *csi.Topology) *structs.CSITopology { } return &structs.CSITopology{ - Segments: helper.CopyMapStringString(a.Segments), + Segments: maps.Clone(a.Segments), } } diff --git a/client/pluginmanager/csimanager/manager.go b/client/pluginmanager/csimanager/manager.go index a5b2f51cd44..178b0fe1513 100644 --- a/client/pluginmanager/csimanager/manager.go +++ b/client/pluginmanager/csimanager/manager.go @@ -149,7 +149,7 @@ func (c *csiManager) resyncPluginsFromRegistry(ptype string) { // handlePluginEvent syncs a single event against the plugin registry func (c *csiManager) handlePluginEvent(event *dynamicplugins.PluginUpdateEvent) { - if event == nil { + if event == nil || event.Info == nil { return } c.logger.Trace("dynamic plugin event", diff --git a/client/pluginmanager/csimanager/volume_test.go b/client/pluginmanager/csimanager/volume_test.go index 94ef2cf06b2..9650aae352e 100644 --- a/client/pluginmanager/csimanager/volume_test.go +++ b/client/pluginmanager/csimanager/volume_test.go @@ -3,7 +3,6 @@ package csimanager import ( "context" "errors" - "io/ioutil" "os" "runtime" "testing" @@ -18,13 +17,6 @@ import ( "github.com/stretchr/testify/require" ) -func tmpDir(t testing.TB) string { - t.Helper() - dir, err := ioutil.TempDir("", "nomad") - require.NoError(t, err) - return dir -} - func checkMountSupport() bool { path, err := os.Getwd() if err != nil { @@ -93,8 +85,7 @@ func TestVolumeManager_ensureStagingDir(t *testing.T) { } // Step 2: Test Setup - tmpPath := tmpDir(t) - defer os.RemoveAll(tmpPath) + tmpPath := t.TempDir() csiFake := &csifake.Client{} eventer := func(e *structs.NodeEvent) {} @@ -193,8 +184,7 @@ func TestVolumeManager_stageVolume(t *testing.T) { for _, tc := range cases { t.Run(tc.Name, func(t *testing.T) { - tmpPath := tmpDir(t) - defer
os.RemoveAll(tmpPath) + tmpPath := t.TempDir() csiFake := &csifake.Client{} csiFake.NextNodeStageVolumeErr = tc.PluginErr @@ -252,8 +242,7 @@ func TestVolumeManager_unstageVolume(t *testing.T) { for _, tc := range cases { t.Run(tc.Name, func(t *testing.T) { - tmpPath := tmpDir(t) - defer os.RemoveAll(tmpPath) + tmpPath := t.TempDir() csiFake := &csifake.Client{} csiFake.NextNodeUnstageVolumeErr = tc.PluginErr @@ -376,8 +365,7 @@ func TestVolumeManager_publishVolume(t *testing.T) { for _, tc := range cases { t.Run(tc.Name, func(t *testing.T) { - tmpPath := tmpDir(t) - defer os.RemoveAll(tmpPath) + tmpPath := t.TempDir() csiFake := &csifake.Client{} csiFake.NextNodePublishVolumeErr = tc.PluginErr @@ -444,8 +432,7 @@ func TestVolumeManager_unpublishVolume(t *testing.T) { for _, tc := range cases { t.Run(tc.Name, func(t *testing.T) { - tmpPath := tmpDir(t) - defer os.RemoveAll(tmpPath) + tmpPath := t.TempDir() csiFake := &csifake.Client{} csiFake.NextNodeUnpublishVolumeErr = tc.PluginErr @@ -474,8 +461,7 @@ func TestVolumeManager_MountVolumeEvents(t *testing.T) { } ci.Parallel(t) - tmpPath := tmpDir(t) - defer os.RemoveAll(tmpPath) + tmpPath := t.TempDir() csiFake := &csifake.Client{} diff --git a/client/rpc.go b/client/rpc.go index 7d1dfb6e40f..54e2edec569 100644 --- a/client/rpc.go +++ b/client/rpc.go @@ -47,9 +47,11 @@ func (c *Client) StreamingRpcHandler(method string) (structs.StreamingRpcHandler // RPC is used to forward an RPC call to a nomad server, or fail if no servers. func (c *Client) RPC(method string, args interface{}, reply interface{}) error { + conf := c.GetConfig() + // Invoke the RPCHandler if it exists - if c.config.RPCHandler != nil { - return c.config.RPCHandler.RPC(method, args, reply) + if conf.RPCHandler != nil { + return conf.RPCHandler.RPC(method, args, reply) } // We will try to automatically retry requests that fail due to things like server unavailability @@ -60,7 +62,7 @@ func (c *Client) RPC(method string, args interface{}, reply interface{}) error { // to the leader they may also allow for an RPCHoldTimeout while waiting for leader election. // That's OK, we won't double up because we are using it here not as a sleep but // as a hint to give up - deadline = deadline.Add(c.config.RPCHoldTimeout) + deadline = deadline.Add(conf.RPCHoldTimeout) // If its a blocking query, allow the time specified by the request if info, ok := args.(structs.RPCInfo); ok { @@ -68,34 +70,37 @@ func (c *Client) RPC(method string, args interface{}, reply interface{}) error { } TRY: + var rpcErr error + server := c.servers.FindServer() if server == nil { - return noServersErr - } - - // Make the request. - rpcErr := c.connPool.RPC(c.Region(), server.Addr, method, args, reply) + rpcErr = noServersErr + } else { + // Make the request. + rpcErr = c.connPool.RPC(c.Region(), server.Addr, method, args, reply) - if rpcErr == nil { - c.fireRpcRetryWatcher() - return nil - } + if rpcErr == nil { + c.fireRpcRetryWatcher() + return nil + } - // If shutting down, exit without logging the error - select { - case <-c.shutdownCh: - return nil - default: - } + // If shutting down, exit without logging the error + select { + case <-c.shutdownCh: + return nil + default: + } - // Move off to another server, and see if we can retry. - c.rpcLogger.Error("error performing RPC to server", "error", rpcErr, "rpc", method, "server", server.Addr) - c.servers.NotifyFailedServer(server) + // Move off to another server, and see if we can retry. 
+ c.rpcLogger.Error("error performing RPC to server", "error", rpcErr, "rpc", method, "server", server.Addr) + c.servers.NotifyFailedServer(server) - if !canRetry(args, rpcErr) { - c.rpcLogger.Error("error performing RPC to server which is not safe to automatically retry", "error", rpcErr, "rpc", method, "server", server.Addr) - return rpcErr + if !canRetry(args, rpcErr) { + c.rpcLogger.Error("error performing RPC to server which is not safe to automatically retry", "error", rpcErr, "rpc", method, "server", server.Addr) + return rpcErr + } } + if time.Now().After(deadline) { // Blocking queries are tricky. jitters and rpcholdtimes in multiple places can result in our server call taking longer than we wanted it to. For example: // a block time of 5s may easily turn into the server blocking for 10s since it applies its own RPCHoldTime. If the server dies at t=7s we still want to retry @@ -104,12 +109,12 @@ TRY: info.SetTimeToBlock(0) return c.RPC(method, args, reply) } - c.rpcLogger.Error("error performing RPC to server, deadline exceeded, cannot retry", "error", rpcErr, "rpc", method, "server", server.Addr) + c.rpcLogger.Error("error performing RPC to server, deadline exceeded, cannot retry", "error", rpcErr, "rpc", method) return rpcErr } // Wait to avoid thundering herd - timer, cancel := helper.NewSafeTimer(helper.RandomStagger(c.config.RPCHoldTimeout / structs.JitterFraction)) + timer, cancel := helper.NewSafeTimer(helper.RandomStagger(conf.RPCHoldTimeout / structs.JitterFraction)) defer cancel() select { diff --git a/client/serviceregistration/service_registration.go b/client/serviceregistration/service_registration.go index 4981ff1cfeb..e5a37c6182e 100644 --- a/client/serviceregistration/service_registration.go +++ b/client/serviceregistration/service_registration.go @@ -4,8 +4,8 @@ import ( "context" "github.com/hashicorp/consul/api" - "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" + "golang.org/x/exp/maps" ) // Handler is the interface the Nomad Client uses to register, update and @@ -130,7 +130,7 @@ type ServiceRegistration struct { // services/checks registered in Consul. It is used to materialize the other // fields when queried. ServiceID string - CheckIDs map[string]struct{} + CheckIDs map[string]struct{} // todo: use a Set? // CheckOnUpdate is a map of checkIDs and the associated OnUpdate value // from the ServiceCheck It is used to determine how a reported checks @@ -151,7 +151,7 @@ func (s *ServiceRegistration) copy() *ServiceRegistration { // external fields. return &ServiceRegistration{ ServiceID: s.ServiceID, - CheckIDs: helper.CopyMapStringStruct(s.CheckIDs), - CheckOnUpdate: helper.CopyMapStringString(s.CheckOnUpdate), + CheckIDs: maps.Clone(s.CheckIDs), + CheckOnUpdate: maps.Clone(s.CheckOnUpdate), } } diff --git a/client/serviceregistration/workload.go b/client/serviceregistration/workload.go index 064f4fa0630..37a19c6b9dc 100644 --- a/client/serviceregistration/workload.go +++ b/client/serviceregistration/workload.go @@ -32,13 +32,13 @@ type WorkloadServices struct { Namespace string // Restarter allows restarting the task or task group depending on the - // check_restart stanzas. + // check_restart blocks. Restarter WorkloadRestarter // Services and checks to register for the task. Services []*structs.Service - // Networks from the task's resources stanza. + // Networks from the task's resources block. 
// TODO: remove and use Ports Networks structs.Networks diff --git a/client/state/08types.go b/client/state/08types.go index a10d537f0af..b4cc54bf702 100644 --- a/client/state/08types.go +++ b/client/state/08types.go @@ -16,7 +16,6 @@ import ( // changed over the life-cycle of the alloc_runner in Nomad 0.8. // // https://github.com/hashicorp/nomad/blob/v0.8.6/client/alloc_runner.go#L146-L153 -// type allocRunnerMutableState08 struct { // AllocClientStatus does not need to be upgraded as it is computed // from task states. diff --git a/client/state/db_test.go b/client/state/db_test.go index 05081d14df6..ad928878a15 100644 --- a/client/state/db_test.go +++ b/client/state/db_test.go @@ -1,7 +1,6 @@ package state import ( - "io/ioutil" "os" "reflect" "sync" @@ -20,9 +19,8 @@ import ( "github.com/stretchr/testify/require" ) -func setupBoltStateDB(t *testing.T) (*BoltStateDB, func()) { - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(t, err) +func setupBoltStateDB(t *testing.T) *BoltStateDB { + dir := t.TempDir() db, err := NewBoltStateDB(testlog.HCLogger(t), dir) if err != nil { @@ -32,21 +30,17 @@ func setupBoltStateDB(t *testing.T) (*BoltStateDB, func()) { t.Fatalf("error creating boltdb: %v", err) } - cleanup := func() { + t.Cleanup(func() { if err := db.Close(); err != nil { t.Errorf("error closing boltdb: %v", err) } - if err := os.RemoveAll(dir); err != nil { - t.Logf("error removing boltdb dir: %v", err) - } - } + }) - return db.(*BoltStateDB), cleanup + return db.(*BoltStateDB) } func testDB(t *testing.T, f func(*testing.T, StateDB)) { - boltdb, cleanup := setupBoltStateDB(t) - defer cleanup() + boltdb := setupBoltStateDB(t) memdb := NewMemDB(testlog.HCLogger(t)) diff --git a/client/state/upgrade.go b/client/state/upgrade.go index 31f8c3bef6d..863dbe6e3dc 100644 --- a/client/state/upgrade.go +++ b/client/state/upgrade.go @@ -82,15 +82,14 @@ func backupDB(bdb *bbolt.DB, dst string) error { // UpgradeAllocs upgrades the boltdb schema. 
Example 0.8 schema: // -// * allocations -// * 15d83e8a-74a2-b4da-3f17-ed5c12895ea8 -// * echo -// - simple-all (342 bytes) -// - alloc (2827 bytes) -// - alloc-dir (166 bytes) -// - immutable (15 bytes) -// - mutable (1294 bytes) -// +// - allocations +// - 15d83e8a-74a2-b4da-3f17-ed5c12895ea8 +// - echo +// - simple-all (342 bytes) +// - alloc (2827 bytes) +// - alloc-dir (166 bytes) +// - immutable (15 bytes) +// - mutable (1294 bytes) func UpgradeAllocs(logger hclog.Logger, tx *boltdd.Tx) error { btx := tx.BoltTx() allocationsBucket := btx.Bucket(allocationsBucketName) diff --git a/client/state/upgrade_int_test.go b/client/state/upgrade_int_test.go index 8f61609bdaa..5bcf12d5cf1 100644 --- a/client/state/upgrade_int_test.go +++ b/client/state/upgrade_int_test.go @@ -5,7 +5,6 @@ import ( "compress/gzip" "fmt" "io" - "io/ioutil" "os" "path/filepath" "strings" @@ -72,9 +71,7 @@ func TestBoltStateDB_UpgradeOld_Ok(t *testing.T) { for _, fn := range pre09files { t.Run(fn, func(t *testing.T) { - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(t, err) - defer os.RemoveAll(dir) + dir := t.TempDir() db := dbFromTestFile(t, dir, fn) defer db.Close() @@ -133,9 +130,7 @@ func TestBoltStateDB_UpgradeOld_Ok(t *testing.T) { t.Run("testdata/state-1.2.6.db.gz", func(t *testing.T) { fn := "testdata/state-1.2.6.db.gz" - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(t, err) - defer os.RemoveAll(dir) + dir := t.TempDir() db := dbFromTestFile(t, dir, fn) defer db.Close() diff --git a/client/state/upgrade_test.go b/client/state/upgrade_test.go index 5f248d787a5..9409173536a 100644 --- a/client/state/upgrade_test.go +++ b/client/state/upgrade_test.go @@ -2,8 +2,6 @@ package state import ( "fmt" - "io/ioutil" - "os" "path/filepath" "testing" @@ -15,20 +13,17 @@ import ( "go.etcd.io/bbolt" ) -func setupBoltDB(t *testing.T) (*bbolt.DB, func()) { - dir, err := ioutil.TempDir("", "nomadtest") - require.NoError(t, err) +func setupBoltDB(t *testing.T) *bbolt.DB { + dir := t.TempDir() db, err := bbolt.Open(filepath.Join(dir, "state.db"), 0666, nil) - if err != nil { - os.RemoveAll(dir) - require.NoError(t, err) - } + require.NoError(t, err) - return db, func() { + t.Cleanup(func() { require.NoError(t, db.Close()) - require.NoError(t, os.RemoveAll(dir)) - } + }) + + return db } // TestUpgrade_NeedsUpgrade_New asserts new state dbs do not need upgrading. @@ -36,8 +31,7 @@ func TestUpgrade_NeedsUpgrade_New(t *testing.T) { ci.Parallel(t) // Setting up a new StateDB should initialize it at the latest version. 
- db, cleanup := setupBoltStateDB(t) - defer cleanup() + db := setupBoltStateDB(t) to09, to12, err := NeedsUpgrade(db.DB().BoltDB()) require.NoError(t, err) @@ -50,8 +44,7 @@ func TestUpgrade_NeedsUpgrade_New(t *testing.T) { func TestUpgrade_NeedsUpgrade_Old(t *testing.T) { ci.Parallel(t) - db, cleanup := setupBoltDB(t) - defer cleanup() + db := setupBoltDB(t) // Create the allocations bucket which exists in both the old and 0.9 // schemas @@ -89,8 +82,7 @@ func TestUpgrade_NeedsUpgrade_Error(t *testing.T) { for _, tc := range cases { tc := tc t.Run(fmt.Sprintf("%v", tc), func(t *testing.T) { - db, cleanup := setupBoltDB(t) - defer cleanup() + db := setupBoltDB(t) require.NoError(t, db.Update(func(tx *bbolt.Tx) error { bkt, err := tx.CreateBucketIfNotExists(metaBucketName) @@ -110,8 +102,7 @@ func TestUpgrade_NeedsUpgrade_Error(t *testing.T) { func TestUpgrade_DeleteInvalidAllocs_NoAlloc(t *testing.T) { ci.Parallel(t) - bdb, cleanup := setupBoltDB(t) - defer cleanup() + bdb := setupBoltDB(t) db := boltdd.New(bdb) @@ -155,8 +146,7 @@ func TestUpgrade_DeleteInvalidAllocs_NoAlloc(t *testing.T) { func TestUpgrade_upgradeTaskBucket_InvalidEntries(t *testing.T) { ci.Parallel(t) - db, cleanup := setupBoltDB(t) - defer cleanup() + db := setupBoltDB(t) taskName := []byte("fake-task") diff --git a/client/taskenv/env.go b/client/taskenv/env.go index d40b52ab95c..1ed6f51814f 100644 --- a/client/taskenv/env.go +++ b/client/taskenv/env.go @@ -11,6 +11,7 @@ import ( "github.com/hashicorp/nomad/helper" hargs "github.com/hashicorp/nomad/helper/args" + "github.com/hashicorp/nomad/helper/escapingfs" "github.com/hashicorp/nomad/lib/cpuset" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" @@ -341,7 +342,7 @@ func (t *TaskEnv) replaceEnvClient(arg string) string { // directory path fields of this TaskEnv func (t *TaskEnv) checkEscape(testPath string) bool { for _, p := range []string{t.clientTaskDir, t.clientSharedAllocDir} { - if p != "" && !helper.PathEscapesSandbox(p, testPath) { + if p != "" && !escapingfs.PathEscapesSandbox(p, testPath) { return false } } @@ -903,7 +904,6 @@ func (b *Builder) SetDriverNetwork(n *drivers.DriverNetwork) *Builder { // Handled by setAlloc -> otherPorts: // // Task: NOMAD_TASK_{IP,PORT,ADDR}__