From 8390a491b0cd102482b83a2f0c7fdbf976610f2e Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 25 Sep 2024 09:46:49 -0700 Subject: [PATCH] add `cargo xtask ls-apis` (#6561) --- .github/buildomat/build-and-test.sh | 14 + .../buildomat/jobs/build-and-test-helios.sh | 3 + .../buildomat/jobs/build-and-test-linux.sh | 3 + Cargo.lock | 16 + Cargo.toml | 2 + dev-tools/ls-apis/Cargo.toml | 20 + dev-tools/ls-apis/README.adoc | 210 ++++++ dev-tools/ls-apis/api-manifest.toml | 442 ++++++++++++ dev-tools/ls-apis/src/api_metadata.rs | 246 +++++++ dev-tools/ls-apis/src/bin/ls-apis.rs | 248 +++++++ dev-tools/ls-apis/src/cargo.rs | 377 +++++++++++ dev-tools/ls-apis/src/lib.rs | 87 +++ dev-tools/ls-apis/src/system_apis.rs | 629 ++++++++++++++++++ dev-tools/ls-apis/src/workspaces.rs | 292 ++++++++ dev-tools/openapi-manager/src/spec.rs | 1 + dev-tools/xtask/src/main.rs | 4 + package-manifest.toml | 7 + 17 files changed, 2601 insertions(+) create mode 100644 dev-tools/ls-apis/Cargo.toml create mode 100644 dev-tools/ls-apis/README.adoc create mode 100644 dev-tools/ls-apis/api-manifest.toml create mode 100644 dev-tools/ls-apis/src/api_metadata.rs create mode 100644 dev-tools/ls-apis/src/bin/ls-apis.rs create mode 100644 dev-tools/ls-apis/src/cargo.rs create mode 100644 dev-tools/ls-apis/src/lib.rs create mode 100644 dev-tools/ls-apis/src/system_apis.rs create mode 100644 dev-tools/ls-apis/src/workspaces.rs diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index fba501b08a..64efc9eb3b 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -79,6 +79,19 @@ export RUSTC_BOOTSTRAP=1 # We report build progress to stderr, and the "--timings=json" output goes to stdout. ptime -m cargo build -Z unstable-options --timings=json --workspace --tests --locked --verbose 1> "$OUTPUT_DIR/crate-build-timings.json" +# Do some test runs of the `ls-apis` command. +# +# This may require cloning some dependent private repos. 
We do this before the +# main battery of tests because the GitHub tokens required for this only last +# for an hour so we want to get this done early. +banner ls-apis +( + source ./tools/include/force-git-over-https.sh; + ptime -m cargo xtask ls-apis apis && + ptime -m cargo xtask ls-apis deployment-units && + ptime -m cargo xtask ls-apis servers +) + # # We apply our own timeout to ensure that we get a normal failure on timeout # rather than a buildomat timeout. See oxidecomputer/buildomat#8. @@ -95,6 +108,7 @@ ptime -m timeout 1h cargo test --doc --locked --verbose --no-fail-fast # Build the live-tests. This is only supported on illumos. # We also can't actually run them here. See the README for more details. if [[ $target_os == "illumos" ]]; then + banner "live-test" ptime -m cargo xtask live-tests fi diff --git a/.github/buildomat/jobs/build-and-test-helios.sh b/.github/buildomat/jobs/build-and-test-helios.sh index a0137eddde..326fafa643 100755 --- a/.github/buildomat/jobs/build-and-test-helios.sh +++ b/.github/buildomat/jobs/build-and-test-helios.sh @@ -10,5 +10,8 @@ #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", #: ] +#: access_repos = [ +#: "oxidecomputer/dendrite", +#: ] exec .github/buildomat/build-and-test.sh illumos diff --git a/.github/buildomat/jobs/build-and-test-linux.sh b/.github/buildomat/jobs/build-and-test-linux.sh index bc4de5cc1a..07328fffca 100755 --- a/.github/buildomat/jobs/build-and-test-linux.sh +++ b/.github/buildomat/jobs/build-and-test-linux.sh @@ -10,5 +10,8 @@ #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", #: ] +#: access_repos = [ +#: "oxidecomputer/dendrite", +#: ] exec .github/buildomat/build-and-test.sh linux diff --git a/Cargo.lock b/Cargo.lock index ceba971d3c..da60ee64c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6609,6 +6609,22 @@ dependencies = [ "uuid", ] +[[package]] +name = "omicron-ls-apis" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + 
"cargo_metadata", + "clap", + "newtype_derive", + "omicron-workspace-hack", + "parse-display", + "petgraph", + "serde", + "toml 0.8.19", +] + [[package]] name = "omicron-nexus" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index b8ae255512..9732d16a24 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "dev-tools/crdb-seed", "dev-tools/db-dev", "dev-tools/downloader", + "dev-tools/ls-apis", "dev-tools/mgs-dev", "dev-tools/omdb", "dev-tools/omicron-dev", @@ -145,6 +146,7 @@ default-members = [ "dev-tools/crdb-seed", "dev-tools/db-dev", "dev-tools/downloader", + "dev-tools/ls-apis", "dev-tools/mgs-dev", "dev-tools/omdb", "dev-tools/omicron-dev", diff --git a/dev-tools/ls-apis/Cargo.toml b/dev-tools/ls-apis/Cargo.toml new file mode 100644 index 0000000000..f66f3e4ee2 --- /dev/null +++ b/dev-tools/ls-apis/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "omicron-ls-apis" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +anyhow.workspace = true +camino.workspace = true +cargo_metadata.workspace = true +clap.workspace = true +newtype_derive.workspace = true +parse-display.workspace = true +petgraph.workspace = true +serde.workspace = true +toml.workspace = true +omicron-workspace-hack.workspace = true diff --git a/dev-tools/ls-apis/README.adoc b/dev-tools/ls-apis/README.adoc new file mode 100644 index 0000000000..00c229bff2 --- /dev/null +++ b/dev-tools/ls-apis/README.adoc @@ -0,0 +1,210 @@ +:showtitle: +:toc: left +:icons: font + += ls-apis: list information about Dropshot/Progenitor-based APIs + +This tool attempts to identify Progenitor-based API dependencies between Rust components and show information about them. The focus is on providing information to inform the online upgrade project. + +== Quick start + +=== Listing APIs and their consumers + +``` +$ cargo xtask ls-apis apis +... 
+Nexus Internal API (client: nexus-client) + consumed by: dpd (dendrite/dpd) + consumed by: omicron-nexus (omicron/nexus) + consumed by: omicron-sled-agent (omicron/sled-agent) + consumed by: oximeter-collector (omicron/oximeter/collector) + consumed by: propolis-server (propolis/bin/propolis-server) +... +``` + +To see what Cargo dependencies caused the tool to identify an API dependency, use `--show-deps`: + +``` +$ cargo xtask ls-apis apis --show-deps +... +Dendrite DPD (client: dpd-client) + consumed by: ddmd (maghemite/ddmd) + via path+file:///home/dap/omicron-merge/out/ls-apis/checkout/maghemite/ddmd#0.1.0 + consumed by: mgd (maghemite/mgd) + via path+file:///home/dap/omicron-merge/out/ls-apis/checkout/maghemite/mg-lower#0.1.0 + via path+file:///home/dap/omicron-merge/out/ls-apis/checkout/maghemite/mgd#0.1.0 + consumed by: omicron-nexus (omicron/nexus) + via path+file:///home/dap/omicron-merge/nexus#omicron-nexus@0.1.0 + consumed by: omicron-sled-agent (omicron/sled-agent) + via path+file:///home/dap/omicron-merge/sled-agent#omicron-sled-agent@0.1.0 + consumed by: tfportd (dendrite/tfportd) + via path+file:///home/dap/omicron-merge/out/ls-apis/checkout/dendrite/tfportd#0.1.0 + consumed by: wicketd (omicron/wicketd) + via path+file:///home/dap/omicron-merge/wicketd#0.1.0 +... +``` + +These paths are local to your system and will differ from system to system. + +=== Listing servers and the APIs exposed and consumed by each one + +``` +$ cargo xtask ls-apis servers +... +omicron-sled-agent (omicron/sled-agent) + exposes: Bootstrap Agent (client = bootstrap-agent-client) + exposes: Sled Agent (client = sled-agent-client) + consumes: bootstrap-agent-client + consumes: ddm-admin-client + consumes: dns-service-client + consumes: dpd-client + consumes: gateway-client + consumes: mg-admin-client + consumes: nexus-client + consumes: propolis-client + consumes: sled-agent-client +... 
+``` + +You can similarly use `--show-deps` to see the Cargo dependency path that shows the API dependency. + +=== Listing deployment units, their servers, and the APIs produced and consumed by each one + +Deployment units are groups of components that are always deployed together. For example, all the components in the host OS global zone and switch zone represent one deployment unit. + +``` +$ cargo xtask ls-apis deployment-units +... +Crucible + crucible-agent (crucible/agent) + exposes: Crucible Agent (client = crucible-agent-client) + + crucible-downstairs (crucible/downstairs) + exposes: Crucible Repair (client = repair-client) + consumes: repair-client + + +Crucible Pantry + crucible-pantry (crucible/pantry) + exposes: Crucible Pantry (client = crucible-pantry-client) +... +``` + +=== Visualizing dependencies + +The `servers` and `deployment-units` commands accept a `--dot` argument to print output in a format that `dot(1)` can process: + +``` +$ cargo xtask ls-apis deployment-units --dot > deployment-units.dot +``` + +You can generate a PNG image of the graph like this: + +``` +$ dot -T png -o deployment-units.png deployment-units.dot +``` + + +== Details + +This tool is aimed at helping answer these questions related to online upgrade: + +* What Dropshot/Progenitor-based API dependencies exist on all the software that ships on the Oxide rack? +* Why does any particular component depend on some other component? +* Is there a way to sequence upgrades of some API servers so that clients can always assume that the corresponding servers have been upgraded? + +This tool combines **two sources of information:** + +* Cargo/Rust package metadata (including package names and dependencies) +* Developer-maintained metadata about APIs and their dependencies, located in link:./api-manifest.toml[] + +This tool basically works as follows: + +. 
It loads and validates information about all of the relevant Cargo workspaces by running `cargo metadata` using manifests from the local Git clones. +. Using this information, it identifies all packages that _look_ like Progenitor-based clients for Dropshot APIs: these are packages that (1) depend directly on `progenitor` as a normal or build dependency, and (2) end in `-client`. (A few non-client packages depend on Progenitor, like `omicron-common`. These are ignored using a hardcoded ignore list. Any other package that depends on Progenitor but does _not_ end in `-client` will produce a warning.) +. Then, it loads and validates the developer-maintained metadata (`api-manifest.toml`). +. Then, it applies whatever filter has been selected and prints out whatever information was asked for. + +The filtering is a little complicated but very important! + +=== The purpose of filtering + +Built-in filtering aims to solve a few different problems: + +. Many apparent dependencies identified through the above process are bogus. This usually happens because a package `P` depends on a Progenitor client solely for access to its types (e.g., to define a `From` impl for its own types). In this case, a component using `P` does not necessarily depend on the corresponding API. We want to ignore these bogus dependencies altogether. (If the component _does_ depend on that API, it must have a different dependency on the Progenitor client package and that one will still cause this tool to identify the API dependency.) +. While exploring the dependency graph, we sometimes want to exclude some legitimate dependencies. Sometimes, a package `P` depends on a Progenitor client, but only for a test program or some other thing that doesn't actually get deployed with `P`. These are not bogus dependencies, but they're not interesting for the purpose of online upgrade. +. To keep track of (and filter output based on) developer-maintained labels for each API dependency. More on this below. 
+ +Our broader goal is to construct a DAG whose nodes are deployment units and whose edges represent API dependencies between them. By doing that, we can define an update order that greatly simplifies any changes to these APIs because clients can always assume their dependencies are updated before them. We hope to do this by: + +1. Starting with the complete directed graph of API dependencies discovered by this tool, ignoring bogus dependencies and dependencies from non-deployed components. +2. Removing one edge, meaning that we nominate that API as one where clients _cannot_ assume their dependencies will be updated before them. +3. Checking if we still have cycles. If so, repeat. + +=== How filters work + +==== Example + +Filter rules are defined in `api-manifest.toml` in the `dependency_filter_rules` block. Here's an example: + +```toml +[[dependency_filter_rules]] +ancestor = "nexus-types" +client = "gateway-client" +evaluation = "bogus" +note = """ +nexus-types depends on gateway-client for defining some types. +""" +``` + +Implied in this rule is that the Rust package `nexus-types` depends on the Rust package `gateway-client`, which is a client for the MGS API. Without this rule, the tool would identify any Rust component that depends on `nexus-types` as depending on the MGS API. This rule says: ignore any dependency on `gateway-client` that goes through `nexus-types` because it's `bogus`: it's not a real dependency because `nexus-types` doesn't actually make requests to MGS. It just borrows some types. + +Say we have a component called `omicron-nexus` that depends on `nexus-types` _and_ `gateway-client`. For that component, this rule has no effect because there's another Rust dependency path from `omicron-nexus` to `gateway-client` that doesn't go through `nexus-types`, so the tool still knows it depends on the MGS API. 
+ +But if we had a component called `oximeter-collector` that depends on `nexus-types` but doesn't depend on `gateway-client` through any other path, then this rule prevents the tool from falsely claiming that `oximeter-collector` depends on the MGS API. + +==== Evaluations + +Filter rules always represent a determination that a human has made about one or more dependencies found by the tool. The possible evaluations are: + +[cols="1,3",options="header"] +|=== +|Evaluation +|Meaning + +|`unknown` +|No determination has been made. These are included by default. This is also the default evaluation for a dependency, if no filter rules match it. + +|`bogus` +|Any matching dependency is a false positive. The dependency should be ignored altogether. + +|`not-deployed` +|The matching dependency is for a program that is never deployed, like a test program, even though the package that it's _in_ does get deployed. These are ignored by default. + +|`non-dag` +|Any matching dependency has been flagged as "will not be part of the DAG used for online upgrade". This is primarily to help us keep track of the specific dependencies that we've looked at and made this determination for. These are currently ignored by default. + +|`dag` +|Any matching dependency has been flagged as "we want this to be part of the DAG used for online upgrade". + +|=== + +In summary: + +* All dependencies start as `unknown`. +* All the known false positives have been flagged as `bogus`. +* All the known dependencies from non-deployed programs inside deployed packages have been flagged as `not-deployed`. +* What remains is to evaluate the rest of the edges and determine if they're going to be `dag` or `non-dag`. + +It is a runtime error for more than one filter rule to match the same dependency chain. This makes the evaluation unambiguous: you can't have one rule match a dependency chain and say it's `bogus` while another says it's `dag`. 
+ +==== Applying different filters at runtime + +By default, this command shows dependencies that might be in the final graph. This includes those labeled `dag` and `unknown`. It excludes `bogus`, `non-dag`, and `not-deployed` dependencies. + +You can select different subsets using the `--filter` option, which accepts: + +* `include-non-dag`: show non-`bogus`, non-`not-deployed` dependencies (i.e., all dependencies that do exist in the deployed system). +* `non-bogus`: show everything _except_ bogus dependencies +* `bogus`: show only the bogus dependencies (useful for seeing all the false positives) +* `all`: show everything, even bogus dependencies diff --git a/dev-tools/ls-apis/api-manifest.toml b/dev-tools/ls-apis/api-manifest.toml new file mode 100644 index 0000000000..65dc28d7b2 --- /dev/null +++ b/dev-tools/ls-apis/api-manifest.toml @@ -0,0 +1,442 @@ +# Describes all the Dropshot/OpenAPI/Progenitor APIs in the system +# +# See README.adoc in this package for details. +# +# TODO It would be nice to collect a bunch of this information from the same +# sources that drive the actual build process (e.g., package-manifest.toml). +# For non-Omicron components, the deployment units (zone images and tarballs +# that get unpacked into the switch zone or global zone) come from buildomat +# jobs on other repositories. In at least some components those come from those +# components' package-manifest.toml files, which we probably have available, so +# we could still incorporate that information. 
+# +# TODO The following items from package-manifest.toml are currently ignored +# because they are assumed not to contain servers or clients that we care about: +# +# - faux_mgs +# - crucible_dtrace +# - mg-ddm +# +# The following were at one point included here, but do not appear to contain +# Progenitor clients or APIs, so they're left out to avoid needing to create and +# process clones of these repos: +# +# - lldp +# - pumpkind +# - thundermuffin +# +# If we do wind up processing package-manifest.toml, we may want to maintain an +# explicit list of items that we ignore so that we can fail when something is +# neither included nor ignored so that we can be sure we're not missing +# anything. +# +# TODO The metadata here overlaps with metadata hardcoded in `openapi-manager`. + +################################################################################ +# Ignored non-client packages +# +# These are packages that may be flagged as clients because they directly depend +# on Progenitor, but which are not really clients. +# +# These are cases that cannot be easily handled by the evaluation rules later in +# this file because they need to be processed earlier. +################################################################################ + +ignored_non_clients = [ + # omicron-common depends on progenitor so that it can define some generic + # error handling and a generic macro for defining clients. omicron-common + # itself is not a progenitor-based client. + "omicron-common", + + # propolis-mock-server uses Progenitor to generate types that are + # compatible with the real Propolis server. It doesn't actually use the + # client and we don't really care about it for these purposes anyway. + "propolis-mock-server", +] + +################################################################################ +# Deployment Units +# +# A deployment unit is a set of Rust packages that are always deployed together. 
+# This is particularly relevant for upgrade because we'll want to construct a +# DAG describing the order in which we update components. It's possible that we +# could have a DAG among the Rust packages that get deployed but there may +# still be cycles within the deployment unit dependency graph. +################################################################################ + +# The host OS includes Sled Agent, Propolis, and all the components that get +# bundled into the switch zone. +[[deployment_units]] +label = "Host OS" +packages = [ + "omicron-sled-agent", + "propolis-server", + # switch zone + "ddmd", + "dpd", + "mgd", + "omicron-gateway", + "tfportd", + "wicketd", +] + +# Installinator gets packaged into its own host OS image. +[[deployment_units]] +label = "Installinator" +packages = [ "installinator" ] + +# The rest of these get bundled by standard control plane zone images. + +[[deployment_units]] +label = "Crucible" +packages = [ "crucible-agent", "crucible-downstairs" ] + +[[deployment_units]] +label = "Crucible Pantry" +packages = [ "crucible-pantry" ] + +[[deployment_units]] +label = "Cockroach Admin" +packages = [ "omicron-cockroach-admin" ] + +[[deployment_units]] +label = "Clickhouse Admin" +packages = [ "omicron-clickhouse-admin" ] + +[[deployment_units]] +label = "DNS Server" +packages = [ "dns-server" ] + +[[deployment_units]] +label = "Nexus" +packages = [ "omicron-nexus" ] + +[[deployment_units]] +label = "Oximeter" +packages = [ "oximeter-collector" ] + + +################################################################################ +# APIs +# +# Each API includes: +# +# `client_package_name`: the name of the Rust package that's a +# Progenitor-based client for this API. This is used as a primary key for the +# API. +# +# `label`: a human-readable name for this API +# +# `server_package_name`: the package that contains the Dropshot API definition. 
+# +# [`notes`]: optional free-form human-readable summary documentation about this +# API +################################################################################ + +[[apis]] +client_package_name = "bootstrap-agent-client" +label = "Bootstrap Agent" +server_package_name = "bootstrap-agent-api" + +[[apis]] +client_package_name = "cockroach-admin-client" +label = "CockroachDB Cluster Admin" +server_package_name = "cockroach-admin-api" +notes = """ +This is the server running inside CockroachDB zones that performs \ +configuration and monitoring that requires the `cockroach` CLI. +""" + +[[apis]] +client_package_name = "crucible-agent-client" +label = "Crucible Agent" +server_package_name = "crucible-agent" + +[[apis]] +client_package_name = "repair-client" +label = "Crucible Repair" +server_package_name = "crucible-downstairs" +notes = """ +The repair service offered by a crucible-downstairs supports both repairing \ +one downstairs from another, and making a clone of a read-only downstairs \ +when creating a new region in the crucible agent. +""" + +[[apis]] +client_package_name = "crucible-pantry-client" +label = "Crucible Pantry" +server_package_name = "crucible-pantry" + +[[apis]] +client_package_name = "ddm-admin-client" +label = "Maghemite DDM Admin" +server_package_name = "ddmd" +notes = """ +The `ddmd` server runs in each sled GZ and each switch zone. These daemons \ +provide an interface for advertising network prefixes, and observing what \ +prefixes have been received from other DDM daemons in the rack. Sled agent \ +uses this interface to announce bootstrap and underlay network prefixes, as \ +well as learn about routes to other sleds and services in the rack. This \ +interface is required in early-networking before a rack is fully up with Nexus \ +running. Nexus does not consume this interface today, but will for \ +observability APIs in the future. 
+""" + +[[apis]] +client_package_name = "dns-service-client" +label = "DNS Server" +server_package_name = "dns-server-api" + +[[apis]] +client_package_name = "dpd-client" +label = "Dendrite DPD" +server_package_name = "dpd" +notes = """ +Dendrite's data plane daemon (`dpd`) is the interface to configure and manage \ +the rack switches. It's consumed by sled-agent to get the rack off the \ +ground. The dpd API is also used by nexus as operators make changes to the \ +rack external network configuration; these changes are synchronized by nexus \ +to `dpd`. The `dpd` API is auto-generated from its OpenAPI specification \ +and exists as a client library within omicron. This is because the Dendrite \ +repo is not currently open source. +""" + +[[apis]] +client_package_name = "gateway-client" +label = "Management Gateway Service" +server_package_name = "gateway-api" +notes = "Wicketd is deployed in a unit with MGS so we can ignore that one." + +[[apis]] +client_package_name = "installinator-client" +label = "Wicketd Installinator" +server_package_name = "installinator-api" + +[[apis]] +client_package_name = "mg-admin-client" +label = "Maghemite MG Admin" +server_package_name = "mgd" +notes = """ +The `mgd` daemon runs in each switch zone. This daemon is responsible for all \ +external route management for a switch. It provides interfaces for static \ +route management, BGP configuration and BFD configuration. This interface is \ +consumed by both nexus and sled agent, since we need external connectivity to \ +bring the rack up. 
+""" + +[[apis]] +client_package_name = "nexus-client" +label = "Nexus Internal API" +server_package_name = "nexus-internal-api" + +[[apis]] +client_package_name = "oxide-client" +label = "External API" +server_package_name = "nexus-external-api" +notes = "Special case, since we don't fully control all clients" + +[[apis]] +client_package_name = "oximeter-client" +label = "Oximeter" +server_package_name = "oximeter-api" +notes = """ +Shared types for this interface are in `omicron-common`. The producer makes \ +requests to Nexus, and receives them from `oximeter-collector`. \ +`oximeter-collector` makes requests to Nexus and the producer, and receives \ +them from Nexus (for periodic renewals). Nexus receives requests from both, \ +and makes the periodic renewal requests to `oximeter-collector`. +""" + +[[apis]] +client_package_name = "propolis-client" +label = "Propolis" +server_package_name = "propolis-server" +notes = """ +Sled Agent is deployed in a unit with Propolis so we can ignore that one. +""" + +[[apis]] +client_package_name = "sled-agent-client" +label = "Sled Agent" +server_package_name = "sled-agent-api" + +[[apis]] +client_package_name = "wicketd-client" +label = "Wicketd" +server_package_name = "wicketd-api" +notes = """ +wicketd-client is only used by wicket, which is deployed in a unit with wicketd. +""" + +[[apis]] +client_package_name = "crucible-control-client" +label = "Crucible Control (for testing only)" +server_package_name = "crucible" +notes = """ +Exposed by Crucible upstairs for debugging via the `cmon` debugging tool. +""" + +[[apis]] +client_package_name = "dsc-client" +label = "Downstairs Controller (debugging only)" +server_package_name = "dsc" +notes = """ +`dsc` is a control program for spinning up and controlling instances of Crucible +downstairs for testing. You can use the same program to control a running `dsc` +instance. It's also used by `crutest` for testing. 
+""" + +################################################################################ +# Dependency filter rules +# +# These rules are used to postprocess the API dependencies that are inferred +# from the Cargo dependencies. Each rule has properties: +# +# * `ancestor`: a Rust package name +# * `client`: the name of a Rust package that's a Progenitor-based API client +# * `evaluation`: a developer-maintained flag for this dependency +# * `note`: a human-readable explanation for the evaluation +# +# This tool works by assembling a list of _possible_ API dependencies based on +# Cargo package dependencies and then applying these rules to filter some out. +# A rule matches a possible API dependency on `client` if the Cargo package +# dependency path goes through `ancestor`. For example: omicron-sled-agent +# depends on omicron-common, which depends on mg-admin-client. This causes the +# tool to initially think that omicron-sled-agent uses the MGD Admin API. A +# rule with `client = mg-admin-client` and `ancestor = omicron-common` would +# filter this out. +# +# See the README for more details, including what the different evaluations mean. +################################################################################ + +# +# There are no DAG dependencies yet because we've only started evaluating which +# edges should be in that graph. +# + +# +# Non-DAG dependencies. See above for details. +# +[[dependency_filter_rules]] +ancestor = "oximeter-producer" +client = "nexus-client" +evaluation = "non-dag" +note = """ +All Oximeter producers are Nexus clients. This is a good candidate for a +non-DAG dependency because the API is small and stable and the reverse +directions are often not. +""" + +# +# "Not-deployed" dependencies. See above for details. +# +# TODO All things being equal, it would be better to separate these programs +# into their own packages that are separate from the package that *is* deployed. +# Then we wouldn't need these rules. 
As it is, these rules rely on manual +# vetting that the dependency _is_ only in the dev/test program. If things ever +# changed (e.g., if one of these grew a real dependency on the same API), we'd +# miss it in this tooling. +# + +[[dependency_filter_rules]] +ancestor = "oximeter-collector" +client = "oximeter-client" +evaluation = "not-deployed" +note = """ +Oximeter provides a standalone collector that is not used in deployed systems. +""" + +[[dependency_filter_rules]] +ancestor = "wicketd" +client = "wicketd-client" +evaluation = "not-deployed" +note = """ +Wicketd provides a function to refresh the server's config. This could probably +move into the client package instead. +""" + +[[dependency_filter_rules]] +ancestor = "dns-server" +client = "dns-service-client" +evaluation = "not-deployed" +note = """ +DNS server depends on itself only to provide TransientServer, which is not a +deployed component. +""" + +# +# "Bogus" dependencies. See above for details. +# +# In most of these cases, some crate is using a client crate for its types, not +# its client. In these cases, if the component has a real dependency on that +# other API, then it will need some other dependency on the client and that will +# show up as a non-bogus dependency. +# +# TODO It would be nice to remove the need for all of these to avoid accidental +# future false negatives. +# + +[[dependency_filter_rules]] +ancestor = "omicron-common" +client = "mg-admin-client" +evaluation = "bogus" +note = """ +omicron_common depends on mg-admin-client solely to impl some `From` +conversions. That makes it look like just about everything depends on +mg-admin-client, which isn't true. It'd be nice to remove this. Most clients +put those conversions into the client rather than omicron_common. +""" + +[[dependency_filter_rules]] +ancestor = "internal-dns" +client = "dns-service-client" +evaluation = "bogus" +note = """ +internal-dns depends on dns-service-client to use its types. 
They're only used +when configuring DNS, which is only done in a couple of components. But many +other components use internal-dns solely to read DNS. This dependency makes it +look like everything uses the DNS server API, but that's not true. We should +consider splitting this crate in two to eliminate this false positive. +""" + +[[dependency_filter_rules]] +ancestor = "nexus-types" +client = "gateway-client" +evaluation = "bogus" +note = """ +nexus-types depends on gateway-client for defining some types. +""" + +[[dependency_filter_rules]] +ancestor = "nexus-types" +client = "dns-service-client" +evaluation = "bogus" +note = """ +nexus-types depends on dns-service-client for defining some types. +""" + +[[dependency_filter_rules]] +ancestor = "nexus-types" +client = "sled-agent-client" +evaluation = "bogus" +note = """ +Past versions of nexus-types that are still referenced in the dependency tree +depended on sled-agent-client for defining some types. +""" + +[[dependency_filter_rules]] +ancestor = "sled-agent-types" +client = "propolis-client" +evaluation = "bogus" +note = """ +sled-agent-types uses propolis-client for types only. +""" + +[[dependency_filter_rules]] +ancestor = "omicron-sled-agent" +client = "crucible-agent-client" +evaluation = "bogus" +note = """ +Sled Agent uses the Crucible Agent client types only, and only in the simulated +sled agent. +""" diff --git a/dev-tools/ls-apis/src/api_metadata.rs b/dev-tools/ls-apis/src/api_metadata.rs new file mode 100644 index 0000000000..d7a1c6c05b --- /dev/null +++ b/dev-tools/ls-apis/src/api_metadata.rs @@ -0,0 +1,246 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Developer-maintained API metadata
+
+use crate::cargo::DepPath;
+use crate::workspaces::Workspaces;
+use crate::ClientPackageName;
+use crate::DeploymentUnitName;
+use crate::ServerComponentName;
+use crate::ServerPackageName;
+use anyhow::{bail, Result};
+use serde::Deserialize;
+use std::borrow::Borrow;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+
+/// Describes the APIs in the system
+///
+/// This is the programmatic interface to the `api-manifest.toml` file.
+#[derive(Deserialize)]
+#[serde(deny_unknown_fields)]
+#[serde(try_from = "RawApiMetadata")]
+pub struct AllApiMetadata {
+    apis: BTreeMap<ClientPackageName, ApiMetadata>,
+    deployment_units: BTreeMap<DeploymentUnitName, DeploymentUnitInfo>,
+    dependency_rules: BTreeMap<ClientPackageName, Vec<DependencyFilterRule>>,
+    ignored_non_clients: BTreeSet<ClientPackageName>,
+}
+
+impl AllApiMetadata {
+    /// Iterate over the distinct APIs described by the metadata
+    pub fn apis(&self) -> impl Iterator<Item = &ApiMetadata> {
+        self.apis.values()
+    }
+
+    /// Iterate over the deployment units defined in the metadata
+    pub fn deployment_units(
+        &self,
+    ) -> impl Iterator<Item = (&DeploymentUnitName, &DeploymentUnitInfo)> {
+        self.deployment_units.iter()
+    }
+
+    /// Iterate over the package names for all the APIs' clients
+    pub fn client_pkgnames(&self) -> impl Iterator<Item = &ClientPackageName> {
+        self.apis.keys()
+    }
+
+    /// Iterate over the package names for all the APIs' servers
+    pub fn server_components(
+        &self,
+    ) -> impl Iterator<Item = &ServerComponentName> {
+        self.deployment_units.values().flat_map(|d| d.packages.iter())
+    }
+
+    /// Look up details about an API based on its client package name
+    pub fn client_pkgname_lookup<P>(&self, pkgname: &P) -> Option<&ApiMetadata>
+    where
+        ClientPackageName: Borrow<P>,
+        P: Ord,
+        P: ?Sized,
+    {
+        self.apis.get(pkgname)
+    }
+
+    /// Returns the set of packages that should *not* be treated as
+    /// Progenitor-based clients
+    pub fn ignored_non_clients(&self) -> &BTreeSet<ClientPackageName> {
+        &self.ignored_non_clients
+    }
+
+    /// Returns how we should filter the given dependency
+    pub(crate) fn evaluate_dependency(
+        &self,
+        workspaces: &Workspaces,
+        client_pkgname: &ClientPackageName,
+        dep_path: &DepPath,
+    ) -> Result<Evaluation> {
+        let Some(rules) = self.dependency_rules.get(client_pkgname) else {
+            return Ok(Evaluation::default());
+        };
+
+        let which_rules: Vec<_> = rules
+            .iter()
+            .filter(|r| {
+                assert_eq!(r.client, *client_pkgname);
+                let pkgids = workspaces.workspace_pkgids(&r.ancestor);
+                dep_path.contains_any(&pkgids)
+            })
+            .collect();
+
+        if which_rules.is_empty() {
+            return Ok(Evaluation::default());
+        }
+
+        if which_rules.len() > 1 {
+            bail!(
+                "client package {:?}: dependency matched multiple filters: {}",
+                client_pkgname,
+                which_rules
+                    .into_iter()
+                    .map(|r| r.ancestor.as_str())
+                    .collect::<Vec<_>>()
+                    .join(", ")
+            );
+        }
+
+        Ok(which_rules[0].evaluation)
+    }
+}
+
+/// Format of the `api-manifest.toml` file
+///
+/// This is not exposed outside this module. It's processed and validated in
+/// the transformation to `AllApiMetadata`.
+#[derive(Deserialize)]
+#[serde(deny_unknown_fields)]
+struct RawApiMetadata {
+    apis: Vec<ApiMetadata>,
+    deployment_units: Vec<DeploymentUnitInfo>,
+    dependency_filter_rules: Vec<DependencyFilterRule>,
+    ignored_non_clients: Vec<ClientPackageName>,
+}
+
+impl TryFrom<RawApiMetadata> for AllApiMetadata {
+    type Error = anyhow::Error;
+
+    fn try_from(raw: RawApiMetadata) -> anyhow::Result<AllApiMetadata> {
+        let mut apis = BTreeMap::new();
+
+        for api in raw.apis {
+            if let Some(previous) =
+                apis.insert(api.client_package_name.clone(), api)
+            {
+                bail!(
+                    "duplicate client package name in API metadata: {}",
+                    &previous.client_package_name,
+                );
+            }
+        }
+
+        let mut deployment_units = BTreeMap::new();
+        for info in raw.deployment_units {
+            if let Some(previous) =
+                deployment_units.insert(info.label.clone(), info)
+            {
+                bail!(
+                    "duplicate deployment unit in API metadata: {}",
+                    &previous.label,
+                );
+            }
+        }
+
+        let mut dependency_rules = BTreeMap::new();
+        for rule in raw.dependency_filter_rules {
+            if !apis.contains_key(&rule.client) {
+                bail!(
+                    "dependency rule references unknown client: {:?}",
+                    rule.client
+                );
+            }
+
+            dependency_rules
+                .entry(rule.client.clone())
+                .or_insert_with(Vec::new)
+                .push(rule);
+        }
+
+        let mut ignored_non_clients = BTreeSet::new();
+        for client_pkg in raw.ignored_non_clients {
+            if !ignored_non_clients.insert(client_pkg.clone()) {
+                bail!(
+                    "entry in ignored_non_clients appeared twice: {:?}",
+                    &client_pkg
+                );
+            }
+        }
+
+        Ok(AllApiMetadata {
+            apis,
+            deployment_units,
+            dependency_rules,
+            ignored_non_clients,
+        })
+    }
+}
+
+/// Describes one API in the system
+#[derive(Deserialize)]
+pub struct ApiMetadata {
+    /// the package name of the Progenitor client for this API
+    ///
+    /// This is used as the primary key for APIs.
+    pub client_package_name: ClientPackageName,
+    /// human-readable label for the API
+    pub label: String,
+    /// package name of the server that provides the corresponding API
+    pub server_package_name: ServerPackageName,
+    /// human-readable notes about this API
+    pub notes: Option<String>,
+}
+
+/// Describes a unit that combines one or more servers that get deployed
+/// together
+#[derive(Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct DeploymentUnitInfo {
+    /// human-readable label, also used as primary key
+    pub label: DeploymentUnitName,
+    /// list of Rust packages that are shipped in this unit
+    pub packages: Vec<ServerComponentName>,
+}
+
+#[derive(Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct DependencyFilterRule {
+    pub ancestor: String,
+    pub client: ClientPackageName,
+    #[serde(default)]
+    pub evaluation: Evaluation,
+    // These notes are not currently used, but they are required. They could as
+    // well just be TOML comments. But it seems nice to enforce that they're
+    // present. And this would let us include this explanation in output in the
+    // future (e.g., to explain why some dependency was filtered out).
+    #[allow(dead_code)]
+    pub note: String,
+}
+
+#[derive(Clone, Copy, Debug, Default, Deserialize, Eq, PartialEq)]
+#[serde(rename_all = "kebab-case")]
+pub enum Evaluation {
+    /// This dependency has not been evaluated
+    #[default]
+    Unknown,
+    /// This dependency should be ignored because it's not a real dependency --
+    /// i.e., it's a false positive resulting from our methodology
+    Bogus,
+    /// This dependency should be ignored because it's not used in deployed
+    /// systems
+    NotDeployed,
+    /// This dependency should not be part of the update DAG
+    NonDag,
+    /// This dependency should be part of the update DAG
+    Dag,
+}
diff --git a/dev-tools/ls-apis/src/bin/ls-apis.rs b/dev-tools/ls-apis/src/bin/ls-apis.rs
new file mode 100644
index 0000000000..39d1865d5b
--- /dev/null
+++ b/dev-tools/ls-apis/src/bin/ls-apis.rs
@@ -0,0 +1,248 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! 
Show information about Progenitor-based APIs
+
+use anyhow::{Context, Result};
+use camino::Utf8PathBuf;
+use clap::{Args, Parser, Subcommand};
+use omicron_ls_apis::{
+    AllApiMetadata, ApiDependencyFilter, LoadArgs, ServerComponentName,
+    SystemApis,
+};
+use parse_display::{Display, FromStr};
+
+#[derive(Parser)]
+#[command(
+    name = "ls-apis",
+    bin_name = "ls-apis",
+    about = "Show information about Progenitor-based APIs"
+)]
+struct LsApis {
+    /// path to metadata about APIs
+    #[arg(long)]
+    api_manifest: Option<Utf8PathBuf>,
+
+    #[command(subcommand)]
+    cmd: Cmds,
+}
+
+#[derive(Subcommand)]
+enum Cmds {
+    /// print out an Asciidoc table summarizing the APIs
+    Adoc,
+    /// print out each API, what exports it, and what consumes it
+    Apis(ShowDepsArgs),
+    /// print out APIs exported and consumed by each deployment unit
+    DeploymentUnits(DotArgs),
+    /// print out APIs exported and consumed, by server component
+    Servers(DotArgs),
+}
+
+#[derive(Args)]
+pub struct ShowDepsArgs {
+    /// Show the Rust dependency path resulting in the API dependency
+    #[arg(long)]
+    show_deps: bool,
+
+    /// Show only API dependencies matching the filter
+    #[arg(long, default_value_t)]
+    filter: ApiDependencyFilter,
+}
+
+#[derive(Args)]
+pub struct DotArgs {
+    /// What kind of output format to use
+    #[arg(long, default_value_t)]
+    output_format: OutputFormat,
+    /// Show the Rust dependency path resulting in the API dependency
+    #[arg(long)]
+    show_deps: bool,
+
+    /// Show only API dependencies matching the filter
+    #[arg(long, default_value_t)]
+    filter: ApiDependencyFilter,
+}
+
+#[derive(Clone, Copy, Debug, Default, Display, FromStr)]
+#[display(style = "kebab-case")]
+pub enum OutputFormat {
+    Dot,
+    #[default]
+    Text,
+}
+
+fn main() -> Result<()> {
+    let cli_args = LsApis::parse();
+    let load_args = LoadArgs::try_from(&cli_args)?;
+    let apis = SystemApis::load(load_args)?;
+
+    match cli_args.cmd {
+        Cmds::Adoc => run_adoc(&apis),
+        Cmds::Apis(args) => run_apis(&apis, args),
+        Cmds::DeploymentUnits(args) => run_deployment_units(&apis, args),
+        Cmds::Servers(args) => run_servers(&apis, args),
+    }
+}
+
+fn run_adoc(apis: &SystemApis) -> Result<()> {
+    println!("// BEGIN auto-generated by Omicron's `cargo xtask ls-apis adoc`");
+    println!("// DO NOT EDIT.");
+    println!("// in the Omicron repo.");
+    println!(
+        ".List of OpenAPI/Progenitor-based interfaces for online upgrade."
+    );
+    println!(r#"[cols="1h,2,2,2a,2", options="header"]"#);
+    println!("|===");
+    println!("|API");
+    println!("|Server location (`repo:path`)");
+    println!("|Client packages (`repo:path`)");
+    println!("|Consumers (`repo:path`; excluding omdb and tests)");
+    println!("|Notes");
+    println!();
+
+    let metadata = apis.api_metadata();
+    for api in metadata.apis() {
+        let Some(server_component) =
+            apis.api_producer(&api.client_package_name)
+        else {
+            continue;
+        };
+        println!("// DO NOT EDIT. This table is auto-generated. See above.");
+        println!("|{}", api.label);
+        println!("|{}", apis.adoc_label(server_component)?);
+        println!("|{}", apis.adoc_label(&api.client_package_name)?);
+        println!("|");
+
+        for (c, _) in apis.api_consumers(
+            &api.client_package_name,
+            ApiDependencyFilter::default(),
+        )? {
+            println!("* {}", apis.adoc_label(c)?);
+        }
+
+        print!("|{}", api.notes.as_deref().unwrap_or("-\n"));
+        println!();
+    }
+
+    println!("|===\n");
+    println!("// END auto-generated by Omicron's `cargo xtask ls-apis adoc`");
+
+    Ok(())
+}
+
+fn run_apis(apis: &SystemApis, args: ShowDepsArgs) -> Result<()> {
+    let metadata = apis.api_metadata();
+    for api in metadata.apis() {
+        println!("{} (client: {})", api.label, api.client_package_name);
+        for (s, path) in
+            apis.api_consumers(&api.client_package_name, args.filter)?
+        {
+            let (repo_name, package_path) = apis.package_label(s)?;
+            println!(" consumed by: {} ({}/{})", s, repo_name, package_path);
+            if args.show_deps {
+                for p in path.nodes() {
+                    println!(" via {}", p);
+                }
+            }
+        }
+        println!();
+    }
+    Ok(())
+}
+
+fn run_deployment_units(apis: &SystemApis, args: DotArgs) -> Result<()> {
+    match &args.output_format {
+        OutputFormat::Dot => println!("{}", apis.dot_by_unit(args.filter)?),
+        OutputFormat::Text => {
+            let metadata = apis.api_metadata();
+            for unit in apis.deployment_units() {
+                let server_components = apis.deployment_unit_servers(unit)?;
+                println!("{}", unit);
+                print_server_components(
+                    apis,
+                    metadata,
+                    server_components,
+                    " ",
+                    args.show_deps,
+                    args.filter,
+                )?;
+                println!();
+            }
+        }
+    };
+
+    Ok(())
+}
+
+fn print_server_components<'a>(
+    apis: &SystemApis,
+    metadata: &AllApiMetadata,
+    server_components: impl IntoIterator<Item = &'a ServerComponentName>,
+    prefix: &str,
+    show_deps: bool,
+    filter: ApiDependencyFilter,
+) -> Result<()> {
+    for s in server_components.into_iter() {
+        let (repo_name, pkg_path) = apis.package_label(s)?;
+        println!("{}{} ({}/{})", prefix, s, repo_name, pkg_path);
+        for api in metadata.apis().filter(|a| {
+            matches!(
+                apis.api_producer(&a.client_package_name),
+                Some (name) if name == s
+            )
+        }) {
+            println!(
+                "{} exposes: {} (client = {})",
+                prefix, api.label, api.client_package_name
+            );
+        }
+        for (c, path) in apis.component_apis_consumed(s, filter)? {
+            println!("{} consumes: {}", prefix, c);
+            if show_deps {
+                for p in path.nodes() {
+                    println!("{} via: {}", prefix, p);
+                }
+            }
+        }
+
+        println!();
+    }
+    Ok(())
+}
+
+fn run_servers(apis: &SystemApis, args: DotArgs) -> Result<()> {
+    match &args.output_format {
+        OutputFormat::Dot => {
+            println!("{}", apis.dot_by_server_component(args.filter)?)
+        }
+        OutputFormat::Text => {
+            let metadata = apis.api_metadata();
+            print_server_components(
+                apis,
+                metadata,
+                metadata.server_components(),
+                "",
+                args.show_deps,
+                args.filter,
+            )?;
+        }
+    };
+    Ok(())
+}
+
+impl TryFrom<&LsApis> for LoadArgs {
+    type Error = anyhow::Error;
+
+    fn try_from(args: &LsApis) -> Result<Self> {
+        let self_manifest_dir_str = std::env::var("CARGO_MANIFEST_DIR")
+            .context("expected CARGO_MANIFEST_DIR in environment")?;
+        let self_manifest_dir = Utf8PathBuf::from(self_manifest_dir_str);
+        let api_manifest_path = args
+            .api_manifest
+            .clone()
+            .unwrap_or_else(|| self_manifest_dir.join("api-manifest.toml"));
+        Ok(LoadArgs { api_manifest_path })
+    }
+}
diff --git a/dev-tools/ls-apis/src/cargo.rs b/dev-tools/ls-apis/src/cargo.rs
new file mode 100644
index 0000000000..edff43ff12
--- /dev/null
+++ b/dev-tools/ls-apis/src/cargo.rs
@@ -0,0 +1,377 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Extract API metadata from Cargo metadata
+
+use crate::ClientPackageName;
+use anyhow::bail;
+use anyhow::{anyhow, ensure, Context, Result};
+use camino::Utf8Path;
+use camino::Utf8PathBuf;
+use cargo_metadata::Package;
+use cargo_metadata::{DependencyKind, PackageId};
+use std::collections::BTreeSet;
+use std::collections::{BTreeMap, VecDeque};
+
+/// Query package and dependency-related information about a Cargo workspace
+pub struct Workspace {
+    /// human-readable label for the workspace
+    /// (generally the basename of the repo's URL)
+    name: String,
+
+    /// local path to the root of the workspace
+    workspace_root: Utf8PathBuf,
+
+    /// list of all package metadata, by package id
+    ///
+    /// The dependency information in `Package` should not be used. It
+    /// describes what's written in the Cargo files. `nodes_by_id` reflects
+    /// precisely what Cargo actually resolved instead.
+    packages_by_id: BTreeMap<PackageId, Package>,
+
+    /// list of all packages' dependency information, by package id
+    nodes_by_id: BTreeMap<PackageId, cargo_metadata::Node>,
+
+    /// list of all workspace-level packages, by name
+    workspace_packages_by_name: BTreeMap<String, PackageId>,
+
+    /// list of all packages that appear to be Progenitor-based clients
+    /// (having a direct dependency on `progenitor`)
+    progenitor_clients: BTreeSet<ClientPackageName>,
+}
+
+impl Workspace {
+    /// Use `cargo metadata` to load information about a workspace called `name`
+    ///
+    /// If `manifest_path` is `None`, then information is loaded about the
+    /// current workspace. Otherwise, that path is used as the workspace
+    /// manifest.
+    pub fn load(
+        name: &str,
+        manifest_path: Option<&Utf8Path>,
+        ignored_non_clients: &BTreeSet<ClientPackageName>,
+    ) -> Result<Workspace> {
+        eprintln!(
+            "loading metadata for workspace {name} from {}",
+            manifest_path
+                .as_ref()
+                .map(|p| p.to_string())
+                .as_deref()
+                .unwrap_or("current workspace")
+        );
+
+        let mut cmd = cargo_metadata::MetadataCommand::new();
+        if let Some(manifest_path) = manifest_path {
+            cmd.manifest_path(manifest_path);
+        }
+        let metadata = cmd.exec().context("loading metadata")?;
+        let workspace_root = metadata.workspace_root;
+
+        // Build an index of all packages by id. Identify duplicates because we
+        // assume there shouldn't be any but we want to know if that assumption
+        // is wrong.
+        //
+        // Also build an index of workspace packages by name so that we can
+        // quickly find their id.
+        let mut packages_by_id = BTreeMap::new();
+        let mut workspace_packages_by_name = BTreeMap::new();
+        for pkg in metadata.packages {
+            if pkg.source.is_none() {
+                if workspace_packages_by_name
+                    .insert(pkg.name.clone(), pkg.id.clone())
+                    .is_some()
+                {
+                    bail!(
+                        "workspace {:?}: unexpected duplicate workspace \
+                         package with name {:?}",
+                        name,
+                        pkg.name,
+                    );
+                }
+            }
+
+            if let Some(previous) = packages_by_id.insert(pkg.id.clone(), pkg) {
+                bail!(
+                    "workspace {:?}: unexpected duplicate package with id {:?}",
+                    name,
+                    previous.id
+                );
+            }
+        }
+
+        // Build an index mapping packages to their corresponding node in the
+        // resolved dependency tree.
+        //
+        // While we're walking the resolved dependency tree, identify any
+        // Progenitor clients.
+        let mut progenitor_clients = BTreeSet::new();
+        let mut nodes_by_id = BTreeMap::new();
+        let resolve = metadata.resolve.ok_or_else(|| {
+            anyhow!(
+                "workspace {:?}: has no package resolution information",
+                name
+            )
+        })?;
+        for node in resolve.nodes {
+            let Some(pkg) = packages_by_id.get(&node.id) else {
+                bail!(
+                    "workspace {:?}: found resolution information for package \
+                     with id {:?}, but no associated package",
+                    name,
+                    node.id,
+                );
+            };
+
+            if node.deps.iter().any(|d| {
+                d.name == "progenitor"
+                    && d.dep_kinds.iter().any(|k| {
+                        matches!(
+                            k.kind,
+                            DependencyKind::Normal | DependencyKind::Build
+                        )
+                    })
+            }) {
+                if pkg.name.ends_with("-client") {
+                    progenitor_clients
+                        .insert(ClientPackageName::from(pkg.name.clone()));
+                } else if !ignored_non_clients.contains(pkg.name.as_str()) {
+                    eprintln!(
+                        "workspace {:?}: ignoring apparent non-client that \
+                         uses progenitor: {}",
+                        name, pkg.name
+                    );
+                }
+            }
+
+            if let Some(previous) = nodes_by_id.insert(node.id.clone(), node) {
+                bail!(
+                    "workspace {:?}: unexpected duplicate resolution for \
+                     package {:?}",
+                    name,
+                    previous.id,
+                );
+            }
+        }
+
+        // There should be resolution information for every package that we
+        // found.
+        for pkgid in packages_by_id.keys() {
+            ensure!(
+                nodes_by_id.contains_key(pkgid),
+                "workspace {:?}: found package {:?} with no resolution \
+                 information",
+                name,
+                pkgid,
+            );
+        }
+
+        Ok(Workspace {
+            name: name.to_owned(),
+            workspace_root,
+            packages_by_id,
+            nodes_by_id,
+            progenitor_clients,
+            workspace_packages_by_name,
+        })
+    }
+
+    /// Return the name of this workspace
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Returns a list of workspace packages that appear to be Progenitor
+    /// clients
+    pub fn client_packages(&self) -> impl Iterator<Item = &ClientPackageName> {
+        self.progenitor_clients.iter()
+    }
+
+    /// Returns information about package `pkgname` in the workspace
+    ///
+    /// Note that this only returns information about workspace packages (i.e.,
+    /// packages that are defined in the workspace itself). To find information
+    /// about transitive dependencies, you need to be more specific about which
+    /// version you want. Use `pkgids()` for that.
+    pub fn find_workspace_package(&self, pkgname: &str) -> Option<&Package> {
+        self.workspace_packages_by_name
+            .get(pkgname)
+            .and_then(|pkgid| self.packages_by_id.get(pkgid))
+    }
+
+    /// Given a workspace package, return the relative path from the root of the
+    /// workspace to that package.
+    pub fn find_workspace_package_path(
+        &self,
+        pkgname: &str,
+    ) -> Result<Utf8PathBuf> {
+        let pkg = self.find_workspace_package(pkgname).ok_or_else(|| {
+            anyhow!("workspace {:?} has no package {:?}", self.name, pkgname)
+        })?;
+        let manifest_path = &pkg.manifest_path;
+        let relative_path =
+            manifest_path.strip_prefix(&self.workspace_root).map_err(|_| {
+                anyhow!(
+                    "workspace {:?} package {:?} manifest is not under \
+                     the workspace root ({:?})",
+                    self.name,
+                    pkgname,
+                    &self.workspace_root,
+                )
+            })?;
+        let path = cargo_toml_parent(relative_path, manifest_path)?;
+        Ok(path)
+    }
+
+    /// Iterate over the required dependencies of package `root`, invoking
+    /// `func` for each one as:
+    ///
+    /// ```ignore
+    /// func(package: &Package, dep_path: &DepPath)
+    /// ```
+    ///
+    /// where `package` is the package that is (directly or indirectly) a
+    /// dependency of `root` and `dep_path` describes the dependency path from
+    /// `root` to `package`.
+    pub fn walk_required_deps_recursively(
+        &self,
+        root: &Package,
+        func: &mut dyn FnMut(&Package, &DepPath),
+    ) -> Result<()> {
+        struct Remaining<'a> {
+            node: &'a cargo_metadata::Node,
+            path: DepPath,
+        }
+
+        let root_node = self.nodes_by_id.get(&root.id).ok_or_else(|| {
+            anyhow!(
+                "workspace {:?}: walking dependencies for package {:?}: \
+                 package is not known in this workspace",
+                self.name,
+                root.name
+            )
+        })?;
+
+        let mut remaining = vec![Remaining {
+            node: root_node,
+            path: DepPath::for_pkg(root.id.clone()),
+        }];
+        let mut seen: BTreeSet<PackageId> = BTreeSet::new();
+
+        while let Some(Remaining { node: next, path }) = remaining.pop() {
+            for d in &next.deps {
+                let did = &d.pkg;
+                if seen.contains(did) {
+                    continue;
+                }
+
+                if !d.dep_kinds.iter().any(|k| {
+                    matches!(
+                        k.kind,
+                        DependencyKind::Normal | DependencyKind::Build
+                    )
+                }) {
+                    continue;
+                }
+                // Dev-only edges above must not mark the package as seen.
+                seen.insert(did.clone());
+
+                // unwraps: We verified during loading that we have metadata for
+                // all package ids for which we have nodes in the dependency
+                // tree. We also verified during loading that we have nodes in
+                // the dependency tree for all package ids for which we have
+                // package metadata.
+                let dep_pkg = self.packages_by_id.get(did).unwrap();
+                let dep_node = self.nodes_by_id.get(did).unwrap();
+                func(dep_pkg, &path);
+                let dep_path = path.with_dependency_on(did.clone());
+                remaining.push(Remaining { node: dep_node, path: dep_path })
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Return all package ids for the given `pkgname`
+    ///
+    /// `pkgname` does not need to be a workspace package. There may be many
+    /// packages with this name, generally at different versions.
+    pub fn pkgids<'a>(
+        &'a self,
+        pkgname: &'a str,
+    ) -> impl Iterator<Item = &'a PackageId> + 'a {
+        self.packages_by_id.iter().filter_map(move |(pkgid, pkg)| {
+            if pkg.name == pkgname {
+                Some(pkgid)
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Return information about a package by id
+    ///
+    /// This does not need to be a workspace package.
+    pub fn pkg_by_id(&self, pkgid: &PackageId) -> Option<&Package> {
+        self.packages_by_id.get(pkgid)
+    }
+}
+
+/// Given a path to a `Cargo.toml` file for a package, return the parent
+/// directory
+///
+/// Fails explicitly if the path doesn't match what we'd expect.
+fn cargo_toml_parent(
+    path: &Utf8Path,
+    label_path: &Utf8Path,
+) -> Result<Utf8PathBuf> {
+    ensure!(
+        path.file_name() == Some("Cargo.toml"),
+        "unexpected manifest path: {:?}",
+        label_path
+    );
+    let path = path
+        .parent()
+        .ok_or_else(|| anyhow!("unexpected manifest path: {:?}", label_path))?
+        .to_owned();
+    Ok(path)
+}
+
+/// Describes a "dependency path": a path through the Cargo dependency graph
+/// from one package to another, which describes how one package depends on
+/// another
+#[derive(Debug, Clone)]
+pub struct DepPath(VecDeque<PackageId>);
+
+impl DepPath {
+    /// Creates a new `DepPath` for package `pkgid`
+    pub fn for_pkg(pkgid: PackageId) -> DepPath {
+        DepPath(VecDeque::from([pkgid]))
+    }
+
+    /// Returns the bottom-most node in this path
+    ///
+    /// In a dependency chain from root package `p1` to its dependency `p2` that
+    /// depends on `p3`, the bottom-most node would be `p3`.
+    pub fn bottom(&self) -> &PackageId {
+        &self.0[0]
+    }
+
+    /// Iterates over the nodes in this path, from the bottom to the root
+    pub fn nodes(&self) -> impl Iterator<Item = &PackageId> {
+        self.0.iter()
+    }
+
+    /// Creates a new dependency path based on this one, but where the bottom of
+    /// this path depends on package `pkgid`
+    pub fn with_dependency_on(&self, pkgid: PackageId) -> DepPath {
+        let mut rv = self.clone();
+        rv.0.push_front(pkgid);
+        rv
+    }
+
+    /// Returns whether any component of the path contains any of the given
+    /// pkgids
+    pub fn contains_any(&self, pkgids: &BTreeSet<&PackageId>) -> bool {
+        self.0.iter().any(|p| pkgids.contains(p))
+    }
+}
diff --git a/dev-tools/ls-apis/src/lib.rs b/dev-tools/ls-apis/src/lib.rs
new file mode 100644
index 0000000000..8d4f760c8b
--- /dev/null
+++ b/dev-tools/ls-apis/src/lib.rs
@@ -0,0 +1,87 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! 
Collect information about Progenitor-based APIs
+
+mod api_metadata;
+mod cargo;
+mod system_apis;
+mod workspaces;
+
+pub use api_metadata::AllApiMetadata;
+pub use system_apis::ApiDependencyFilter;
+pub use system_apis::SystemApis;
+
+use anyhow::{Context, Result};
+use camino::Utf8Path;
+use camino::Utf8PathBuf;
+use serde::de::DeserializeOwned;
+use serde::Deserialize;
+use std::borrow::Borrow;
+
+#[macro_use]
+extern crate newtype_derive;
+
+#[derive(Clone, Deserialize, Ord, PartialOrd, Eq, PartialEq)]
+#[serde(transparent)]
+pub struct ClientPackageName(String);
+NewtypeDebug! { () pub struct ClientPackageName(String); }
+NewtypeDeref! { () pub struct ClientPackageName(String); }
+NewtypeDerefMut! { () pub struct ClientPackageName(String); }
+NewtypeDisplay! { () pub struct ClientPackageName(String); }
+NewtypeFrom! { () pub struct ClientPackageName(String); }
+impl Borrow<str> for ClientPackageName {
+    fn borrow(&self) -> &str {
+        self.0.as_str()
+    }
+}
+
+#[derive(Clone, Deserialize, Ord, PartialOrd, Eq, PartialEq)]
+#[serde(transparent)]
+pub struct DeploymentUnitName(String);
+NewtypeDebug! { () pub struct DeploymentUnitName(String); }
+NewtypeDeref! { () pub struct DeploymentUnitName(String); }
+NewtypeDerefMut! { () pub struct DeploymentUnitName(String); }
+NewtypeDisplay! { () pub struct DeploymentUnitName(String); }
+NewtypeFrom! { () pub struct DeploymentUnitName(String); }
+
+#[derive(Clone, Deserialize, Ord, PartialOrd, Eq, PartialEq)]
+#[serde(transparent)]
+pub struct ServerPackageName(String);
+NewtypeDebug! { () pub struct ServerPackageName(String); }
+NewtypeDeref! { () pub struct ServerPackageName(String); }
+NewtypeDerefMut! { () pub struct ServerPackageName(String); }
+NewtypeDisplay! { () pub struct ServerPackageName(String); }
+NewtypeFrom! { () pub struct ServerPackageName(String); }
+impl Borrow<str> for ServerPackageName {
+    fn borrow(&self) -> &str {
+        self.0.as_str()
+    }
+}
+
+#[derive(Clone, Deserialize, Ord, PartialOrd, Eq, PartialEq)]
+#[serde(transparent)]
+pub struct ServerComponentName(String);
+NewtypeDebug! { () pub struct ServerComponentName(String); }
+NewtypeDeref! { () pub struct ServerComponentName(String); }
+NewtypeDerefMut! { () pub struct ServerComponentName(String); }
+NewtypeDisplay! { () pub struct ServerComponentName(String); }
+NewtypeFrom! { () pub struct ServerComponentName(String); }
+impl Borrow<String> for ServerPackageName {
+    fn borrow(&self) -> &String {
+        &self.0
+    }
+}
+
+/// Parameters for loading information about system APIs
+pub struct LoadArgs {
+    /// path to developer-maintained API metadata
+    pub api_manifest_path: Utf8PathBuf,
+}
+
+fn parse_toml_file<T: DeserializeOwned>(path: &Utf8Path) -> Result<T> {
+    let s = std::fs::read_to_string(path)
+        .with_context(|| format!("read {:?}", path))?;
+    toml::from_str(&s).with_context(|| format!("parse {:?}", path))
+}
diff --git a/dev-tools/ls-apis/src/system_apis.rs b/dev-tools/ls-apis/src/system_apis.rs
new file mode 100644
index 0000000000..6d624d4e57
--- /dev/null
+++ b/dev-tools/ls-apis/src/system_apis.rs
@@ -0,0 +1,629 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Query information about the Dropshot/OpenAPI/Progenitor-based APIs within
+//! 
the Oxide system
+
+use crate::api_metadata::AllApiMetadata;
+use crate::api_metadata::ApiMetadata;
+use crate::api_metadata::Evaluation;
+use crate::cargo::DepPath;
+use crate::parse_toml_file;
+use crate::workspaces::Workspaces;
+use crate::ClientPackageName;
+use crate::DeploymentUnitName;
+use crate::LoadArgs;
+use crate::ServerComponentName;
+use crate::ServerPackageName;
+use anyhow::{anyhow, bail, Context, Result};
+use camino::Utf8PathBuf;
+use cargo_metadata::Package;
+use parse_display::{Display, FromStr};
+use petgraph::dot::Dot;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+
+/// Query information about the Dropshot/OpenAPI/Progenitor-based APIs within
+/// the Oxide system
+pub struct SystemApis {
+    /// maps a deployment unit to its list of service components
+    unit_server_components:
+        BTreeMap<DeploymentUnitName, BTreeSet<ServerComponentName>>,
+    /// maps a server component to the deployment unit that it's part of
+    /// (reverse of `unit_server_components`)
+    server_component_units: BTreeMap<ServerComponentName, DeploymentUnitName>,
+
+    /// maps a server component to the list of APIs it uses (using the client
+    /// package name as a primary key for the API)
+    apis_consumed: BTreeMap<
+        ServerComponentName,
+        BTreeMap<ClientPackageName, Vec<DepPath>>,
+    >,
+    /// maps an API name (using the client package name as primary key) to the
+    /// list of server components that use it
+    /// (reverse of `apis_consumed`)
+    api_consumers: BTreeMap<
+        ClientPackageName,
+        BTreeMap<ServerComponentName, Vec<DepPath>>,
+    >,
+
+    /// maps an API name to the server component that exposes that API
+    api_producers: BTreeMap<ClientPackageName, (ServerComponentName, DepPath)>,
+
+    /// source of developer-maintained API metadata
+    api_metadata: AllApiMetadata,
+    /// source of Cargo package metadata
+    workspaces: Workspaces,
+}
+
+impl SystemApis {
+    /// Load information about APIs in the system based on both developer-
+    /// maintained metadata and Cargo-provided metadata
+    pub fn load(args: LoadArgs) -> Result<SystemApis> {
+        // Load the API manifest.
+        let api_metadata: AllApiMetadata =
+            parse_toml_file(&args.api_manifest_path)?;
+        // Load Cargo metadata and validate it against the manifest.
+        let (workspaces, warnings) = Workspaces::load(&api_metadata)?;
+        if !warnings.is_empty() {
+            for e in warnings {
+                eprintln!("warning: {:#}", e);
+            }
+        }
+
+        // Create an index of server package names, mapping each one to the API
+        // that it corresponds to.
+        let server_packages: BTreeMap<_, _> = api_metadata
+            .apis()
+            .map(|api| (api.server_package_name.clone(), api))
+            .collect();
+
+        // Walk the deployment units, then walk each one's list of packages, and
+        // then walk all of its dependencies. Along the way, record whenever we
+        // find a package whose name matches a known server package. If we find
+        // this, we've found which deployment unit (and which top-level package)
+        // contains that server. The result of this process is a set of data
+        // structures that allow us to look up the components in a deployment
+        // unit, the deployment unit for any component, the servers in each
+        // component, etc.
+        let mut tracker = ServerComponentsTracker::new(&server_packages);
+        for (deployment_unit, dunit_info) in api_metadata.deployment_units() {
+            for dunit_pkg in &dunit_info.packages {
+                tracker.found_deployment_unit_package(
+                    deployment_unit,
+                    dunit_pkg,
+                )?;
+                let (workspace, server_pkg) =
+                    workspaces.find_package_workspace(dunit_pkg)?;
+                let dep_path = DepPath::for_pkg(server_pkg.id.clone());
+                tracker.found_package(dunit_pkg, dunit_pkg, &dep_path);
+
+                workspace.walk_required_deps_recursively(
+                    server_pkg,
+                    &mut |p: &Package, dep_path: &DepPath| {
+                        tracker.found_package(dunit_pkg, &p.name, dep_path);
+                    },
+                )?;
+            }
+        }
+
+        if !tracker.errors.is_empty() {
+            for e in tracker.errors {
+                eprintln!("error: {:#}", e);
+            }
+
+            bail!("found at least one API exported by multiple servers");
+        }
+
+        let (server_component_units, unit_server_components, api_producers) = (
+            tracker.server_component_units,
+            tracker.unit_server_components,
+            tracker.api_producers,
+        );
+
+        // Now that we've figured out what servers are where, walk dependencies
+        // of each server component and assemble structures to find which APIs
+        // are produced and consumed by which components.
+        let mut deps_tracker = ClientDependenciesTracker::new(&api_metadata);
+        for server_pkgname in server_component_units.keys() {
+            let (workspace, pkg) =
+                workspaces.find_package_workspace(server_pkgname)?;
+            workspace
+                .walk_required_deps_recursively(
+                    pkg,
+                    &mut |p: &Package, dep_path: &DepPath| {
+                        deps_tracker.found_dependency(
+                            server_pkgname,
+                            &p.name,
+                            dep_path,
+                        );
+                    },
+                )
+                .with_context(|| {
+                    format!(
+                        "iterating dependencies of workspace {:?} package {:?}",
+                        workspace.name(),
+                        server_pkgname
+                    )
+                })?;
+        }
+
+        let (apis_consumed, api_consumers) =
+            (deps_tracker.apis_consumed, deps_tracker.api_consumers);
+
+        Ok(SystemApis {
+            server_component_units,
+            unit_server_components,
+            apis_consumed,
+            api_consumers,
+            api_producers,
+            api_metadata,
+            workspaces,
+        })
+    }
+
+    /// Iterate over the deployment units
+    pub fn deployment_units(
+        &self,
+    ) -> impl Iterator<Item = &DeploymentUnitName> {
+        self.unit_server_components.keys()
+    }
+
+    /// For one deployment unit, iterate over the servers contained in it
+    pub fn deployment_unit_servers(
+        &self,
+        unit: &DeploymentUnitName,
+    ) -> Result<impl Iterator<Item = &ServerComponentName>> {
+        Ok(self
+            .unit_server_components
+            .get(unit)
+            .ok_or_else(|| anyhow!("unknown deployment unit: {}", unit))?
+            .iter())
+    }
+
+    /// Returns the developer-maintained API metadata
+    pub fn api_metadata(&self) -> &AllApiMetadata {
+        &self.api_metadata
+    }
+
+    /// Given a server component, return the APIs consumed by this component
+    pub fn component_apis_consumed(
+        &self,
+        server_component: &ServerComponentName,
+        filter: ApiDependencyFilter,
+    ) -> Result<impl Iterator<Item = (&ClientPackageName, &DepPath)> + '_> {
+        let mut rv = Vec::new();
+        let Some(apis_consumed) = self.apis_consumed.get(server_component)
+        else {
+            return Ok(rv.into_iter());
+        };
+
+        for (client_pkgname, dep_paths) in apis_consumed {
+            let mut include = None;
+            for p in dep_paths {
+                if filter.should_include(
+                    &self.api_metadata,
+                    &self.workspaces,
+                    client_pkgname,
+                    p,
+                )? {
+                    include = Some(p);
+                    break;
+                };
+            }
+            if let Some(p) = include {
+                rv.push((client_pkgname, p));
+            }
+        }
+
+        Ok(rv.into_iter())
+    }
+
+    /// Given the client package name for an API, return the name of the server
+    /// component that provides it
+    pub fn api_producer(
+        &self,
+        client: &ClientPackageName,
+    ) -> Option<&ServerComponentName> {
+        self.api_producers.get(client).map(|s| &s.0)
+    }
+
+    /// Given the client package name for an API, return the list of server
+    /// components that consume it, along with the Cargo dependency path that
+    /// connects each server to the client package
+    pub fn api_consumers(
+        &self,
+        client: &ClientPackageName,
+        filter: ApiDependencyFilter,
+    ) -> Result<impl Iterator<Item = (&ServerComponentName, &DepPath)> + '_>
+    {
+        let mut rv = Vec::new();
+
+        let Some(api_consumers) = self.api_consumers.get(client) else {
+            return Ok(rv.into_iter());
+        };
+
+        for (server_pkgname, dep_paths) in api_consumers {
+            let mut include = None;
+            for p in dep_paths {
+                if filter.should_include(
+                    &self.api_metadata,
+                    &self.workspaces,
+                    client,
+                    p,
+                )? {
+                    include = Some(p);
+                    break;
+                }
+            }
+
+            if let Some(p) = include {
+                rv.push((server_pkgname, p))
+            }
+        }
+
+        Ok(rv.into_iter())
+    }
+
+    /// Given the name of any package defined in one of our workspaces, return
+    /// information used to construct a label
+    ///
+    /// Returns `(name, rel_path)`, where `name` is the name of the workspace
+    /// containing the package and `rel_path` is the relative path of the
+    /// package within that workspace.
+    pub fn package_label(&self, pkgname: &str) -> Result<(&str, Utf8PathBuf)> {
+        let (workspace, _) = self.workspaces.find_package_workspace(pkgname)?;
+        let pkgpath = workspace.find_workspace_package_path(pkgname)?;
+        Ok((workspace.name(), pkgpath))
+    }
+
+    /// Given the name of any package defined in one of our workspaces, return
+    /// an Asciidoc snippet that's usable to render the name of the package.
+    /// This resolves the same workspace and path as `package_label()` and
+    /// renders them as a GitHub link.
+    pub fn adoc_label(&self, pkgname: &str) -> Result<String> {
+        let (workspace, _) = self.workspaces.find_package_workspace(pkgname)?;
+        let pkgpath = workspace.find_workspace_package_path(pkgname)?;
+        Ok(format!(
+            "https://github.com/oxidecomputer/{}/tree/main/{}[{}:{}]",
+            workspace.name(),
+            pkgpath,
+            workspace.name(),
+            pkgpath
+        ))
+    }
+
+    /// Returns a string that can be passed to `dot(1)` to render a graph of
+    /// API dependencies among deployment units
+    pub fn dot_by_unit(&self, filter: ApiDependencyFilter) -> Result<String> {
+        let mut graph = petgraph::graph::Graph::new();
+        let nodes: BTreeMap<_, _> = self
+            .deployment_units()
+            .map(|name| (name, graph.add_node(name)))
+            .collect();
+
+        // Now walk through the deployment units, walk through each one's server
+        // packages, walk through each one of the clients used by those, and
+        // create a corresponding edge.
+        for deployment_unit in self.deployment_units() {
+            let server_components =
+                self.deployment_unit_servers(deployment_unit).unwrap();
+            let my_node = nodes.get(deployment_unit).unwrap();
+            for server_pkg in server_components {
+                for (client_pkg, _) in
+                    self.component_apis_consumed(server_pkg, filter)?
+                {
+                    let other_component =
+                        self.api_producer(client_pkg).unwrap();
+                    let other_unit = self
+                        .server_component_units
+                        .get(other_component)
+                        .unwrap();
+                    let other_node = nodes.get(other_unit).unwrap();
+                    graph.add_edge(*my_node, *other_node, client_pkg.clone());
+                }
+            }
+        }
+
+        Ok(Dot::new(&graph).to_string())
+    }
+
+    /// Returns a string that can be passed to `dot(1)` to render a graph of
+    /// API dependencies among server components
+    pub fn dot_by_server_component(
+        &self,
+        filter: ApiDependencyFilter,
+    ) -> Result<String> {
+        let mut graph = petgraph::graph::Graph::new();
+        let nodes: BTreeMap<_, _> = self
+            .server_component_units
+            .keys()
+            .map(|server_component| {
+                (server_component.clone(), graph.add_node(server_component))
+            })
+            .collect();
+
+        // Now walk through the server components, walk through each one of the
+        // clients used by those, and create a corresponding edge.
+        for server_component in self.apis_consumed.keys() {
+            // unwrap(): we created a node for each server component above.
+            let my_node = nodes.get(server_component).unwrap();
+            let consumed_apis =
+                self.component_apis_consumed(server_component, filter)?;
+            for (client_pkg, _) in consumed_apis {
+                let other_component = self.api_producer(client_pkg).unwrap();
+                let other_node = nodes.get(other_component).unwrap();
+                graph.add_edge(*my_node, *other_node, client_pkg.clone());
+            }
+        }
+
+        Ok(Dot::new(&graph).to_string())
+    }
+}
+
+/// Helper for building structures to index which deployment units contain which
+/// server components and what APIs those components expose
+///
+/// See `SystemApis::load()` for how this is used.
+struct ServerComponentsTracker<'a> { + // inputs + known_server_packages: &'a BTreeMap, + + // outputs (structures that we're building up) + errors: Vec, + server_component_units: BTreeMap, + unit_server_components: + BTreeMap>, + api_producers: BTreeMap, +} + +impl<'a> ServerComponentsTracker<'a> { + pub fn new( + known_server_packages: &'a BTreeMap, + ) -> ServerComponentsTracker<'a> { + ServerComponentsTracker { + known_server_packages, + errors: Vec::new(), + server_component_units: BTreeMap::new(), + unit_server_components: BTreeMap::new(), + api_producers: BTreeMap::new(), + } + } + + /// Record that `server_pkgname` exposes API `api` by virtue of the + /// dependency chain `dep_path` + pub fn found_api_producer( + &mut self, + api: &ApiMetadata, + server_pkgname: &ServerComponentName, + dep_path: &DepPath, + ) { + // TODO dns-server is used by both the dns-server component *and* + // omicron-sled-agent's simulated sled agent. This program does not + // support that. But we don't care about the simulated sled agent, + // either, so just ignore it. + // + // This exception cannot currently be encoded in the + // "dependency_filter_rules" metadata because that metadata is applied + // as a postprocessing step. But we can't even build up our data model + // in the first place unless we ignore this here. + if **server_pkgname == "omicron-sled-agent" + && *api.client_package_name == "dns-service-client" + { + eprintln!( + "note: ignoring Cargo dependency from omicron-sled-agent -> \ + dns-server", + ); + return; + } + + // TODO Crucible Pantry depends on Crucible (Upstairs). But Crucible + // Upstairs exposes an API (the Crucible Control API). That makes it + // look (from tracking Cargo dependencies) like Crucible Pantry exposes + // that API. But it doesn't. + // + // Like the above dns-server dependency, we can't build up our data + // model without ignoring this, so it can't currently be encoded in the + // "dependency_filter_rules" metadata. 
+ if **server_pkgname == "crucible-pantry" + && *api.client_package_name == "crucible-control-client" + { + eprintln!( + "note: ignoring Cargo dependency from crucible-pantry -> \ + ... -> crucible-control-client", + ); + return; + } + + if let Some((previous, _)) = self.api_producers.insert( + api.client_package_name.clone(), + (server_pkgname.clone(), dep_path.clone()), + ) { + self.errors.push(anyhow!( + "API for client {} appears to be exported by multiple \ + components: at least {} and {} ({:?})", + api.client_package_name, + previous, + server_pkgname, + dep_path + )); + } + } + + /// Record that deployment unit package `dunit_pkgname` depends on package + /// `pkgname` via dependency chain `dep_path` + /// + /// This only records anything if `pkgname` turns out to be a known API + /// client package name, in which case this records that the server + /// component consumes the corresponding API. + pub fn found_package( + &mut self, + dunit_pkgname: &ServerComponentName, + pkgname: &str, + dep_path: &DepPath, + ) { + let Some(api) = self.known_server_packages.get(pkgname) else { + return; + }; + + self.found_api_producer(api, dunit_pkgname, dep_path); + } + + /// Record that the given package is one of the deployment unit's top-level + /// packages (server components) + pub fn found_deployment_unit_package( + &mut self, + deployment_unit: &DeploymentUnitName, + server_component: &ServerComponentName, + ) -> Result<()> { + if let Some(previous) = self + .server_component_units + .insert(server_component.clone(), deployment_unit.clone()) + { + bail!( + "server component {:?} found in multiple deployment \ + units (at least {} and {})", + server_component, + deployment_unit, + previous + ); + } + + assert!(self + .unit_server_components + .entry(deployment_unit.clone()) + .or_default() + .insert(server_component.clone())); + Ok(()) + } +} + +/// Helper for building structures to track which APIs are consumed by which +/// server components +struct 
ClientDependenciesTracker<'a> { + // inputs + api_metadata: &'a AllApiMetadata, + + // outputs (structures that we're building up) + apis_consumed: BTreeMap< + ServerComponentName, + BTreeMap>, + >, + api_consumers: BTreeMap< + ClientPackageName, + BTreeMap>, + >, +} + +impl<'a> ClientDependenciesTracker<'a> { + fn new(api_metadata: &'a AllApiMetadata) -> ClientDependenciesTracker<'a> { + ClientDependenciesTracker { + api_metadata, + apis_consumed: BTreeMap::new(), + api_consumers: BTreeMap::new(), + } + } + + /// Record that component `server_pkgname` consumes package `pkgname` via + /// dependency chain `dep_path` + /// + /// This only records cases where `pkgname` is a known client package for + /// one of our APIs, in which case it records that this server component + /// consumes the corresponding API. + fn found_dependency( + &mut self, + server_pkgname: &ServerComponentName, + pkgname: &str, + dep_path: &DepPath, + ) { + if self.api_metadata.client_pkgname_lookup(pkgname).is_none() { + return; + } + + // This is the name of a known client package. Record it. + let client_pkgname = ClientPackageName::from(pkgname.to_owned()); + self.api_consumers + .entry(client_pkgname.clone()) + .or_insert_with(BTreeMap::new) + .entry(server_pkgname.clone()) + .or_insert_with(Vec::new) + .push(dep_path.clone()); + self.apis_consumed + .entry(server_pkgname.clone()) + .or_insert_with(BTreeMap::new) + .entry(client_pkgname) + .or_insert_with(Vec::new) + .push(dep_path.clone()); + } +} + +/// Specifies which API dependencies to include vs. 
ignore when iterating +/// dependencies +#[derive(Clone, Copy, Debug, Default, Display, FromStr)] +#[display(style = "kebab-case")] +pub enum ApiDependencyFilter { + /// Include all dependencies found from Cargo package metadata + All, + + /// Include _only_ bogus dependencies (mainly useful for seeing what's + /// normally being excluded) + Bogus, + + /// Include all dependencies found from Cargo package metadata that have not + /// been explicitly marked as bogus (false positives) + /// + /// Relative to the default, this includes dependencies that have been + /// explicitly excluded from the online update DAG as well as dependencies + /// from programs that are not deployed (but within packages that are + /// deployed). + NonBogus, + + /// Include dependencies that have been explicitly excluded from the online + /// update DAG + IncludeNonDag, + + /// Exclude found dependencies that are: + /// + /// - explicitly marked as outside the update DAG + /// - bogus (do not reflect real dependencies) + /// - not part of production deployments + #[default] + Default, +} + +impl ApiDependencyFilter { + /// Return whether this filter should include a dependency on + /// `client_pkgname` that goes through dependency path `dep_path` + fn should_include( + &self, + api_metadata: &AllApiMetadata, + workspaces: &Workspaces, + client_pkgname: &ClientPackageName, + dep_path: &DepPath, + ) -> Result { + let evaluation = api_metadata + .evaluate_dependency(workspaces, client_pkgname, dep_path) + .with_context(|| format!("error applying filter {:?}", self))?; + + Ok(match self { + ApiDependencyFilter::All => true, + ApiDependencyFilter::Bogus => { + matches!(evaluation, Evaluation::Bogus) + } + ApiDependencyFilter::NonBogus => { + !matches!(evaluation, Evaluation::Bogus) + } + ApiDependencyFilter::IncludeNonDag => !matches!( + evaluation, + Evaluation::Bogus | Evaluation::NotDeployed + ), + ApiDependencyFilter::Default => !matches!( + evaluation, + Evaluation::NonDag + | 
Evaluation::Bogus + | Evaluation::NotDeployed + ), + }) + } +} diff --git a/dev-tools/ls-apis/src/workspaces.rs b/dev-tools/ls-apis/src/workspaces.rs new file mode 100644 index 0000000000..ef1ba0ee79 --- /dev/null +++ b/dev-tools/ls-apis/src/workspaces.rs @@ -0,0 +1,292 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Combines information about multiple `Workspace`s + +use crate::api_metadata::AllApiMetadata; +use crate::cargo::Workspace; +use crate::ClientPackageName; +use anyhow::{anyhow, ensure, Context, Result}; +use camino::Utf8Path; +use cargo_metadata::Package; +use cargo_metadata::PackageId; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::sync::Arc; + +/// Thin wrapper around a list of workspaces that makes it easy to query which +/// workspace has which package +pub(crate) struct Workspaces { + workspaces: BTreeMap, +} + +impl Workspaces { + /// Use `cargo metadata` to load workspace metadata for all the workspaces + /// that we care about + /// + /// The data found is validated against `api_metadata`. + /// + /// On success, returns `(workspaces, warnings)`, where `warnings` is a list + /// of potential inconsistencies between API metadata and Cargo metadata. + pub fn load( + api_metadata: &AllApiMetadata, + ) -> Result<(Workspaces, Vec)> { + // First, load information about the "omicron" workspace. This is the + // current workspace so we don't need to provide the path to it. + let ignored_non_clients = api_metadata.ignored_non_clients(); + let omicron = + Arc::new(Workspace::load("omicron", None, ignored_non_clients)?); + + // In order to assemble this metadata, Cargo already has a clone of most + // of the other workspaces that we care about. We'll use those clones + // rather than manage our own. 
+ // + // To find each of these other repos, we'll need to look up a package + // that comes from each of these workspaces and look at where its local + // manifest file is. + // + // Loading each workspace involves running `cargo metadata`, which is + // pretty I/O intensive. Latency benefits significantly from + // parallelizing, though we have to respect the dependencies. We can't + // look up a package in "maghemite" before we've loaded Maghemite. + // + // If we had many more repos than this, we'd probably want to limit the + // concurrency. + let handles: Vec<_> = [ + // To find this repo ... look up this package in Omicron + // v v + ("crucible", "crucible-agent-client"), + ("propolis", "propolis-client"), + ("maghemite", "mg-admin-client"), + ] + .into_iter() + .map(|(repo, omicron_pkg)| { + let mine = omicron.clone(); + let my_ignored = ignored_non_clients.clone(); + std::thread::spawn(move || { + load_dependent_repo(&mine, repo, omicron_pkg, my_ignored) + }) + }) + .collect(); + + let mut workspaces: BTreeMap<_, _> = handles + .into_iter() + .map(|join_handle| { + let thr_result = join_handle.join().map_err(|e| { + anyhow!("workspace load thread panicked: {:?}", e) + })?; + let workspace = thr_result?; + Ok::<_, anyhow::Error>((workspace.name().to_owned(), workspace)) + }) + .collect::, _>>()?; + workspaces.insert( + String::from("omicron"), + Arc::into_inner(omicron).expect("no more Omicron Arc references"), + ); + + // To load Dendrite, we need to look something up in Maghemite (loaded + // above). + let maghemite = workspaces + .get("maghemite") + .ok_or_else(|| anyhow!("missing maghemite workspaces"))?; + + workspaces.insert( + String::from("dendrite"), + load_dependent_repo( + &maghemite, + "dendrite", + "dpd-client", + ignored_non_clients.clone(), + )?, + ); + + // Validate the metadata against what we found in the workspaces. 
+ let mut client_pkgnames_unused: BTreeSet<_> = + api_metadata.client_pkgnames().collect(); + let mut warnings = Vec::new(); + for (_, workspace) in &workspaces { + for client_pkgname in workspace.client_packages() { + if api_metadata.client_pkgname_lookup(client_pkgname).is_some() + { + // It's possible that we will find multiple references + // to the same client package name. That's okay. + client_pkgnames_unused.remove(client_pkgname); + } else { + warnings.push(anyhow!( + "workspace {}: found client package missing from API \ + manifest: {}", + workspace.name(), + client_pkgname + )); + } + } + } + + for c in client_pkgnames_unused { + warnings.push(anyhow!( + "API manifest refers to unknown client package: {}", + c + )); + } + + Ok((Workspaces { workspaces }, warnings)) + } + + /// Given the name of a workspace package from one of our workspaces, return + /// the corresponding `Workspace` and `Package` + /// + /// This is only for finding packages defined *in* one of these workspaces. + /// For any other kind of package (e.g., transitive dependencies, which + /// might come from crates.io or other Git repositories), the name is not + /// unique and you'd need to use some other mechanism to get information + /// about it. + pub fn find_package_workspace( + &self, + server_pkgname: &str, + ) -> Result<(&Workspace, &Package)> { + // Figure out which workspace has this package. + let found_in_workspaces: Vec<_> = self + .workspaces + .values() + .filter_map(|w| { + w.find_workspace_package(&server_pkgname).map(|p| (w, p)) + }) + .collect(); + + // TODO As of this writing, we have two distinct packages called + // "dpd-client": + // + // - There's one in the "dendrite" repo. This is used by: + // - `swadm` (in Dendrite) + // - `tfportd` (in Dendrite) + // - `ddm` (in Maghemite) + // - `ddmd` (in Maghemite) + // - `mgd` (via `mg-lower`) (in Maghemite) + // - There's one in the "omicron" repo. 
This is used by: + // - `wicketd` (in Omicron) + // - `omicron-sled-agent` (in Omicron) + // - `omicron-nexus` (in Omicron) + // + // This is problematic for two reasons: + // + // 1. This tool assumes that every API has exactly one client and it + // uses the client as the primary key to identify the API. + // + // 2. The Rust package name is supposed to be unique. This happens to + // work, probably in part because the packages in the above two + // groups are never built in the same workspace. This tool _does_ + // merge information from all these workspaces, and it's likely + // conflating the two packages. That happens to be a good thing + // because it keeps (1) from being an actual problem. That is: if + // this tool actually realized they were separate Rust packages, then + // it would be upset that we had two different clients for the same + // API. + // + // To keep things working, we just have this function always report the + // one in the Omicron repo. + if server_pkgname == "dpd-client" && found_in_workspaces.len() == 2 { + if found_in_workspaces[0].0.name() == "omicron" { + return Ok(found_in_workspaces[0]); + } + if found_in_workspaces[1].0.name() == "omicron" { + return Ok(found_in_workspaces[1]); + } + } + ensure!( + !found_in_workspaces.is_empty(), + "server package {:?} was not found in any workspace", + server_pkgname + ); + ensure!( + found_in_workspaces.len() == 1, + "server package {:?} was found in more than one workspace: {}", + server_pkgname, + found_in_workspaces + .into_iter() + .map(|(w, _)| w.name()) + .collect::>() + .join(", ") + ); + Ok(found_in_workspaces[0]) + } + + /// Returns the set of distinct pkgids for package "pkg" among all + /// workspaces. 
+ pub fn workspace_pkgids<'a>( + &'a self, + pkgname: &'a str, + ) -> BTreeSet<&'a PackageId> { + self.workspaces.values().flat_map(move |w| w.pkgids(pkgname)).collect() + } +} + +/// Load a `Workspace` for a repo `repo` using the manifest path inferred by +/// looking up one of its packages `pkgname` in `workspace` +/// +/// For example, we might locate the Crucible repo by looking up the +/// `crucible-pantry-client` package in the Omicron workspace, finding its +/// manifest path, and locating the containing Crucible workspace. +fn load_dependent_repo( + workspace: &Workspace, + repo: &str, + pkgname: &str, + ignored_non_clients: BTreeSet, +) -> Result { + // `Workspace` doesn't let us look up a non-workspace package by name + // because there may be many of them. So list all the pkgids and take any + // one of them -- any of them should work for our purposes. + let pkgid = workspace.pkgids(pkgname).next().ok_or_else(|| { + anyhow!( + "workspace {} did not contain expected package {}", + workspace.name(), + pkgname + ) + })?; + + // Now we can look up the package metadata. + let pkg = workspace.pkg_by_id(pkgid).ok_or_else(|| { + anyhow!( + "workspace {}: did not contain expected package id {}", + workspace.name(), + pkgname + ) + })?; + + // The package metadata should show where the package's manifest file should + // be. This may be buried deep in the workspace. How do we find the root + // of the workspace? Fortunately, `cargo locate-project` can do this. 
+ let cargo_var = std::env::var("CARGO"); + let cargo = cargo_var.as_deref().unwrap_or("cargo"); + let output = std::process::Command::new(cargo) + .arg("locate-project") + .arg("--workspace") + .arg("--manifest-path") + .arg(&pkg.manifest_path) + .arg("--message-format") + .arg("plain") + .output() + .context("`cargo locate-project`") + .and_then(|output| { + if !output.status.success() { + Err(anyhow!( + "`cargo locate-project` exited with {:?}: stderr: {:?}", + output.status, + String::from_utf8_lossy(&output.stderr), + )) + } else { + String::from_utf8(output.stdout).map_err(|_| { + anyhow!("`cargo locate-project` output was not UTF-8") + }) + } + }) + .with_context(|| { + format!( + "locating workspace for {:?} (from {:?}) with \ + `cargo locate-project`", + pkgname, &pkg.manifest_path + ) + })?; + let workspace_manifest = Utf8Path::new(output.trim_end()); + Workspace::load(repo, Some(workspace_manifest), &ignored_non_clients) +} diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index 644138f58c..7d734218fc 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ b/dev-tools/openapi-manager/src/spec.rs @@ -13,6 +13,7 @@ use openapi_manager_types::{ValidationBackend, ValidationContext}; use openapiv3::OpenAPI; /// All APIs managed by openapi-manager. +// TODO The metadata here overlaps with metadata in api-manifest.toml. pub fn all_apis() -> Vec { vec![ ApiSpec { diff --git a/dev-tools/xtask/src/main.rs b/dev-tools/xtask/src/main.rs index 9880adeb67..2d359202de 100644 --- a/dev-tools/xtask/src/main.rs +++ b/dev-tools/xtask/src/main.rs @@ -51,6 +51,9 @@ enum Cmds { /// Utilities for working with CockroachDB databases. 
DbDev(external::External), + /// Show information about Progenitor-based APIs + LsApis(external::External), + /// Check that all features are flagged correctly CheckFeatures(check_features::Args), /// Check that dependencies are not duplicated in any packages in the @@ -133,6 +136,7 @@ fn main() -> Result<()> { } } Cmds::LiveTests(args) => live_tests::run_cmd(args), + Cmds::LsApis(external) => external.exec_bin("ls-apis"), Cmds::MgsDev(external) => external.exec_bin("mgs-dev"), Cmds::OmicronDev(external) => external.exec_bin("omicron-dev"), Cmds::Openapi(external) => external.exec_bin("openapi-manager"), diff --git a/package-manifest.toml b/package-manifest.toml index 8ce1bf76e0..4cb6a93e20 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -33,6 +33,13 @@ # This file defines all of the packages that make up Omicron and how to build # each one. `omicron-package` and `thing-flinger` process this file to build # and deploy these packages. +# +# **For a visual of how all this works, run:** +# +# $ cargo run --bin=omicron-package -- dot > packages.dot +# $ dot -Tpng -o packages.png packages.dot +# +# and open up `packages.png`. [package.omicron-sled-agent] service_name = "sled-agent"