From 72a0429debfaf4feeec2f952fefe3ffbffeb06f6 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 10 Oct 2023 17:50:06 -0700 Subject: [PATCH 01/13] [update-engine] fix buffer tests (#4163) Apparently I'd made a couple of mistakes while writing tests: * I was adding all events a second time by accident, which was hiding the fact that... * A couple not signs were flipped, whoops. --- update-engine/src/buffer.rs | 46 ++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/update-engine/src/buffer.rs b/update-engine/src/buffer.rs index 3de0e45f24..1779ef7da6 100644 --- a/update-engine/src/buffer.rs +++ b/update-engine/src/buffer.rs @@ -1389,7 +1389,10 @@ mod tests { test_cx .run_filtered_test( "all events passed in", - |buffer, event| buffer.add_event(event.clone()), + |buffer, event| { + buffer.add_event(event.clone()); + true + }, WithDeltas::No, ) .unwrap(); @@ -1397,10 +1400,12 @@ mod tests { test_cx .run_filtered_test( "progress events skipped", - |buffer, event| { - if let Event::Step(event) = event { + |buffer, event| match event { + Event::Step(event) => { buffer.add_step_event(event.clone()); + true } + Event::Progress(_) => false, }, WithDeltas::Both, ) @@ -1410,13 +1415,16 @@ mod tests { .run_filtered_test( "low-priority events skipped", |buffer, event| match event { - Event::Step(event) => { - if event.kind.priority() == StepEventPriority::Low { + Event::Step(event) => match event.kind.priority() { + StepEventPriority::High => { buffer.add_step_event(event.clone()); + true } - } + StepEventPriority::Low => false, + }, Event::Progress(event) => { buffer.add_progress_event(event.clone()); + true } }, WithDeltas::Both, @@ -1427,13 +1435,16 @@ mod tests { .run_filtered_test( "low-priority and progress events skipped", |buffer, event| match event { - Event::Step(event) => { - if event.kind.priority() == StepEventPriority::Low { + Event::Step(event) => match event.kind.priority() { + StepEventPriority::High => { buffer.add_step_event(event.clone()); + true } - } + StepEventPriority::Low => false, + }, Event::Progress(_) => { - // Don't add progress events either. + // Don't add progress events. + false } }, WithDeltas::Both, @@ -1565,7 +1576,10 @@ mod tests { fn run_filtered_test( &self, event_fn_description: &str, - mut event_fn: impl FnMut(&mut EventBuffer, &Event), + mut event_fn: impl FnMut( + &mut EventBuffer, + &Event, + ) -> bool, with_deltas: WithDeltas, ) -> anyhow::Result<()> { match with_deltas { @@ -1590,7 +1604,10 @@ mod tests { fn run_filtered_test_inner( &self, - mut event_fn: impl FnMut(&mut EventBuffer, &Event), + mut event_fn: impl FnMut( + &mut EventBuffer, + &Event, + ) -> bool, with_deltas: bool, ) -> anyhow::Result<()> { let description = format!("with deltas = {with_deltas}"); @@ -1608,8 +1625,9 @@ mod tests { let mut last_seen_opt = with_deltas.then_some(None); for (i, event) in self.generated_events.iter().enumerate() { - (event_fn)(&mut buffer, event); - buffer.add_event(event.clone()); + // Going to use event_added in an upcoming commit. + let _event_added = (event_fn)(&mut buffer, event); + let report = match last_seen_opt { Some(last_seen) => buffer.generate_report_since(last_seen), None => buffer.generate_report(), From 194889b956abbb3e01ce25b11b733c02598c3215 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 10 Oct 2023 19:27:33 -0700 Subject: [PATCH 02/13] [buildomat] authorize PRs generated by oxide-renovate (#4244) Means that PRs like https://github.com/oxidecomputer/omicron/pull/4241 will be automatically authorized. 
Also skip cargo-hakari update if cargo isn't present. --- .github/buildomat/config.toml | 1 + tools/renovate-post-upgrade.sh | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/.github/buildomat/config.toml b/.github/buildomat/config.toml index 922de631f2..419173fa50 100644 --- a/.github/buildomat/config.toml +++ b/.github/buildomat/config.toml @@ -17,5 +17,6 @@ org_only = true allow_users = [ "dependabot[bot]", "oxide-reflector-bot[bot]", + "oxide-renovate[bot]", "renovate[bot]", ] diff --git a/tools/renovate-post-upgrade.sh b/tools/renovate-post-upgrade.sh index c21832e0a9..2699f9f6a0 100755 --- a/tools/renovate-post-upgrade.sh +++ b/tools/renovate-post-upgrade.sh @@ -22,6 +22,13 @@ function retry_command { done } +# If cargo isn't present, skip this -- it implies that a non-Rust dependency was +# updated. +if ! command -v cargo &> /dev/null; then + echo "Skipping cargo-hakari update because cargo is not present." + exit 0 +fi + # Download and install cargo-hakari if it is not already installed. if ! command -v cargo-hakari &> /dev/null; then # Need cargo-binstall to install cargo-hakari. From a972c80c407b68848c178aca236ae00067bc4d3b Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Wed, 11 Oct 2023 17:59:57 +0100 Subject: [PATCH 03/13] destroy_virtual_hardware.sh needs to unmount backing filesystems (#4255) The backing filesystems added in d624bce9af2 prevent the destroy_virtual_hardware.sh script from properly cleaning up all ZFS pools and cause the fmd service to go into maintenance which delays control plane startup. This updates the script to unwind the backing datasets as part of its work. --- tools/destroy_virtual_hardware.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/destroy_virtual_hardware.sh b/tools/destroy_virtual_hardware.sh index ae6fef0673..46c6f117c4 100755 --- a/tools/destroy_virtual_hardware.sh +++ b/tools/destroy_virtual_hardware.sh @@ -56,7 +56,23 @@ function remove_softnpu_zone { --ports sc0_1,tfportqsfp0_0 } +# Some services have their working data overlaid by backing mounts from the +# internal boot disk. Before we can destroy the ZFS pools, we need to unmount +# these. 
+ +BACKED_SERVICES="svc:/system/fmd:default" + +function demount_backingfs { + svcadm disable -st $BACKED_SERVICES + zpool list -Hpo name | grep '^oxi_' \ + | xargs -i zfs list -Hpo name,canmount,mounted -r {}/backing \ + | awk '$3 == "yes" && $2 == "noauto" { print $1 }' \ + | xargs -l zfs umount + svcadm enable -st $BACKED_SERVICES +} + verify_omicron_uninstalled +demount_backingfs unload_xde_driver remove_softnpu_zone try_remove_vnics From 1a21fdd581d80d92287c9f29e095dbee11f65b28 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 12:23:46 -0700 Subject: [PATCH 04/13] Update Rust crate proptest to 1.3.1 (#4243) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 10 +++++----- Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 306e953049..421bbd5e16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6737,19 +6737,19 @@ dependencies = [ [[package]] name = "proptest" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e35c06b98bf36aba164cc17cb25f7e232f5c4aeea73baa14b8a9f0d92dbfa65" +checksum = "7c003ac8c77cb07bb74f5f198bce836a689bcd5a42574612bf14d17bfd08c20e" dependencies = [ "bit-set", - "bitflags 1.3.2", - "byteorder", + "bit-vec", + "bitflags 2.4.0", "lazy_static", "num-traits", "rand 0.8.5", "rand_chacha 0.3.1", "rand_xorshift", - "regex-syntax 0.6.29", + "regex-syntax 0.7.5", "rusty-fork", "tempfile", "unarray", diff --git a/Cargo.toml b/Cargo.toml index da7b582fe3..fdd67c3b5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -283,7 +283,7 @@ progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branc bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e" } propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", features = [ "generated-migration" ] } propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", default-features = false, features = ["mock-only"] } -proptest = "1.2.0" +proptest = "1.3.1" quote = "1.0" rand = "0.8.5" ratatui = "0.23.0" From 02aef4bec751b47b6d19adbeef9e51c42c10204d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 12:34:34 -0700 Subject: [PATCH 05/13] Update Rust crate predicates to 3.0.4 (#4254) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 12 ++++++------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 421bbd5e16..d58ba77133 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -283,7 +283,7 @@ dependencies = [ "anstyle", "bstr 1.6.0", "doc-comment", - "predicates 3.0.3", + "predicates 3.0.4", "predicates-core", "predicates-tree", "wait-timeout", @@ -5442,7 +5442,7 @@ dependencies = [ "phf_shared 0.11.2", "postgres-types", "ppv-lite86", - "predicates 3.0.3", + "predicates 3.0.4", "rand 0.8.5", "rand_chacha 0.3.1", "regex", @@ -6437,14 +6437,14 @@ dependencies = [ [[package]] name = "predicates" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" 
+checksum = "6dfc28575c2e3f19cb3c73b93af36460ae898d426eba6fc15b9bd2a5220758a0" dependencies = [ "anstyle", "difflib", "float-cmp", - "itertools 0.10.5", + "itertools 0.11.0", "normalize-line-endings", "predicates-core", "regex", @@ -9374,7 +9374,7 @@ dependencies = [ "omicron-common 0.1.0", "omicron-test-utils", "omicron-workspace-hack", - "predicates 3.0.3", + "predicates 3.0.4", "slog", "slog-async", "slog-envlogger", diff --git a/Cargo.toml b/Cargo.toml index fdd67c3b5c..832b8663e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -274,7 +274,7 @@ percent-encoding = "2.2.0" pem = "1.1" petgraph = "0.6.4" postgres-protocol = "0.6.6" -predicates = "3.0.3" +predicates = "3.0.4" pretty_assertions = "1.4.0" pretty-hex = "0.3.0" proc-macro2 = "1.0" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 106da92f62..a91477678b 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -73,7 +73,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } phf_shared = { version = "0.11.2" } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } -predicates = { version = "3.0.3" } +predicates = { version = "3.0.4" } rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] } rand_chacha = { version = "0.3.1" } regex = { version = "1.9.5" } @@ -171,7 +171,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } phf_shared = { version = "0.11.2" } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } -predicates = { version = "3.0.3" } +predicates = { version = "3.0.4" } rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] } rand_chacha = { version = "0.3.1" } regex = { version = "1.9.5" } From d12cb0ffceeb09c1cccdada29ca24c3829a2c9fa Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 12:37:42 -0700 Subject: [PATCH 06/13] Pin GitHub Actions dependencies (#4240) --- .github/workflows/check-opte-ver.yml | 2 +- .github/workflows/check-workspace-deps.yml | 2 +- .github/workflows/hakari.yml | 10 +++++----- .github/workflows/rust.yml | 14 +++++++------- .github/workflows/update-dendrite.yml | 2 +- .github/workflows/update-maghemite.yml | 2 +- .github/workflows/validate-openapi-spec.yml | 4 ++-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/check-opte-ver.yml b/.github/workflows/check-opte-ver.yml index 3b57f2795f..a8e18f080e 100644 --- a/.github/workflows/check-opte-ver.yml +++ b/.github/workflows/check-opte-ver.yml @@ -9,7 +9,7 @@ jobs: check-opte-ver: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Install jq run: sudo apt-get install -y jq - name: Install toml-cli diff --git a/.github/workflows/check-workspace-deps.yml b/.github/workflows/check-workspace-deps.yml index 9611c4103c..521afa7359 100644 --- a/.github/workflows/check-workspace-deps.yml +++ b/.github/workflows/check-workspace-deps.yml @@ -10,6 +10,6 @@ jobs: check-workspace-deps: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Check 
Workspace Dependencies run: cargo xtask check-workspace-deps diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index d79196d318..df4cbc9b59 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -17,21 +17,21 @@ jobs: env: RUSTFLAGS: -D warnings steps: - - uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4 + - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1 with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@v2 + uses: taiki-e/install-action@e659bf85ee986e37e35cc1c53bfeebe044d8133e # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date - uses: actions-rs/cargo@v1 + uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # v1 with: command: hakari args: generate --diff - name: Check all crates depend on workspace-hack - uses: actions-rs/cargo@v1 + uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # v1 with: command: hakari args: manage-deps --dry-run diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f5cf1dc885..873b316e16 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -9,7 +9,7 @@ jobs: check-style: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Report cargo version run: cargo --version - name: Report rustfmt version @@ -27,8 +27,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -53,8 +53,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -79,8 +79,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version diff --git a/.github/workflows/update-dendrite.yml b/.github/workflows/update-dendrite.yml index 86049dcafc..10d8ef7618 100644 --- a/.github/workflows/update-dendrite.yml +++ b/.github/workflows/update-dendrite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@v3.5.0 + - uses: 
actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/update-maghemite.yml b/.github/workflows/update-maghemite.yml index 07fe329af3..7aa2b8b6c8 100644 --- a/.github/workflows/update-maghemite.yml +++ b/.github/workflows/update-maghemite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/validate-openapi-spec.yml b/.github/workflows/validate-openapi-spec.yml index 06fc7526a8..1d6c152296 100644 --- a/.github/workflows/validate-openapi-spec.yml +++ b/.github/workflows/validate-openapi-spec.yml @@ -10,8 +10,8 @@ jobs: format: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 - - uses: actions/setup-node@v3.6.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/setup-node@64ed1c7eab4cce3362f8c340dee64e5eaeef8f7c # v3.6.0 with: node-version: '18' - name: Install our tools From 7d335441ad87b17e7ff1bea3ea04b16d47e5567e Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 11 Oct 2023 15:29:43 -0700 Subject: [PATCH 07/13] [renovate] download install-from-binstall-release.sh into a temp dir (#4260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whoops, this would leave an untracked file in the repo if run (and then accidentally be checked in 😬) --- tools/renovate-post-upgrade.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/renovate-post-upgrade.sh b/tools/renovate-post-upgrade.sh index 2699f9f6a0..4a9e3aa2f2 100755 --- a/tools/renovate-post-upgrade.sh +++ b/tools/renovate-post-upgrade.sh @@ -35,8 +35,10 @@ if ! command -v cargo-hakari &> /dev/null; then if ! command -v cargo-binstall &> /dev/null; then # Fetch cargo binstall. echo "Installing cargo-binstall..." - curl --retry 3 -L --proto '=https' --tlsv1.2 -sSfO https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh - retry_command bash install-from-binstall-release.sh + tempdir=$(mktemp -d) + curl --retry 3 -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh -o "$tempdir"/install-from-binstall-release.sh + retry_command bash "$tempdir"/install-from-binstall-release.sh + rm -rf "$tempdir" fi # Install cargo-hakari. From a903d61bcc8813f0ef4fbe974f469ff4619e1cc0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 11 Oct 2023 18:31:55 -0700 Subject: [PATCH 08/13] Simplify Diesel Error management (#4210) Depends on https://github.com/oxidecomputer/async-bb8-diesel/pull/54 As of https://github.com/oxidecomputer/omicron/pull/4140 , we check out connections before issuing queries to the underlying database. This means that when we receive errors from the database, they are not overloaded as "connection checkout" OR "database" errors - they are now always database errors. 
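The shape of the change, roughly: errors that previously arrived wrapped as ConnectionError::Query(DieselError) now arrive as a bare DieselError, so TransactionError and its callers can drop one layer of matching. A minimal sketch of the resulting pattern, using simplified stand-in types rather than the real nexus-db-queries definitions:

    // Sketch with stand-in types; the real TransactionError lives in
    // nexus/db-queries/src/db/error.rs and carries more context.
    use diesel::result::Error as DieselError;

    #[derive(Debug)]
    enum TransactionError<T> {
        CustomError(T),
        // Previously: Connection(async_bb8_diesel::ConnectionError), which in
        // turn wrapped the Diesel error as ConnectionError::Query(..).
        Database(DieselError),
    }

    fn classify<T: std::fmt::Debug>(err: TransactionError<T>) -> String {
        match err {
            TransactionError::CustomError(e) => format!("application error: {e:?}"),
            // One layer of matching instead of two.
            TransactionError::Database(DieselError::NotFound) => "not found".to_string(),
            TransactionError::Database(e) => format!("database error: {e}"),
        }
    }

    fn main() {
        let err: TransactionError<&str> = TransactionError::Database(DieselError::NotFound);
        println!("{}", classify(err));
    }

The same flattening is what lets the diff below swap the hand-rolled diesel_result_optional helper for Diesel's own OptionalExtension::optional() in several datastore methods.
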
--- Cargo.lock | 2 +- Cargo.toml | 2 +- nexus/db-queries/src/db/collection_attach.rs | 10 +- nexus/db-queries/src/db/collection_detach.rs | 5 +- .../src/db/collection_detach_many.rs | 8 +- nexus/db-queries/src/db/collection_insert.rs | 18 ++- .../src/db/datastore/address_lot.rs | 29 ++--- .../src/db/datastore/db_metadata.rs | 2 +- .../src/db/datastore/device_auth.rs | 2 +- nexus/db-queries/src/db/datastore/dns.rs | 2 +- .../src/db/datastore/external_ip.rs | 3 +- nexus/db-queries/src/db/datastore/ip_pool.rs | 64 +++++------ nexus/db-queries/src/db/datastore/mod.rs | 13 +-- .../src/db/datastore/network_interface.rs | 3 +- nexus/db-queries/src/db/datastore/project.rs | 24 ++-- nexus/db-queries/src/db/datastore/rack.rs | 7 +- .../src/db/datastore/region_snapshot.rs | 2 +- nexus/db-queries/src/db/datastore/role.rs | 2 +- nexus/db-queries/src/db/datastore/silo.rs | 24 ++-- .../db-queries/src/db/datastore/silo_group.rs | 5 +- nexus/db-queries/src/db/datastore/sled.rs | 2 +- nexus/db-queries/src/db/datastore/snapshot.rs | 16 +-- .../src/db/datastore/switch_interface.rs | 24 ++-- .../src/db/datastore/switch_port.rs | 104 +++++++----------- nexus/db-queries/src/db/datastore/update.rs | 2 +- nexus/db-queries/src/db/datastore/volume.rs | 8 +- nexus/db-queries/src/db/datastore/vpc.rs | 49 ++++----- nexus/db-queries/src/db/error.rs | 79 ++++--------- nexus/db-queries/src/db/explain.rs | 7 +- nexus/db-queries/src/db/pool.rs | 4 +- .../db-queries/src/db/queries/external_ip.rs | 3 +- .../src/db/queries/network_interface.rs | 96 +++++++--------- .../src/db/queries/region_allocation.rs | 3 +- nexus/db-queries/src/db/queries/vpc_subnet.rs | 20 ++-- nexus/db-queries/src/db/true_or_cast_error.rs | 9 +- nexus/db-queries/src/db/update_and_check.rs | 3 +- nexus/src/app/sagas/disk_create.rs | 5 +- nexus/src/app/sagas/instance_create.rs | 4 +- nexus/src/app/sagas/project_create.rs | 5 +- nexus/src/app/sagas/snapshot_create.rs | 6 +- nexus/src/app/sagas/vpc_create.rs | 6 +- 41 files changed, 291 insertions(+), 391 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d58ba77133..d5a90f7f85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -298,7 +298,7 @@ checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" [[package]] name = "async-bb8-diesel" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/async-bb8-diesel?rev=da04c087f835a51e0441addb19c5ef4986e1fcf2#da04c087f835a51e0441addb19c5ef4986e1fcf2" +source = "git+https://github.com/oxidecomputer/async-bb8-diesel?rev=1446f7e0c1f05f33a0581abd51fa873c7652ab61#1446f7e0c1f05f33a0581abd51fa873c7652ab61" dependencies = [ "async-trait", "bb8", diff --git a/Cargo.toml b/Cargo.toml index 832b8663e6..7521bb4d45 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -136,7 +136,7 @@ api_identity = { path = "api_identity" } approx = "0.5.1" assert_matches = "1.5.0" assert_cmd = "2.0.12" -async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "da04c087f835a51e0441addb19c5ef4986e1fcf2" } +async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "1446f7e0c1f05f33a0581abd51fa873c7652ab61" } async-trait = "0.1.73" atomicwrites = "0.4.1" authz-macros = { path = "nexus/authz-macros" } diff --git a/nexus/db-queries/src/db/collection_attach.rs b/nexus/db-queries/src/db/collection_attach.rs index 40ec659bf9..ea4d9d5beb 100644 --- a/nexus/db-queries/src/db/collection_attach.rs +++ b/nexus/db-queries/src/db/collection_attach.rs @@ -17,7 +17,7 @@ use super::cte_utils::{ QueryFromClause, QuerySqlType, 
TableDefaultWhereClause, }; use super::pool::DbConnection; -use async_bb8_diesel::{AsyncRunQueryDsl, ConnectionError}; +use async_bb8_diesel::AsyncRunQueryDsl; use diesel::associations::HasTable; use diesel::expression::{AsExpression, Expression}; use diesel::helper_types::*; @@ -26,6 +26,7 @@ use diesel::prelude::*; use diesel::query_builder::*; use diesel::query_dsl::methods as query_methods; use diesel::query_source::Table; +use diesel::result::Error as DieselError; use diesel::sql_types::{BigInt, Nullable, SingleValue}; use nexus_db_model::DatastoreAttachTargetConfig; use std::fmt::Debug; @@ -299,7 +300,7 @@ where /// Result of [`AttachToCollectionStatement`] when executed asynchronously pub type AsyncAttachToCollectionResult = - Result<(C, ResourceType), AttachError>; + Result<(C, ResourceType), AttachError>; /// Errors returned by [`AttachToCollectionStatement`]. #[derive(Debug)] @@ -998,9 +999,8 @@ mod test { .set(resource::dsl::collection_id.eq(collection_id)), ); - type TxnError = TransactionError< - AttachError, - >; + type TxnError = + TransactionError>; let result = conn .transaction_async(|conn| async move { attach_query.attach_and_get_result_async(&conn).await.map_err( diff --git a/nexus/db-queries/src/db/collection_detach.rs b/nexus/db-queries/src/db/collection_detach.rs index df157040e6..03e09d41ca 100644 --- a/nexus/db-queries/src/db/collection_detach.rs +++ b/nexus/db-queries/src/db/collection_detach.rs @@ -16,7 +16,7 @@ use super::cte_utils::{ QueryFromClause, QuerySqlType, }; use super::pool::DbConnection; -use async_bb8_diesel::{AsyncRunQueryDsl, ConnectionError}; +use async_bb8_diesel::AsyncRunQueryDsl; use diesel::associations::HasTable; use diesel::expression::{AsExpression, Expression}; use diesel::helper_types::*; @@ -25,6 +25,7 @@ use diesel::prelude::*; use diesel::query_builder::*; use diesel::query_dsl::methods as query_methods; use diesel::query_source::Table; +use diesel::result::Error as DieselError; use diesel::sql_types::{Nullable, SingleValue}; use nexus_db_model::DatastoreAttachTargetConfig; use std::fmt::Debug; @@ -230,7 +231,7 @@ where /// Result of [`DetachFromCollectionStatement`] when executed asynchronously pub type AsyncDetachFromCollectionResult = - Result>; + Result>; /// Errors returned by [`DetachFromCollectionStatement`]. #[derive(Debug)] diff --git a/nexus/db-queries/src/db/collection_detach_many.rs b/nexus/db-queries/src/db/collection_detach_many.rs index 0b65c404c5..8df6d4aed4 100644 --- a/nexus/db-queries/src/db/collection_detach_many.rs +++ b/nexus/db-queries/src/db/collection_detach_many.rs @@ -25,6 +25,7 @@ use diesel::prelude::*; use diesel::query_builder::*; use diesel::query_dsl::methods as query_methods; use diesel::query_source::Table; +use diesel::result::Error as DieselError; use diesel::sql_types::{Nullable, SingleValue}; use nexus_db_model::DatastoreAttachTargetConfig; use std::fmt::Debug; @@ -241,7 +242,7 @@ where /// Result of [`DetachManyFromCollectionStatement`] when executed asynchronously pub type AsyncDetachManyFromCollectionResult = - Result>; + Result>; /// Errors returned by [`DetachManyFromCollectionStatement`]. 
#[derive(Debug)] @@ -918,9 +919,8 @@ mod test { .set(resource::dsl::collection_id.eq(Option::::None)), ); - type TxnError = TransactionError< - DetachManyError, - >; + type TxnError = + TransactionError>; let result = conn .transaction_async(|conn| async move { detach_query.detach_and_get_result_async(&conn).await.map_err( diff --git a/nexus/db-queries/src/db/collection_insert.rs b/nexus/db-queries/src/db/collection_insert.rs index 993f16e048..b295f0574d 100644 --- a/nexus/db-queries/src/db/collection_insert.rs +++ b/nexus/db-queries/src/db/collection_insert.rs @@ -10,7 +10,7 @@ //! 3) inserts the child resource row use super::pool::DbConnection; -use async_bb8_diesel::{AsyncRunQueryDsl, ConnectionError}; +use async_bb8_diesel::AsyncRunQueryDsl; use diesel::associations::HasTable; use diesel::helper_types::*; use diesel::pg::Pg; @@ -18,6 +18,7 @@ use diesel::prelude::*; use diesel::query_builder::*; use diesel::query_dsl::methods as query_methods; use diesel::query_source::Table; +use diesel::result::Error as DieselError; use diesel::sql_types::SingleValue; use nexus_db_model::DatastoreCollectionConfig; use std::fmt::Debug; @@ -170,7 +171,7 @@ pub enum AsyncInsertError { /// The collection that the query was inserting into does not exist CollectionNotFound, /// Other database error - DatabaseError(ConnectionError), + DatabaseError(DieselError), } impl InsertIntoCollectionStatement @@ -238,14 +239,11 @@ where /// Translate from diesel errors into AsyncInsertError, handling the /// intentional division-by-zero error in the CTE. - fn translate_async_error(err: ConnectionError) -> AsyncInsertError { - match err { - ConnectionError::Query(err) - if Self::error_is_division_by_zero(&err) => - { - AsyncInsertError::CollectionNotFound - } - other => AsyncInsertError::DatabaseError(other), + fn translate_async_error(err: DieselError) -> AsyncInsertError { + if Self::error_is_division_by_zero(&err) { + AsyncInsertError::CollectionNotFound + } else { + AsyncInsertError::DatabaseError(err) } } } diff --git a/nexus/db-queries/src/db/datastore/address_lot.rs b/nexus/db-queries/src/db/datastore/address_lot.rs index 9d264dbf6b..97dfb59eba 100644 --- a/nexus/db-queries/src/db/datastore/address_lot.rs +++ b/nexus/db-queries/src/db/datastore/address_lot.rs @@ -13,9 +13,7 @@ use crate::db::error::TransactionError; use crate::db::model::Name; use crate::db::model::{AddressLot, AddressLotBlock, AddressLotReservedBlock}; use crate::db::pagination::paginated; -use async_bb8_diesel::{ - AsyncConnection, AsyncRunQueryDsl, Connection, ConnectionError, -}; +use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl, Connection}; use chrono::Utc; use diesel::result::Error as DieselError; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; @@ -84,15 +82,13 @@ impl DataStore { }) .await .map_err(|e| match e { - ConnectionError::Query(DieselError::DatabaseError(_, _)) => { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::AddressLot, - ¶ms.identity.name.as_str(), - ), - ) - } + DieselError::DatabaseError(_, _) => public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::AddressLot, + ¶ms.identity.name.as_str(), + ), + ), _ => public_error_from_diesel(e, ErrorHandler::Server), }) } @@ -151,7 +147,7 @@ impl DataStore { }) .await .map_err(|e| match e { - TxnError::Connection(e) => { + TxnError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } TxnError::CustomError(AddressLotDeleteError::LotInUse) => { @@ -252,11 +248,10 @@ pub(crate) async fn 
try_reserve_block( .limit(1) .first_async::(conn) .await - .map_err(|e| match e { - ConnectionError::Query(_) => ReserveBlockTxnError::CustomError( + .map_err(|_e| { + ReserveBlockTxnError::CustomError( ReserveBlockError::AddressNotInLot, - ), - e => e.into(), + ) })?; // Ensure the address is not already taken. diff --git a/nexus/db-queries/src/db/datastore/db_metadata.rs b/nexus/db-queries/src/db/datastore/db_metadata.rs index 181b3c1798..9e4e8b1a48 100644 --- a/nexus/db-queries/src/db/datastore/db_metadata.rs +++ b/nexus/db-queries/src/db/datastore/db_metadata.rs @@ -351,7 +351,7 @@ impl DataStore { match result { Ok(()) => Ok(()), Err(TransactionError::CustomError(())) => panic!("No custom error"), - Err(TransactionError::Connection(e)) => { + Err(TransactionError::Database(e)) => { Err(public_error_from_diesel(e, ErrorHandler::Server)) } } diff --git a/nexus/db-queries/src/db/datastore/device_auth.rs b/nexus/db-queries/src/db/datastore/device_auth.rs index e084834833..e1facb43f6 100644 --- a/nexus/db-queries/src/db/datastore/device_auth.rs +++ b/nexus/db-queries/src/db/datastore/device_auth.rs @@ -103,7 +103,7 @@ impl DataStore { TxnError::CustomError(TokenGrantError::TooManyRequests) => { Error::internal_error("unexpectedly found multiple device auth requests for the same user code") } - TxnError::Connection(e) => { + TxnError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } }) diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index d9704594b1..f7ad97593e 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -395,7 +395,7 @@ impl DataStore { match result { Ok(()) => Ok(()), Err(TransactionError::CustomError(e)) => Err(e), - Err(TransactionError::Connection(e)) => { + Err(TransactionError::Database(e)) => { Err(public_error_from_diesel(e, ErrorHandler::Server)) } } diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 268b284a0a..e663130a84 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -143,10 +143,9 @@ impl DataStore { ) -> CreateResult { let explicit_ip = data.explicit_ip().is_some(); NextExternalIp::new(data).get_result_async(conn).await.map_err(|e| { - use async_bb8_diesel::ConnectionError::Query; use diesel::result::Error::NotFound; match e { - Query(NotFound) => { + NotFound => { if explicit_ip { Error::invalid_request( "Requested external IP address not available", diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index bd3148f2f7..fb300ef833 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -10,7 +10,6 @@ use crate::context::OpContext; use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; -use crate::db::error::diesel_result_optional; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::fixed_data::silo::INTERNAL_SILO_ID; @@ -183,18 +182,17 @@ impl DataStore { opctx.authorize(authz::Action::Delete, authz_pool).await?; // Verify there are no IP ranges still in this pool - let range = diesel_result_optional( - ip_pool_range::dsl::ip_pool_range - .filter(ip_pool_range::dsl::ip_pool_id.eq(authz_pool.id())) - .filter(ip_pool_range::dsl::time_deleted.is_null()) - .select(ip_pool_range::dsl::id) - 
.limit(1) - .first_async::( - &*self.pool_connection_authorized(opctx).await?, - ) - .await, - ) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + let range = ip_pool_range::dsl::ip_pool_range + .filter(ip_pool_range::dsl::ip_pool_id.eq(authz_pool.id())) + .filter(ip_pool_range::dsl::time_deleted.is_null()) + .select(ip_pool_range::dsl::id) + .limit(1) + .first_async::( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; if range.is_some() { return Err(Error::InvalidRequest { message: @@ -313,7 +311,6 @@ impl DataStore { .insert_and_get_result_async(conn) .await .map_err(|e| { - use async_bb8_diesel::ConnectionError::Query; use diesel::result::Error::NotFound; match e { @@ -323,7 +320,7 @@ impl DataStore { lookup_type: LookupType::ById(pool_id), } } - AsyncInsertError::DatabaseError(Query(NotFound)) => { + AsyncInsertError::DatabaseError(NotFound) => { // We've filtered out the IP addresses the client provided, // i.e., there's some overlap with existing addresses. Error::invalid_request( @@ -363,26 +360,25 @@ impl DataStore { // concurrent inserts of new external IPs from the target range by // comparing the rcgen. let conn = self.pool_connection_authorized(opctx).await?; - let range = diesel_result_optional( - dsl::ip_pool_range - .filter(dsl::ip_pool_id.eq(pool_id)) - .filter(dsl::first_address.eq(first_net)) - .filter(dsl::last_address.eq(last_net)) - .filter(dsl::time_deleted.is_null()) - .select(IpPoolRange::as_select()) - .get_result_async::(&*conn) - .await, - ) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .ok_or_else(|| { - Error::invalid_request( - format!( - "The provided range {}-{} does not exist", - first_address, last_address, + let range = dsl::ip_pool_range + .filter(dsl::ip_pool_id.eq(pool_id)) + .filter(dsl::first_address.eq(first_net)) + .filter(dsl::last_address.eq(last_net)) + .filter(dsl::time_deleted.is_null()) + .select(IpPoolRange::as_select()) + .get_result_async::(&*conn) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? + .ok_or_else(|| { + Error::invalid_request( + format!( + "The provided range {}-{} does not exist", + first_address, last_address, + ) + .as_str(), ) - .as_str(), - ) - })?; + })?; // Find external IPs allocated out of this pool and range. 
let range_id = range.id; diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index b1f3203c60..7d5e32cad9 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -1670,7 +1670,6 @@ mod test { async fn test_external_ip_check_constraints() { use crate::db::model::IpKind; use crate::db::schema::external_ip::dsl; - use async_bb8_diesel::ConnectionError::Query; use diesel::result::DatabaseErrorKind::CheckViolation; use diesel::result::Error::DatabaseError; @@ -1756,10 +1755,10 @@ mod test { assert!( matches!( err, - Query(DatabaseError( + DatabaseError( CheckViolation, _ - )) + ) ), "Expected a CHECK violation when inserting a \ Floating IP record with NULL name and/or description", @@ -1805,10 +1804,10 @@ mod test { assert!( matches!( err, - Query(DatabaseError( + DatabaseError( CheckViolation, _ - )) + ) ), "Expected a CHECK violation when inserting an \ Ephemeral Service IP", @@ -1836,10 +1835,10 @@ mod test { assert!( matches!( err, - Query(DatabaseError( + DatabaseError( CheckViolation, _ - )) + ) ), "Expected a CHECK violation when inserting a \ {:?} IP record with non-NULL name, description, \ diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 3d7b8afa71..4a46b23529 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -29,6 +29,7 @@ use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use diesel::result::Error as DieselError; use omicron_common::api::external; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::DeleteResult; @@ -463,7 +464,7 @@ impl DataStore { #[derive(Debug)] enum NetworkInterfaceUpdateError { InstanceNotStopped, - FailedToUnsetPrimary(async_bb8_diesel::ConnectionError), + FailedToUnsetPrimary(DieselError), } type TxnError = TransactionError; diff --git a/nexus/db-queries/src/db/datastore/project.rs b/nexus/db-queries/src/db/datastore/project.rs index 0285679cd5..c447b5bf98 100644 --- a/nexus/db-queries/src/db/datastore/project.rs +++ b/nexus/db-queries/src/db/datastore/project.rs @@ -11,7 +11,6 @@ use crate::context::OpContext; use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; -use crate::db::error::diesel_result_optional; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; @@ -60,16 +59,15 @@ macro_rules! generate_fn_to_ensure_none_in_project { ) -> DeleteResult { use db::schema::$i; - let maybe_label = diesel_result_optional( - $i::dsl::$i - .filter($i::dsl::project_id.eq(authz_project.id())) - .filter($i::dsl::time_deleted.is_null()) - .select($i::dsl::$label) - .limit(1) - .first_async::<$label_ty>(&*self.pool_connection_authorized(opctx).await?) - .await, - ) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + let maybe_label = $i::dsl::$i + .filter($i::dsl::project_id.eq(authz_project.id())) + .filter($i::dsl::time_deleted.is_null()) + .select($i::dsl::$label) + .limit(1) + .first_async::<$label_ty>(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; if let Some(label) = maybe_label { let object = stringify!($i).replace('_', " "); @@ -193,7 +191,7 @@ impl DataStore { .await .map_err(|e| match e { TransactionError::CustomError(e) => e, - TransactionError::Connection(e) => { + TransactionError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } })?; @@ -270,7 +268,7 @@ impl DataStore { .await .map_err(|e| match e { TxnError::CustomError(e) => e, - TxnError::Connection(e) => { + TxnError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } })?; diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 1be3e1ee4c..f5f7524aab 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -30,6 +30,7 @@ use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use diesel::result::Error as DieselError; use diesel::upsert::excluded; use nexus_db_model::DnsGroup; use nexus_db_model::DnsZone; @@ -79,7 +80,7 @@ enum RackInitError { AddingNic(Error), ServiceInsert(Error), DatasetInsert { err: AsyncInsertError, zpool_id: Uuid }, - RackUpdate { err: async_bb8_diesel::ConnectionError, rack_id: Uuid }, + RackUpdate { err: DieselError, rack_id: Uuid }, DnsSerialization(Error), Silo(Error), RoleAssignment(Error), @@ -137,7 +138,7 @@ impl From for Error { err )) } - TxnError::Connection(e) => { + TxnError::Database(e) => { Error::internal_error(&format!("Transaction error: {}", e)) } } @@ -631,7 +632,7 @@ impl DataStore { .await .map_err(|error: TxnError| match error { TransactionError::CustomError(err) => err, - TransactionError::Connection(e) => { + TransactionError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } }) diff --git a/nexus/db-queries/src/db/datastore/region_snapshot.rs b/nexus/db-queries/src/db/datastore/region_snapshot.rs index 148cfe4812..3d328a6206 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot.rs @@ -10,8 +10,8 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::RegionSnapshot; use async_bb8_diesel::AsyncRunQueryDsl; -use async_bb8_diesel::OptionalExtension; use diesel::prelude::*; +use diesel::OptionalExtension; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::LookupResult; diff --git a/nexus/db-queries/src/db/datastore/role.rs b/nexus/db-queries/src/db/datastore/role.rs index f1198c239b..b2ad441475 100644 --- a/nexus/db-queries/src/db/datastore/role.rs +++ b/nexus/db-queries/src/db/datastore/role.rs @@ -280,7 +280,7 @@ impl DataStore { .await .map_err(|e| match e { TransactionError::CustomError(e) => e, - TransactionError::Connection(e) => { + TransactionError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } }) diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index 5e909b84c4..ec3658c067 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -10,7 +10,6 @@ use crate::authz; use crate::context::OpContext; use crate::db; use crate::db::datastore::RunnableQuery; -use crate::db::error::diesel_result_optional; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use 
crate::db::error::TransactionError; @@ -261,7 +260,7 @@ impl DataStore { .await .map_err(|e| match e { TransactionError::CustomError(e) => e, - TransactionError::Connection(e) => { + TransactionError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } }) @@ -338,16 +337,15 @@ impl DataStore { // Make sure there are no projects present within this silo. let id = authz_silo.id(); let rcgen = db_silo.rcgen; - let project_found = diesel_result_optional( - project::dsl::project - .filter(project::dsl::silo_id.eq(id)) - .filter(project::dsl::time_deleted.is_null()) - .select(project::dsl::id) - .limit(1) - .first_async::(&*conn) - .await, - ) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + let project_found = project::dsl::project + .filter(project::dsl::silo_id.eq(id)) + .filter(project::dsl::time_deleted.is_null()) + .select(project::dsl::id) + .limit(1) + .first_async::(&*conn) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; if project_found.is_some() { return Err(Error::InvalidRequest { @@ -395,7 +393,7 @@ impl DataStore { .await .map_err(|e| match e { TxnError::CustomError(e) => e, - TxnError::Connection(e) => { + TxnError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } })?; diff --git a/nexus/db-queries/src/db/datastore/silo_group.rs b/nexus/db-queries/src/db/datastore/silo_group.rs index d13986bb2d..46f4aae7c9 100644 --- a/nexus/db-queries/src/db/datastore/silo_group.rs +++ b/nexus/db-queries/src/db/datastore/silo_group.rs @@ -15,8 +15,8 @@ use crate::db::error::TransactionError; use crate::db::model::SiloGroup; use crate::db::model::SiloGroupMembership; use crate::db::pagination::paginated; +use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; -use async_bb8_diesel::{AsyncConnection, OptionalExtension}; use chrono::Utc; use diesel::prelude::*; use omicron_common::api::external::CreateResult; @@ -237,8 +237,7 @@ impl DataStore { "group {0} still has memberships", id )), - - TxnError::Connection(error) => { + TxnError::Database(error) => { public_error_from_diesel(error, ErrorHandler::Server) } }) diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index ec6cca0071..a52d1b7772 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -183,7 +183,7 @@ impl DataStore { "No sleds can fit the requested instance", ) } - TxnError::Connection(e) => { + TxnError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } }) diff --git a/nexus/db-queries/src/db/datastore/snapshot.rs b/nexus/db-queries/src/db/datastore/snapshot.rs index 29fbb38e88..59fb00c84d 100644 --- a/nexus/db-queries/src/db/datastore/snapshot.rs +++ b/nexus/db-queries/src/db/datastore/snapshot.rs @@ -22,10 +22,9 @@ use crate::db::update_and_check::UpdateAndCheck; use crate::db::TransactionError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; -use async_bb8_diesel::ConnectionError; use chrono::Utc; use diesel::prelude::*; -use diesel::result::Error as DieselError; +use diesel::OptionalExtension; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; @@ -101,7 +100,7 @@ impl DataStore { // does not match, but a project and name that does, return // ObjectAlreadyExists here. 
- let existing_snapshot_id: Option = match dsl::snapshot + let existing_snapshot_id: Option = dsl::snapshot .filter(dsl::time_deleted.is_null()) .filter(dsl::name.eq(snapshot.name().to_string())) .filter(dsl::project_id.eq(snapshot.project_id)) @@ -109,13 +108,7 @@ impl DataStore { .limit(1) .first_async(&conn) .await - { - Ok(v) => Ok(Some(v)), - Err(ConnectionError::Query(DieselError::NotFound)) => { - Ok(None) - } - Err(e) => Err(e), - }?; + .optional()?; if let Some(existing_snapshot_id) = existing_snapshot_id { if existing_snapshot_id != snapshot.id() { @@ -161,8 +154,7 @@ impl DataStore { } }, }, - - TxnError::Connection(e) => { + TxnError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } })?; diff --git a/nexus/db-queries/src/db/datastore/switch_interface.rs b/nexus/db-queries/src/db/datastore/switch_interface.rs index 498064ce37..88cff50471 100644 --- a/nexus/db-queries/src/db/datastore/switch_interface.rs +++ b/nexus/db-queries/src/db/datastore/switch_interface.rs @@ -14,7 +14,7 @@ use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; use crate::db::model::LoopbackAddress; use crate::db::pagination::paginated; -use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl, ConnectionError}; +use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl}; use diesel::result::Error as DieselError; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use ipnetwork::IpNetwork; @@ -65,8 +65,8 @@ impl DataStore { LoopbackAddressCreateError::ReserveBlock(err), ) } - ReserveBlockTxnError::Connection(err) => { - TxnError::Connection(err) + ReserveBlockTxnError::Database(err) => { + TxnError::Database(err) } })?; @@ -103,16 +103,14 @@ impl DataStore { ReserveBlockError::AddressNotInLot, ), ) => Error::invalid_request("address not in lot"), - TxnError::Connection(e) => match e { - ConnectionError::Query(DieselError::DatabaseError(_, _)) => { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::LoopbackAddress, - &format!("lo {}", inet), - ), - ) - } + TxnError::Database(e) => match e { + DieselError::DatabaseError(_, _) => public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::LoopbackAddress, + &format!("lo {}", inet), + ), + ), _ => public_error_from_diesel(e, ErrorHandler::Server), }, }) diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index 940fedb473..45be594be6 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -20,7 +20,7 @@ use crate::db::model::{ SwitchVlanInterfaceConfig, }; use crate::db::pagination::paginated; -use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl, ConnectionError}; +use async_bb8_diesel::{AsyncConnection, AsyncRunQueryDsl}; use diesel::result::Error as DieselError; use diesel::{ ExpressionMethods, JoinOnDsl, NullableExpressionMethods, QueryDsl, @@ -279,11 +279,10 @@ impl DataStore { .limit(1) .first_async::(&conn) .await - .map_err(|e| match e { - ConnectionError::Query(_) => TxnError::CustomError( + .map_err(|_| { + TxnError::CustomError( SwitchPortSettingsCreateError::BgpAnnounceSetNotFound, - ), - e => e.into(), + ) })? } }; @@ -300,12 +299,11 @@ impl DataStore { .limit(1) .first_async::(&conn) .await - .map_err(|e| match e { - ConnectionError::Query(_) => TxnError::CustomError( + .map_err(|_| + TxnError::CustomError( SwitchPortSettingsCreateError::BgpConfigNotFound, - ), - e => e.into(), - })? + ) + )? 
} }; @@ -341,14 +339,11 @@ impl DataStore { .limit(1) .first_async::(&conn) .await - .map_err(|e| match e { - ConnectionError::Query(_) => { - TxnError::CustomError( - SwitchPortSettingsCreateError::AddressLotNotFound, - ) - } - e => e.into() - })? + .map_err(|_| + TxnError::CustomError( + SwitchPortSettingsCreateError::AddressLotNotFound, + ) + )? } }; // TODO: Reduce DB round trips needed for reserving ip blocks @@ -369,7 +364,7 @@ impl DataStore { SwitchPortSettingsCreateError::ReserveBlock(err) ) } - ReserveBlockTxnError::Connection(err) => TxnError::Connection(err), + ReserveBlockTxnError::Database(err) => TxnError::Database(err), })?; address_config.push(SwitchPortAddressConfig::new( @@ -416,10 +411,8 @@ impl DataStore { ReserveBlockError::AddressNotInLot ) ) => Error::invalid_request("address not in lot"), - TxnError::Connection(e) => match e { - ConnectionError::Query( - DieselError::DatabaseError(_, _), - ) => public_error_from_diesel( + TxnError::Database(e) => match e { + DieselError::DatabaseError(_, _) => public_error_from_diesel( e, ErrorHandler::Conflict( ResourceType::SwitchPortSettings, @@ -467,12 +460,11 @@ impl DataStore { .limit(1) .first_async::(&conn) .await - .map_err(|e| match e { - ConnectionError::Query(_) => TxnError::CustomError( + .map_err(|_| + TxnError::CustomError( SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound, - ), - e => e.into() - })? + ) + )? } }; @@ -599,10 +591,8 @@ impl DataStore { SwitchPortSettingsDeleteError::SwitchPortSettingsNotFound) => { Error::invalid_request("port settings not found") } - TxnError::Connection(e) => match e { - ConnectionError::Query( - DieselError::DatabaseError(_, _), - ) => { + TxnError::Database(e) => match e { + DieselError::DatabaseError(_, _) => { let name = match ¶ms.port_settings { Some(name_or_id) => name_or_id.to_string(), None => String::new(), @@ -676,11 +666,10 @@ impl DataStore { .limit(1) .first_async::(&conn) .await - .map_err(|e| match e { - ConnectionError::Query(_) => TxnError::CustomError( + .map_err(|_| { + TxnError::CustomError( SwitchPortSettingsGetError::NotFound(name.clone()) - ), - e => e.into() + ) })? 
} }; @@ -804,10 +793,8 @@ impl DataStore { SwitchPortSettingsGetError::NotFound(name)) => { Error::not_found_by_name(ResourceType::SwitchPortSettings, &name) } - TxnError::Connection(e) => match e { - ConnectionError::Query( - DieselError::DatabaseError(_, _), - ) => { + TxnError::Database(e) => match e { + DieselError::DatabaseError(_, _) => { let name = name_or_id.to_string(); public_error_from_diesel( e, @@ -855,11 +842,8 @@ impl DataStore { .limit(1) .first_async::(&conn) .await - .map_err(|e| match e { - ConnectionError::Query(_) => TxnError::CustomError( - SwitchPortCreateError::RackNotFound, - ), - e => e.into(), + .map_err(|_| { + TxnError::CustomError(SwitchPortCreateError::RackNotFound) })?; // insert switch port @@ -878,19 +862,14 @@ impl DataStore { TxnError::CustomError(SwitchPortCreateError::RackNotFound) => { Error::invalid_request("rack not found") } - TxnError::Connection(e) => match e { - ConnectionError::Query(DieselError::DatabaseError(_, _)) => { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::SwitchPort, - &format!( - "{}/{}/{}", - rack_id, &switch_location, &port, - ), - ), - ) - } + TxnError::Database(e) => match e { + DieselError::DatabaseError(_, _) => public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::SwitchPort, + &format!("{}/{}/{}", rack_id, &switch_location, &port,), + ), + ), _ => public_error_from_diesel(e, ErrorHandler::Server), }, }) @@ -929,11 +908,8 @@ impl DataStore { .limit(1) .first_async::(&conn) .await - .map_err(|e| match e { - ConnectionError::Query(_) => { - TxnError::CustomError(SwitchPortDeleteError::NotFound) - } - e => e.into(), + .map_err(|_| { + TxnError::CustomError(SwitchPortDeleteError::NotFound) })?; if port.port_settings_id.is_some() { @@ -958,7 +934,7 @@ impl DataStore { TxnError::CustomError(SwitchPortDeleteError::ActiveSettings) => { Error::invalid_request("must clear port settings first") } - TxnError::Connection(e) => { + TxnError::Database(e) => { public_error_from_diesel(e, ErrorHandler::Server) } }) diff --git a/nexus/db-queries/src/db/datastore/update.rs b/nexus/db-queries/src/db/datastore/update.rs index 5a3e3b27e4..8b1eecb781 100644 --- a/nexus/db-queries/src/db/datastore/update.rs +++ b/nexus/db-queries/src/db/datastore/update.rs @@ -164,7 +164,7 @@ impl DataStore { .await .map_err(|e| match e { TransactionError::CustomError(e) => e, - TransactionError::Connection(e) => public_error_from_diesel( + TransactionError::Database(e) => public_error_from_diesel( e, ErrorHandler::Conflict( ResourceType::ComponentUpdate, diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index b97b8451cf..38e3875036 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -16,9 +16,9 @@ use crate::db::model::RegionSnapshot; use crate::db::model::Volume; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; -use async_bb8_diesel::OptionalExtension; use chrono::Utc; use diesel::prelude::*; +use diesel::OptionalExtension; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; @@ -336,7 +336,7 @@ impl DataStore { .await .map_err(|e| match e { TxnError::CustomError(VolumeGetError::DieselError(e)) => { - public_error_from_diesel(e.into(), ErrorHandler::Server) + public_error_from_diesel(e, ErrorHandler::Server) } _ => { @@ -757,7 +757,7 @@ impl DataStore { .map_err(|e| match e { 
TxnError::CustomError( DecreaseCrucibleResourcesError::DieselError(e), - ) => public_error_from_diesel(e.into(), ErrorHandler::Server), + ) => public_error_from_diesel(e, ErrorHandler::Server), _ => { Error::internal_error(&format!("Transaction error: {}", e)) @@ -955,7 +955,7 @@ impl DataStore { TxnError::CustomError( RemoveReadOnlyParentError::DieselError(e), ) => public_error_from_diesel( - e.into(), + e, ErrorHandler::Server, ), diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index af7ea93456..46c3d2504e 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -10,7 +10,6 @@ use crate::context::OpContext; use crate::db; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; -use crate::db::error::diesel_result_optional; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; @@ -389,19 +388,18 @@ impl DataStore { // but we can't have NICs be a child of both tables at this point, and // we need to prevent VPC Subnets from being deleted while they have // NICs in them as well. - if diesel_result_optional( - vpc_subnet::dsl::vpc_subnet - .filter(vpc_subnet::dsl::vpc_id.eq(authz_vpc.id())) - .filter(vpc_subnet::dsl::time_deleted.is_null()) - .select(vpc_subnet::dsl::id) - .limit(1) - .first_async::( - &*self.pool_connection_authorized(opctx).await?, - ) - .await, - ) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .is_some() + if vpc_subnet::dsl::vpc_subnet + .filter(vpc_subnet::dsl::vpc_id.eq(authz_vpc.id())) + .filter(vpc_subnet::dsl::time_deleted.is_null()) + .select(vpc_subnet::dsl::id) + .limit(1) + .first_async::( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? + .is_some() { return Err(Error::InvalidRequest { message: String::from( @@ -556,7 +554,7 @@ impl DataStore { TxnError::CustomError( FirewallUpdateError::CollectionNotFound, ) => Error::not_found_by_id(ResourceType::Vpc, &authz_vpc.id()), - TxnError::Connection(e) => public_error_from_diesel( + TxnError::Database(e) => public_error_from_diesel( e, ErrorHandler::NotFoundByResource(authz_vpc), ), @@ -700,17 +698,16 @@ impl DataStore { let conn = self.pool_connection_authorized(opctx).await?; // Verify there are no child network interfaces in this VPC Subnet - if diesel_result_optional( - network_interface::dsl::network_interface - .filter(network_interface::dsl::subnet_id.eq(authz_subnet.id())) - .filter(network_interface::dsl::time_deleted.is_null()) - .select(network_interface::dsl::id) - .limit(1) - .first_async::(&*conn) - .await, - ) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .is_some() + if network_interface::dsl::network_interface + .filter(network_interface::dsl::subnet_id.eq(authz_subnet.id())) + .filter(network_interface::dsl::time_deleted.is_null()) + .select(network_interface::dsl::id) + .limit(1) + .first_async::(&*conn) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? + .is_some() { return Err(Error::InvalidRequest { message: String::from( diff --git a/nexus/db-queries/src/db/error.rs b/nexus/db-queries/src/db/error.rs index f7402bb8c7..cbe2b0a71f 100644 --- a/nexus/db-queries/src/db/error.rs +++ b/nexus/db-queries/src/db/error.rs @@ -4,7 +4,6 @@ //! Error handling and conversions. 
-use async_bb8_diesel::ConnectionError; use diesel::result::DatabaseErrorInformation; use diesel::result::DatabaseErrorKind as DieselErrorKind; use diesel::result::Error as DieselError; @@ -25,16 +24,8 @@ pub enum TransactionError { /// /// This error covers failure due to accessing the DB pool or errors /// propagated from the DB itself. - #[error("Connection error: {0}")] - Connection(#[from] async_bb8_diesel::ConnectionError), -} - -// Maps a "diesel error" into a "pool error", which -// is already contained within the error type. -impl From for TransactionError { - fn from(err: DieselError) -> Self { - Self::Connection(ConnectionError::Query(err)) - } + #[error("Database error: {0}")] + Database(#[from] DieselError), } impl From for TransactionError { @@ -50,8 +41,9 @@ impl TransactionError { /// [1]: https://www.cockroachlabs.com/docs/v23.1/transaction-retry-error-reference#client-side-retry-handling pub fn retry_transaction(&self) -> bool { match &self { - TransactionError::Connection(ConnectionError::Query( - DieselError::DatabaseError(kind, boxed_error_information), + Self::Database(DieselError::DatabaseError( + kind, + boxed_error_information, )) => match kind { DieselErrorKind::SerializationFailure => { return boxed_error_information @@ -93,19 +85,6 @@ fn format_database_error( rv } -/// Like [`diesel::result::OptionalExtension::optional`]. This turns Ok(v) -/// into Ok(Some(v)), Err("NotFound") into Ok(None), and leave all other values -/// unchanged. -pub fn diesel_result_optional( - result: Result, -) -> Result, ConnectionError> { - match result { - Ok(v) => Ok(Some(v)), - Err(ConnectionError::Query(DieselError::NotFound)) => Ok(None), - Err(e) => Err(e), - } -} - /// Allows the caller to handle user-facing errors, and provide additional /// context which may be used to populate more informative errors. /// @@ -142,41 +121,27 @@ pub enum ErrorHandler<'a> { /// [`ErrorHandler`] may be used to add additional handlers for the error /// being returned. 
pub fn public_error_from_diesel( - error: ConnectionError, + error: DieselError, handler: ErrorHandler<'_>, ) -> PublicError { - match error { - ConnectionError::Connection(error) => PublicError::unavail(&format!( - "Failed to access connection pool: {}", + match handler { + ErrorHandler::NotFoundByResource(resource) => { + public_error_from_diesel_lookup( + error, + resource.resource_type(), + resource.lookup_type(), + ) + } + ErrorHandler::NotFoundByLookup(resource_type, lookup_type) => { + public_error_from_diesel_lookup(error, resource_type, &lookup_type) + } + ErrorHandler::Conflict(resource_type, object_name) => { + public_error_from_diesel_create(error, resource_type, object_name) + } + ErrorHandler::Server => PublicError::internal_error(&format!( + "unexpected database error: {:#}", error )), - ConnectionError::Query(error) => match handler { - ErrorHandler::NotFoundByResource(resource) => { - public_error_from_diesel_lookup( - error, - resource.resource_type(), - resource.lookup_type(), - ) - } - ErrorHandler::NotFoundByLookup(resource_type, lookup_type) => { - public_error_from_diesel_lookup( - error, - resource_type, - &lookup_type, - ) - } - ErrorHandler::Conflict(resource_type, object_name) => { - public_error_from_diesel_create( - error, - resource_type, - object_name, - ) - } - ErrorHandler::Server => PublicError::internal_error(&format!( - "unexpected database error: {:#}", - error - )), - }, } } diff --git a/nexus/db-queries/src/db/explain.rs b/nexus/db-queries/src/db/explain.rs index fc8098b876..3de5b4f280 100644 --- a/nexus/db-queries/src/db/explain.rs +++ b/nexus/db-queries/src/db/explain.rs @@ -5,11 +5,12 @@ //! Utility allowing Diesel to EXPLAIN queries. use super::pool::DbConnection; -use async_bb8_diesel::{AsyncRunQueryDsl, ConnectionError}; +use async_bb8_diesel::AsyncRunQueryDsl; use async_trait::async_trait; use diesel::pg::Pg; use diesel::prelude::*; use diesel::query_builder::*; +use diesel::result::Error as DieselError; /// A wrapper around a runnable Diesel query, which EXPLAINs what it is doing. /// @@ -49,7 +50,7 @@ pub trait ExplainableAsync { async fn explain_async( self, conn: &async_bb8_diesel::Connection, - ) -> Result; + ) -> Result; } #[async_trait] @@ -65,7 +66,7 @@ where async fn explain_async( self, conn: &async_bb8_diesel::Connection, - ) -> Result { + ) -> Result { Ok(ExplainStatement { query: self } .get_results_async::(conn) .await? 
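As a rough sketch of the calling convention these changes converge on (the `widget` table and the function below are illustrative assumptions rather than code from these patches, the synchronous Diesel API is used only for brevity, and the `crate::db::error` import assumes the code lives alongside the helpers in `nexus/db-queries`): queries now surface `diesel::result::Error` directly, `diesel::OptionalExtension::optional` takes over from the removed `diesel_result_optional` helper, and `public_error_from_diesel` consumes the `DieselError` without first unwrapping a `ConnectionError`.

use crate::db::error::{public_error_from_diesel, ErrorHandler};
use diesel::pg::PgConnection;
use diesel::prelude::*;
use diesel::result::Error as DieselError;
use diesel::OptionalExtension;
use omicron_common::api::external::Error as PublicError;

diesel::table! {
    widget (id) {
        id -> Uuid,
    }
}

// Look up at most one widget ID. `.optional()` turns DieselError::NotFound
// into Ok(None); every other Diesel error is mapped to a public 500 via the
// refactored helper, which now accepts a DieselError directly.
fn first_widget_id(
    conn: &mut PgConnection,
) -> Result<Option<uuid::Uuid>, PublicError> {
    widget::dsl::widget
        .select(widget::dsl::id)
        .limit(1)
        .first::<uuid::Uuid>(conn)
        .optional()
        .map_err(|e: DieselError| {
            public_error_from_diesel(e, ErrorHandler::Server)
        })
}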
diff --git a/nexus/db-queries/src/db/pool.rs b/nexus/db-queries/src/db/pool.rs index 6311121bd1..73c95f4e91 100644 --- a/nexus/db-queries/src/db/pool.rs +++ b/nexus/db-queries/src/db/pool.rs @@ -99,7 +99,9 @@ impl CustomizeConnection, ConnectionError> &self, conn: &mut Connection, ) -> Result<(), ConnectionError> { - conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL).await + conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL) + .await + .map_err(|e| e.into()) } } diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 18360e1045..cf182e080d 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -20,6 +20,7 @@ use diesel::query_builder::AstPass; use diesel::query_builder::Query; use diesel::query_builder::QueryFragment; use diesel::query_builder::QueryId; +use diesel::result::Error as DieselError; use diesel::sql_types; use diesel::Column; use diesel::Expression; @@ -42,7 +43,7 @@ const REALLOCATION_WITH_DIFFERENT_IP_SENTINEL: &'static str = "Reallocation of IP with different value"; /// Translates a generic pool error to an external error. -pub fn from_diesel(e: async_bb8_diesel::ConnectionError) -> external::Error { +pub fn from_diesel(e: DieselError) -> external::Error { use crate::db::error; let sentinels = [REALLOCATION_WITH_DIFFERENT_IP_SENTINEL]; diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 877daad9e3..bac2610b41 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -17,6 +17,7 @@ use diesel::prelude::Column; use diesel::query_builder::AstPass; use diesel::query_builder::QueryFragment; use diesel::query_builder::QueryId; +use diesel::result::Error as DieselError; use diesel::sql_types; use diesel::Insertable; use diesel::QueryResult; @@ -126,16 +127,14 @@ impl InsertError { /// address exhaustion or an attempt to attach an interface to an instance /// that is already associated with another VPC. pub fn from_diesel( - e: async_bb8_diesel::ConnectionError, + e: DieselError, interface: &IncompleteNetworkInterface, ) -> Self { use crate::db::error; - use async_bb8_diesel::ConnectionError; - use diesel::result::Error; match e { // Catch the specific errors designed to communicate the failures we // want to distinguish - ConnectionError::Query(Error::DatabaseError(_, _)) => { + DieselError::DatabaseError(_, _) => { decode_database_error(e, interface) } // Any other error at all is a bug @@ -223,13 +222,11 @@ impl InsertError { /// As such, it naturally is extremely tightly coupled to the database itself, /// including the software version and our schema. fn decode_database_error( - err: async_bb8_diesel::ConnectionError, + err: DieselError, interface: &IncompleteNetworkInterface, ) -> InsertError { use crate::db::error; - use async_bb8_diesel::ConnectionError; use diesel::result::DatabaseErrorKind; - use diesel::result::Error; // Error message generated when we attempt to insert an interface in a // different VPC from the interface(s) already associated with the instance @@ -292,10 +289,10 @@ fn decode_database_error( // If the address allocation subquery fails, we'll attempt to insert // NULL for the `ip` column. This checks that the non-NULL constraint on // that colum has been violated. 
- ConnectionError::Query(Error::DatabaseError( + DieselError::DatabaseError( DatabaseErrorKind::NotNullViolation, - ref info, - )) if info.message() == IP_EXHAUSTION_ERROR_MESSAGE => { + info, + ) if info.message() == IP_EXHAUSTION_ERROR_MESSAGE => { InsertError::NoAvailableIpAddresses } @@ -303,29 +300,27 @@ fn decode_database_error( // `push_ensure_unique_vpc_expression` subquery, which generates a // UUID parsing error if the resource (e.g. instance) we want to attach // to is already associated with another VPC. - ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::Unknown, - ref info, - )) if info.message() == MULTIPLE_VPC_ERROR_MESSAGE => { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, info) + if info.message() == MULTIPLE_VPC_ERROR_MESSAGE => + { InsertError::ResourceSpansMultipleVpcs(interface.parent_id) } // This checks the constraint on the interface slot numbers, used to // limit total number of interfaces per resource to a maximum number. - ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::CheckViolation, - ref info, - )) if info.message() == NO_SLOTS_AVAILABLE_ERROR_MESSAGE => { + DieselError::DatabaseError(DatabaseErrorKind::CheckViolation, info) + if info.message() == NO_SLOTS_AVAILABLE_ERROR_MESSAGE => + { InsertError::NoSlotsAvailable } // If the MAC allocation subquery fails, we'll attempt to insert NULL // for the `mac` column. This checks that the non-NULL constraint on // that column has been violated. - ConnectionError::Query(Error::DatabaseError( + DieselError::DatabaseError( DatabaseErrorKind::NotNullViolation, - ref info, - )) if info.message() == MAC_EXHAUSTION_ERROR_MESSAGE => { + info, + ) if info.message() == MAC_EXHAUSTION_ERROR_MESSAGE => { InsertError::NoMacAddrressesAvailable } @@ -333,39 +328,36 @@ fn decode_database_error( // `push_ensure_unique_vpc_subnet_expression` subquery, which generates // a UUID parsing error if the resource has another interface in the VPC // Subnet of the one we're trying to insert. - ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::Unknown, - ref info, - )) if info.message() == NON_UNIQUE_VPC_SUBNET_ERROR_MESSAGE => { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, info) + if info.message() == NON_UNIQUE_VPC_SUBNET_ERROR_MESSAGE => + { InsertError::NonUniqueVpcSubnets } // This catches the UUID-cast failure intentionally introduced by // `push_instance_state_verification_subquery`, which verifies that // the instance is actually stopped when running this query. - ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::Unknown, - ref info, - )) if info.message() == INSTANCE_BAD_STATE_ERROR_MESSAGE => { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, info) + if info.message() == INSTANCE_BAD_STATE_ERROR_MESSAGE => + { assert_eq!(interface.kind, NetworkInterfaceKind::Instance); InsertError::InstanceMustBeStopped(interface.parent_id) } // This catches the UUID-cast failure intentionally introduced by // `push_instance_state_verification_subquery`, which verifies that // the instance doesn't even exist when running this query. 
- ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::Unknown, - ref info, - )) if info.message() == NO_INSTANCE_ERROR_MESSAGE => { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, info) + if info.message() == NO_INSTANCE_ERROR_MESSAGE => + { assert_eq!(interface.kind, NetworkInterfaceKind::Instance); InsertError::InstanceNotFound(interface.parent_id) } // This path looks specifically at constraint names. - ConnectionError::Query(Error::DatabaseError( + DieselError::DatabaseError( DatabaseErrorKind::UniqueViolation, ref info, - )) => match info.constraint_name() { + ) => match info.constraint_name() { // Constraint violated if a user-requested IP address has // already been assigned within the same VPC Subnet. Some(constraint) if constraint == IP_NOT_AVAILABLE_CONSTRAINT => { @@ -1550,17 +1542,12 @@ impl DeleteError { /// can generate, specifically the intentional errors that indicate that /// either the instance is still running, or that the instance has one or /// more secondary interfaces. - pub fn from_diesel( - e: async_bb8_diesel::ConnectionError, - query: &DeleteQuery, - ) -> Self { + pub fn from_diesel(e: DieselError, query: &DeleteQuery) -> Self { use crate::db::error; - use async_bb8_diesel::ConnectionError; - use diesel::result::Error; match e { // Catch the specific errors designed to communicate the failures we // want to distinguish - ConnectionError::Query(Error::DatabaseError(_, _)) => { + DieselError::DatabaseError(_, _) => { decode_delete_network_interface_database_error( e, query.parent_id, @@ -1608,13 +1595,11 @@ impl DeleteError { /// As such, it naturally is extremely tightly coupled to the database itself, /// including the software version and our schema. fn decode_delete_network_interface_database_error( - err: async_bb8_diesel::ConnectionError, + err: DieselError, parent_id: Uuid, ) -> DeleteError { use crate::db::error; - use async_bb8_diesel::ConnectionError; use diesel::result::DatabaseErrorKind; - use diesel::result::Error; // Error message generated when we're attempting to delete a primary // interface, and that instance also has one or more secondary interfaces @@ -1627,29 +1612,26 @@ fn decode_delete_network_interface_database_error( // first CTE, which generates a UUID parsing error if we're trying to // delete the primary interface, and the instance also has one or more // secondaries. - ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::Unknown, - ref info, - )) if info.message() == HAS_SECONDARIES_ERROR_MESSAGE => { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, ref info) + if info.message() == HAS_SECONDARIES_ERROR_MESSAGE => + { DeleteError::SecondariesExist(parent_id) } // This catches the UUID-cast failure intentionally introduced by // `push_instance_state_verification_subquery`, which verifies that // the instance can be worked on when running this query. - ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::Unknown, - ref info, - )) if info.message() == INSTANCE_BAD_STATE_ERROR_MESSAGE => { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, ref info) + if info.message() == INSTANCE_BAD_STATE_ERROR_MESSAGE => + { DeleteError::InstanceBadState(parent_id) } // This catches the UUID-cast failure intentionally introduced by // `push_instance_state_verification_subquery`, which verifies that // the instance doesn't even exist when running this query. 
- ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::Unknown, - ref info, - )) if info.message() == NO_INSTANCE_ERROR_MESSAGE => { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, ref info) + if info.message() == NO_INSTANCE_ERROR_MESSAGE => + { DeleteError::InstanceNotFound(parent_id) } diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 7f7b2ea9bf..a080af4c37 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -14,6 +14,7 @@ use crate::db::true_or_cast_error::{matches_sentinel, TrueOrCastError}; use db_macros::Subquery; use diesel::pg::Pg; use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; +use diesel::result::Error as DieselError; use diesel::PgBinaryExpressionMethods; use diesel::{ sql_types, BoolExpressionMethods, Column, CombineDsl, ExpressionMethods, @@ -36,7 +37,7 @@ const NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL: &'static str = /// Translates a generic pool error to an external error based /// on messages which may be emitted during region provisioning. -pub fn from_diesel(e: async_bb8_diesel::ConnectionError) -> external::Error { +pub fn from_diesel(e: DieselError) -> external::Error { use crate::db::error; let sentinels = [ diff --git a/nexus/db-queries/src/db/queries/vpc_subnet.rs b/nexus/db-queries/src/db/queries/vpc_subnet.rs index bbb229da1e..9ddec32080 100644 --- a/nexus/db-queries/src/db/queries/vpc_subnet.rs +++ b/nexus/db-queries/src/db/queries/vpc_subnet.rs @@ -11,6 +11,7 @@ use chrono::{DateTime, Utc}; use diesel::pg::Pg; use diesel::prelude::*; use diesel::query_builder::*; +use diesel::result::Error as DieselError; use diesel::sql_types; use omicron_common::api::external; use ref_cast::RefCast; @@ -28,14 +29,9 @@ pub enum SubnetError { impl SubnetError { /// Construct a `SubnetError` from a Diesel error, catching the desired /// cases and building useful errors. 
- pub fn from_diesel( - e: async_bb8_diesel::ConnectionError, - subnet: &VpcSubnet, - ) -> Self { + pub fn from_diesel(e: DieselError, subnet: &VpcSubnet) -> Self { use crate::db::error; - use async_bb8_diesel::ConnectionError; use diesel::result::DatabaseErrorKind; - use diesel::result::Error; const IPV4_OVERLAP_ERROR_MESSAGE: &str = r#"null value in column "ipv4_block" violates not-null constraint"#; const IPV6_OVERLAP_ERROR_MESSAGE: &str = @@ -43,26 +39,26 @@ impl SubnetError { const NAME_CONFLICT_CONSTRAINT: &str = "vpc_subnet_vpc_id_name_key"; match e { // Attempt to insert overlapping IPv4 subnet - ConnectionError::Query(Error::DatabaseError( + DieselError::DatabaseError( DatabaseErrorKind::NotNullViolation, ref info, - )) if info.message() == IPV4_OVERLAP_ERROR_MESSAGE => { + ) if info.message() == IPV4_OVERLAP_ERROR_MESSAGE => { SubnetError::OverlappingIpRange(subnet.ipv4_block.0 .0.into()) } // Attempt to insert overlapping IPv6 subnet - ConnectionError::Query(Error::DatabaseError( + DieselError::DatabaseError( DatabaseErrorKind::NotNullViolation, ref info, - )) if info.message() == IPV6_OVERLAP_ERROR_MESSAGE => { + ) if info.message() == IPV6_OVERLAP_ERROR_MESSAGE => { SubnetError::OverlappingIpRange(subnet.ipv6_block.0 .0.into()) } // Conflicting name for the subnet within a VPC - ConnectionError::Query(Error::DatabaseError( + DieselError::DatabaseError( DatabaseErrorKind::UniqueViolation, ref info, - )) if info.constraint_name() == Some(NAME_CONFLICT_CONSTRAINT) => { + ) if info.constraint_name() == Some(NAME_CONFLICT_CONSTRAINT) => { SubnetError::External(error::public_error_from_diesel( e, error::ErrorHandler::Conflict( diff --git a/nexus/db-queries/src/db/true_or_cast_error.rs b/nexus/db-queries/src/db/true_or_cast_error.rs index e04d865182..6d7b2a1dbd 100644 --- a/nexus/db-queries/src/db/true_or_cast_error.rs +++ b/nexus/db-queries/src/db/true_or_cast_error.rs @@ -9,6 +9,7 @@ use diesel::pg::Pg; use diesel::query_builder::AstPass; use diesel::query_builder::QueryFragment; use diesel::query_builder::QueryId; +use diesel::result::Error as DieselError; use diesel::Expression; use diesel::SelectableExpression; @@ -77,10 +78,9 @@ where /// Returns one of the sentinels if it matches the expected value from /// a [`TrueOrCastError`]. pub fn matches_sentinel( - e: &async_bb8_diesel::ConnectionError, + e: &DieselError, sentinels: &[&'static str], ) -> Option<&'static str> { - use async_bb8_diesel::ConnectionError; use diesel::result::DatabaseErrorKind; use diesel::result::Error; @@ -93,10 +93,7 @@ pub fn matches_sentinel( match e { // Catch the specific errors designed to communicate the failures we // want to distinguish. 
- ConnectionError::Query(Error::DatabaseError( - DatabaseErrorKind::Unknown, - ref info, - )) => { + Error::DatabaseError(DatabaseErrorKind::Unknown, info) => { for sentinel in sentinels { if info.message() == bool_parse_error(sentinel) { return Some(sentinel); diff --git a/nexus/db-queries/src/db/update_and_check.rs b/nexus/db-queries/src/db/update_and_check.rs index 96cb3e4c79..d6bf14c083 100644 --- a/nexus/db-queries/src/db/update_and_check.rs +++ b/nexus/db-queries/src/db/update_and_check.rs @@ -12,6 +12,7 @@ use diesel::prelude::*; use diesel::query_builder::*; use diesel::query_dsl::methods::LoadQuery; use diesel::query_source::Table; +use diesel::result::Error as DieselError; use diesel::sql_types::Nullable; use diesel::QuerySource; use std::marker::PhantomData; @@ -156,7 +157,7 @@ where pub async fn execute_and_check( self, conn: &async_bb8_diesel::Connection, - ) -> Result, async_bb8_diesel::ConnectionError> + ) -> Result, DieselError> where // We require this bound to ensure that "Self" is runnable as query. Self: LoadQuery<'static, DbConnection, (Option, Option, Q)>, diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 275c8738cc..fe403a7d41 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -832,9 +832,10 @@ pub(crate) mod test { }; use async_bb8_diesel::{ AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, - OptionalExtension, }; - use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; use dropshot::test_util::ClientTestContext; use nexus_db_queries::context::OpContext; use nexus_db_queries::{authn::saga::Serialized, db::datastore::DataStore}; diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 6fc93ce8db..2762ecaff3 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1372,10 +1372,10 @@ pub mod test { }; use async_bb8_diesel::{ AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, - OptionalExtension, }; use diesel::{ - BoolExpressionMethods, ExpressionMethods, QueryDsl, SelectableHelper, + BoolExpressionMethods, ExpressionMethods, OptionalExtension, QueryDsl, + SelectableHelper, }; use dropshot::test_util::ClientTestContext; use nexus_db_queries::authn::saga::Serialized; diff --git a/nexus/src/app/sagas/project_create.rs b/nexus/src/app/sagas/project_create.rs index 1cbf9070ee..135e20ff06 100644 --- a/nexus/src/app/sagas/project_create.rs +++ b/nexus/src/app/sagas/project_create.rs @@ -159,9 +159,10 @@ mod test { }; use async_bb8_diesel::{ AsyncConnection, AsyncRunQueryDsl, AsyncSimpleConnection, - OptionalExtension, }; - use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; use nexus_db_queries::{ authn::saga::Serialized, authz, context::OpContext, db::datastore::DataStore, diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 9c8a33fb17..0b3c5c99d7 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1568,8 +1568,10 @@ mod test { use crate::app::sagas::test_helpers; use crate::app::test_interfaces::TestInterfaces; use crate::external_api::shared::IpRange; - use async_bb8_diesel::{AsyncRunQueryDsl, OptionalExtension}; - use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; + use 
async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; use dropshot::test_util::ClientTestContext; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; diff --git a/nexus/src/app/sagas/vpc_create.rs b/nexus/src/app/sagas/vpc_create.rs index 85eed6616d..4b5bedf41e 100644 --- a/nexus/src/app/sagas/vpc_create.rs +++ b/nexus/src/app/sagas/vpc_create.rs @@ -445,8 +445,10 @@ pub(crate) mod test { app::saga::create_saga_dag, app::sagas::vpc_create::Params, app::sagas::vpc_create::SagaVpcCreate, external_api::params, }; - use async_bb8_diesel::{AsyncRunQueryDsl, OptionalExtension}; - use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; use dropshot::test_util::ClientTestContext; use nexus_db_queries::{ authn::saga::Serialized, authz, context::OpContext, From c6955a5a0452c958059ae1de9376389c0286c14e Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 11 Oct 2023 18:51:25 -0700 Subject: [PATCH 09/13] [dependabot] remove in favor of Renovate (#4264) For folks with access to Oxide RFDs, see https://rfd.shared.oxide.computer/rfd/0434 for more. --- .github/dependabot.yml | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 1b94f4bd27..0000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,17 +0,0 @@ -# -# Dependabot configuration file -# - -version: 2 -updates: - - package-ecosystem: "cargo" - directory: "/" - schedule: - interval: "weekly" - open-pull-requests-limit: 20 - groups: - russh: - # russh and russh-keys must be updated in lockstep - patterns: - - "russh" - - "russh-keys" From 876e8ca7c601a79beef0a787e83ec13a6cc1db7f Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Wed, 11 Oct 2023 22:25:18 -0700 Subject: [PATCH 10/13] Split instance state into Instance and VMM tables (#4194) Refactor the definition of an `Instance` throughout the control plane so that an `Instance` is separate from the `Vmm`s that incarnate it. This confers several advantages: - VMMs have their own state that sled agent can update without necessarily changing their instance's state. It's also possible to change an instance's active Propolis ID without having to know or update an instance's Propolis IP or current sled ID, since these change when an instance's active Propolis ID changes. This removes a great deal of complexity in sled agent, especially when live migrating an instance, and also simplifies the live migration saga considerably. - Resource reservations for instances have much clearer lifetimes: a reservation can be released when its VMM has moved to a terminal state. Nexus no longer has to reason about VMM lifetimes from changes to an instance's Propolis ID columns. - It's now possible for an Instance not to have an active Propolis at all! This allows an instance not to reserve sled resources when it's not running. It also allows an instance to stop and restart on a different sled. - It's also possible to get a history of an instance's VMMs for, e.g., zone bundle examination purposes ("my VMM had a problem two days ago but it went away when I stopped and restarted it; can you investigate?"). Rework callers throughout Nexus who depend on knowing an instance's current state and/or its current sled ID. In many cases (e.g. 
disk and NIC attach and detach), the relevant detail is whether the instance has an active Propolis; for simplicity, augment these checks with "has an active Propolis ID" instead of trying to grab both instance and VMM states. ## Known issues/remaining work - The virtual provisioning table is still updated only at instance creation/deletion time. Usage metrics that depend on this table might report strange and wonderful values if a user creates many more instances than can be started at one time. - Instances still use the generic "resource attachment" CTE to manage attaching and detaching disks. Previously these queries looked at instance states; now they look at an instance's state and whether it has an active Propolis, but not at the active Propolis's state. This will need to be revisited in the future to support disk hotplug. - `handle_instance_put_result` is still very aggressive about setting instances to the Failed state if sled agent returns errors other than invalid-request-flavored errors. I think we should reconsider this behavior, but this change is big enough as it is. I will file a TODO for this and update the new comments accordingly before this merges. - The new live migration logic is not tested yet and differs from the "two-table" TLA+ model in RFD 361. More work will be needed here before we can declare live migration fully ready for selfhosting. - It would be nice to have an `omdb vmm` command; for now I've just updated existing `omdb` commands to deal with the optionality of Propolises and sleds. Tests: - Unit/integration tests - On a single-machine dev cluster, created two instances and verified that: - The instances only have resource reservations while they're running (and they reserve reservoir space now) - The instances can reach each other over their internal and external IPs when they're both running (and can still reach each other even if you try to delete one while it's active) - `scadm` shows the appropriate IP mappings being added/deleted as the instances start/stop - The instances' serial consoles work as expected - Attaching a new disk to an instance is only possible if the instance is stopped - Disk snapshot succeeds when invoked on a running instance's attached disk - Deleting an instance detaches its disks - `omicron-stress` on a single-machine dev cluster ran for about an hour and created ~800 instances without any instances going to the Failed state (previously this would happen in the first 5-10 minutes) --- clients/nexus-client/src/lib.rs | 43 +- clients/sled-agent-client/src/lib.rs | 38 +- common/src/api/external/mod.rs | 31 +- common/src/api/internal/nexus.rs | 69 +- dev-tools/omdb/src/bin/omdb/db.rs | 132 ++- nexus/db-model/src/instance.rs | 219 ++-- nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema.rs | 28 +- nexus/db-model/src/vmm.rs | 137 +++ nexus/db-queries/src/db/datastore/disk.rs | 32 +- nexus/db-queries/src/db/datastore/instance.rs | 207 +++- nexus/db-queries/src/db/datastore/mod.rs | 2 + .../src/db/datastore/network_interface.rs | 22 +- nexus/db-queries/src/db/datastore/sled.rs | 15 +- nexus/db-queries/src/db/datastore/vmm.rs | 161 +++ nexus/db-queries/src/db/datastore/vpc.rs | 9 +- nexus/db-queries/src/db/queries/instance.rs | 255 +++++ nexus/db-queries/src/db/queries/mod.rs | 1 + .../src/db/queries/network_interface.rs | 160 +-- nexus/src/app/instance.rs | 956 +++++++++++------- nexus/src/app/instance_network.rs | 210 ++++ nexus/src/app/sagas/finalize_disk.rs | 2 +- nexus/src/app/sagas/instance_common.rs | 135 +++ 
nexus/src/app/sagas/instance_create.rs | 530 +--------- nexus/src/app/sagas/instance_delete.rs | 151 +-- nexus/src/app/sagas/instance_migrate.rs | 501 ++++----- nexus/src/app/sagas/instance_start.rs | 546 +++++----- nexus/src/app/sagas/mod.rs | 3 +- nexus/src/app/sagas/snapshot_create.rs | 208 ++-- nexus/src/app/sagas/test_helpers.rs | 43 +- nexus/src/app/snapshot.rs | 60 +- nexus/src/app/test_interfaces.rs | 54 +- nexus/src/cidata.rs | 2 +- nexus/src/external_api/http_entrypoints.rs | 12 +- nexus/src/internal_api/http_entrypoints.rs | 4 +- nexus/tests/integration_tests/disks.rs | 6 +- nexus/tests/integration_tests/instances.rs | 374 ++++--- nexus/tests/integration_tests/ip_pools.rs | 6 +- nexus/tests/integration_tests/pantry.rs | 6 +- nexus/tests/integration_tests/schema.rs | 12 +- openapi/nexus-internal.json | 138 +-- openapi/sled-agent.json | 202 ++-- schema/crdb/6.0.0/README.adoc | 14 + schema/crdb/6.0.0/up01.sql | 6 + schema/crdb/6.0.0/up02.sql | 13 + schema/crdb/6.0.0/up03.sql | 11 + schema/crdb/6.0.0/up04.sql | 23 + schema/crdb/6.0.0/up05.sql | 8 + schema/crdb/6.0.0/up06.sql | 1 + schema/crdb/6.0.0/up07.sql | 1 + schema/crdb/6.0.0/up08.sql | 1 + schema/crdb/6.0.0/up09.sql | 10 + schema/crdb/README.adoc | 64 +- schema/crdb/dbinit.sql | 120 ++- sled-agent/src/common/instance.rs | 865 ++++++++++------ sled-agent/src/http_entrypoints.rs | 16 +- sled-agent/src/instance.rs | 301 +++--- sled-agent/src/instance_manager.rs | 59 +- sled-agent/src/params.rs | 32 +- sled-agent/src/sim/collection.rs | 191 ++-- sled-agent/src/sim/http_entrypoints.rs | 15 +- sled-agent/src/sim/instance.rs | 209 ++-- sled-agent/src/sim/sled_agent.rs | 73 +- sled-agent/src/sled_agent.rs | 24 +- 64 files changed, 4694 insertions(+), 3087 deletions(-) create mode 100644 nexus/db-model/src/vmm.rs create mode 100644 nexus/db-queries/src/db/datastore/vmm.rs create mode 100644 nexus/db-queries/src/db/queries/instance.rs create mode 100644 nexus/src/app/sagas/instance_common.rs create mode 100644 schema/crdb/6.0.0/README.adoc create mode 100644 schema/crdb/6.0.0/up01.sql create mode 100644 schema/crdb/6.0.0/up02.sql create mode 100644 schema/crdb/6.0.0/up03.sql create mode 100644 schema/crdb/6.0.0/up04.sql create mode 100644 schema/crdb/6.0.0/up05.sql create mode 100644 schema/crdb/6.0.0/up06.sql create mode 100644 schema/crdb/6.0.0/up07.sql create mode 100644 schema/crdb/6.0.0/up08.sql create mode 100644 schema/crdb/6.0.0/up09.sql diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 412ca70497..33a68cb3ce 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -88,22 +88,41 @@ impl From s: omicron_common::api::internal::nexus::InstanceRuntimeState, ) -> Self { Self { - run_state: s.run_state.into(), - sled_id: s.sled_id, - propolis_id: s.propolis_id, dst_propolis_id: s.dst_propolis_id, - propolis_addr: s.propolis_addr.map(|addr| addr.to_string()), + gen: s.gen.into(), migration_id: s.migration_id, - propolis_gen: s.propolis_gen.into(), - ncpus: s.ncpus.into(), - memory: s.memory.into(), - hostname: s.hostname, + propolis_id: s.propolis_id, + time_updated: s.time_updated, + } + } +} + +impl From + for types::VmmRuntimeState +{ + fn from(s: omicron_common::api::internal::nexus::VmmRuntimeState) -> Self { + Self { gen: s.gen.into(), + state: s.state.into(), time_updated: s.time_updated, } } } +impl From + for types::SledInstanceState +{ + fn from( + s: omicron_common::api::internal::nexus::SledInstanceState, + ) -> Self { + Self { + instance_state: 
s.instance_state.into(), + propolis_id: s.propolis_id, + vmm_state: s.vmm_state.into(), + } + } +} + impl From for types::InstanceState { @@ -124,14 +143,6 @@ impl From } } -impl From - for types::InstanceCpuCount -{ - fn from(s: omicron_common::api::external::InstanceCpuCount) -> Self { - Self(s.0) - } -} - impl From for types::Generation { fn from(s: omicron_common::api::external::Generation) -> Self { Self(i64::from(&s) as u64) diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 68e60e8d95..3daac7dd60 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -24,16 +24,9 @@ impl From s: omicron_common::api::internal::nexus::InstanceRuntimeState, ) -> Self { Self { - run_state: s.run_state.into(), - sled_id: s.sled_id, propolis_id: s.propolis_id, dst_propolis_id: s.dst_propolis_id, - propolis_addr: s.propolis_addr.map(|addr| addr.to_string()), migration_id: s.migration_id, - propolis_gen: s.propolis_gen.into(), - ncpus: s.ncpus.into(), - memory: s.memory.into(), - hostname: s.hostname, gen: s.gen.into(), time_updated: s.time_updated, } @@ -85,22 +78,39 @@ impl From { fn from(s: types::InstanceRuntimeState) -> Self { Self { - run_state: s.run_state.into(), - sled_id: s.sled_id, propolis_id: s.propolis_id, dst_propolis_id: s.dst_propolis_id, - propolis_addr: s.propolis_addr.map(|addr| addr.parse().unwrap()), migration_id: s.migration_id, - propolis_gen: s.propolis_gen.into(), - ncpus: s.ncpus.into(), - memory: s.memory.into(), - hostname: s.hostname, gen: s.gen.into(), time_updated: s.time_updated, } } } +impl From + for omicron_common::api::internal::nexus::VmmRuntimeState +{ + fn from(s: types::VmmRuntimeState) -> Self { + Self { + state: s.state.into(), + gen: s.gen.into(), + time_updated: s.time_updated, + } + } +} + +impl From + for omicron_common::api::internal::nexus::SledInstanceState +{ + fn from(s: types::SledInstanceState) -> Self { + Self { + instance_state: s.instance_state.into(), + propolis_id: s.propolis_id, + vmm_state: s.vmm_state.into(), + } + } +} + impl From for omicron_common::api::external::InstanceState { diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 91ed7e4240..53512408af 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -739,6 +739,7 @@ pub enum ResourceType { UpdateableComponent, UserBuiltin, Zpool, + Vmm, } // IDENTITY METADATA @@ -866,25 +867,6 @@ impl InstanceState { InstanceState::Destroyed => "destroyed", } } - - /// Returns true if the given state represents a fully stopped Instance. - /// This means that a transition from an !is_stopped() state must go - /// through Stopping. 
- pub fn is_stopped(&self) -> bool { - match self { - InstanceState::Starting => false, - InstanceState::Running => false, - InstanceState::Stopping => false, - InstanceState::Rebooting => false, - InstanceState::Migrating => false, - - InstanceState::Creating => true, - InstanceState::Stopped => true, - InstanceState::Repairing => true, - InstanceState::Failed => true, - InstanceState::Destroyed => true, - } - } } /// The number of CPUs in an Instance @@ -912,17 +894,6 @@ pub struct InstanceRuntimeState { pub time_run_state_updated: DateTime, } -impl From - for InstanceRuntimeState -{ - fn from(state: crate::api::internal::nexus::InstanceRuntimeState) -> Self { - InstanceRuntimeState { - run_state: state.run_state, - time_run_state_updated: state.time_updated, - } - } -} - /// View of an Instance #[derive(ObjectIdentity, Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct Instance { diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 983976bbb7..a4a539ad9b 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -29,40 +29,59 @@ pub struct DiskRuntimeState { pub time_updated: DateTime, } -/// Runtime state of the Instance, including the actual running state and minimal -/// metadata -/// -/// This state is owned by the sled agent running that Instance. +/// The "static" properties of an instance: information about the instance that +/// doesn't change while the instance is running. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct InstanceRuntimeState { - /// runtime state of the Instance - pub run_state: InstanceState, - /// which sled is running this Instance - pub sled_id: Uuid, - /// which propolis-server is running this Instance - pub propolis_id: Uuid, - /// the target propolis-server during a migration of this Instance - pub dst_propolis_id: Option, - /// address of propolis-server running this Instance - pub propolis_addr: Option, - /// migration id (if one in process) - pub migration_id: Option, - /// The generation number for the Propolis and sled identifiers for this - /// instance. - pub propolis_gen: Generation, - /// number of CPUs allocated for this Instance +pub struct InstanceProperties { pub ncpus: InstanceCpuCount, - /// memory allocated for this Instance pub memory: ByteCount, - /// RFC1035-compliant hostname for the Instance. + /// RFC1035-compliant hostname for the instance. // TODO-cleanup different type? pub hostname: String, - /// generation number for this state +} + +/// The dynamic runtime properties of an instance: its current VMM ID (if any), +/// migration information (if any), and the instance state to report if there is +/// no active VMM. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct InstanceRuntimeState { + /// The instance's currently active VMM ID. + pub propolis_id: Option, + /// If a migration is active, the ID of the target VMM. + pub dst_propolis_id: Option, + /// If a migration is active, the ID of that migration. + pub migration_id: Option, + /// Generation number for this state. pub gen: Generation, - /// timestamp for this information + /// Timestamp for this information. + pub time_updated: DateTime, +} + +/// The dynamic runtime properties of an individual VMM process. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct VmmRuntimeState { + /// The last state reported by this VMM. + pub state: InstanceState, + /// The generation number for this VMM's state. 
+ pub gen: Generation, + /// Timestamp for the VMM's state. pub time_updated: DateTime, } +/// A wrapper type containing a sled's total knowledge of the state of a +/// specific VMM and the instance it incarnates. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct SledInstanceState { + /// The sled's conception of the state of the instance. + pub instance_state: InstanceRuntimeState, + + /// The ID of the VMM whose state is being reported. + pub propolis_id: Uuid, + + /// The most recent state of the sled's VMM process. + pub vmm_state: VmmRuntimeState, +} + // Oximeter producer/collector objects. /// Information announced by a metric server, used so that clients can contact it and collect diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 10e5546b6d..881b5831ba 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -26,7 +26,10 @@ use clap::Subcommand; use clap::ValueEnum; use diesel::expression::SelectableHelper; use diesel::query_dsl::QueryDsl; +use diesel::BoolExpressionMethods; use diesel::ExpressionMethods; +use diesel::JoinOnDsl; +use diesel::NullableExpressionMethods; use nexus_db_model::Dataset; use nexus_db_model::Disk; use nexus_db_model::DnsGroup; @@ -38,9 +41,11 @@ use nexus_db_model::Instance; use nexus_db_model::Project; use nexus_db_model::Region; use nexus_db_model::Sled; +use nexus_db_model::Vmm; use nexus_db_model::Zpool; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; +use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::ServiceKind; @@ -61,6 +66,44 @@ use strum::IntoEnumIterator; use tabled::Tabled; use uuid::Uuid; +const NO_ACTIVE_PROPOLIS_MSG: &str = ""; +const NOT_ON_SLED_MSG: &str = ""; + +struct MaybePropolisId(Option); +struct MaybeSledId(Option); + +impl From<&InstanceAndActiveVmm> for MaybePropolisId { + fn from(value: &InstanceAndActiveVmm) -> Self { + Self(value.instance().runtime().propolis_id) + } +} + +impl Display for MaybePropolisId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(id) = self.0 { + write!(f, "{}", id) + } else { + write!(f, "{}", NO_ACTIVE_PROPOLIS_MSG) + } + } +} + +impl From<&InstanceAndActiveVmm> for MaybeSledId { + fn from(value: &InstanceAndActiveVmm) -> Self { + Self(value.sled_id()) + } +} + +impl Display for MaybeSledId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if let Some(id) = self.0 { + write!(f, "{}", id) + } else { + write!(f, "{}", NOT_ON_SLED_MSG) + } + } +} + #[derive(Debug, Args)] pub struct DbArgs { /// URL of the database SQL interface @@ -473,33 +516,54 @@ async fn cmd_db_disk_info( if let Some(instance_uuid) = disk.runtime().attach_instance_id { // Get the instance this disk is attached to use db::schema::instance::dsl as instance_dsl; - let instance = instance_dsl::instance + use db::schema::vmm::dsl as vmm_dsl; + let instances: Vec = instance_dsl::instance .filter(instance_dsl::id.eq(instance_uuid)) + .left_join( + vmm_dsl::vmm.on(vmm_dsl::id + .nullable() + .eq(instance_dsl::active_propolis_id) + .and(vmm_dsl::time_deleted.is_null())), + ) .limit(1) - .select(Instance::as_select()) + .select((Instance::as_select(), Option::::as_select())) .load_async(&*conn) .await - .context("loading requested instance")?; + .context("loading requested instance")? 
+ .into_iter() + .map(|i: (Instance, Option)| i.into()) + .collect(); - let Some(instance) = instance.into_iter().next() else { + let Some(instance) = instances.into_iter().next() else { bail!("no instance: {} found", instance_uuid); }; - let instance_name = instance.name().to_string(); - let propolis_id = instance.runtime().propolis_id.to_string(); - let my_sled_id = instance.runtime().sled_id; + let instance_name = instance.instance().name().to_string(); + let disk_name = disk.name().to_string(); + let usr = if instance.vmm().is_some() { + let propolis_id = + instance.instance().runtime().propolis_id.unwrap(); + let my_sled_id = instance.sled_id().unwrap(); - let (_, my_sled) = LookupPath::new(opctx, datastore) - .sled_id(my_sled_id) - .fetch() - .await - .context("failed to look up sled")?; + let (_, my_sled) = LookupPath::new(opctx, datastore) + .sled_id(my_sled_id) + .fetch() + .await + .context("failed to look up sled")?; - let usr = UpstairsRow { - host_serial: my_sled.serial_number().to_string(), - disk_name: disk.name().to_string(), - instance_name, - propolis_zone: format!("oxz_propolis-server_{}", propolis_id), + UpstairsRow { + host_serial: my_sled.serial_number().to_string(), + disk_name, + instance_name, + propolis_zone: format!("oxz_propolis-server_{}", propolis_id), + } + } else { + UpstairsRow { + host_serial: NOT_ON_SLED_MSG.to_string(), + propolis_zone: NO_ACTIVE_PROPOLIS_MSG.to_string(), + disk_name, + instance_name, + } }; rows.push(usr); } else { @@ -691,7 +755,7 @@ async fn cmd_db_disk_physical( name: disk.name().to_string(), id: disk.id().to_string(), state: disk.runtime().disk_state, - instance_name: instance_name, + instance_name, }); } @@ -885,17 +949,17 @@ async fn cmd_db_sleds( struct CustomerInstanceRow { id: Uuid, state: String, - propolis_id: Uuid, - sled_id: Uuid, + propolis_id: MaybePropolisId, + sled_id: MaybeSledId, } -impl From for CustomerInstanceRow { - fn from(i: Instance) -> Self { +impl From for CustomerInstanceRow { + fn from(i: InstanceAndActiveVmm) -> Self { CustomerInstanceRow { - id: i.id(), - state: format!("{:?}", i.runtime_state.state.0), - propolis_id: i.runtime_state.propolis_id, - sled_id: i.runtime_state.sled_id, + id: i.instance().id(), + state: format!("{:?}", i.effective_state()), + propolis_id: (&i).into(), + sled_id: (&i).into(), } } } @@ -906,12 +970,22 @@ async fn cmd_db_instances( limit: NonZeroU32, ) -> Result<(), anyhow::Error> { use db::schema::instance::dsl; - let instances = dsl::instance + use db::schema::vmm::dsl as vmm_dsl; + let instances: Vec = dsl::instance + .left_join( + vmm_dsl::vmm.on(vmm_dsl::id + .nullable() + .eq(dsl::active_propolis_id) + .and(vmm_dsl::time_deleted.is_null())), + ) .limit(i64::from(u32::from(limit))) - .select(Instance::as_select()) + .select((Instance::as_select(), Option::::as_select())) .load_async(&*datastore.pool_connection_for_tests().await?) .await - .context("loading instances")?; + .context("loading instances")? 
+ .into_iter() + .map(|i: (Instance, Option)| i.into()) + .collect(); let ctx = || "listing instances".to_string(); check_limit(&instances, limit, ctx); diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index d6aaa45de3..9252926547 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -8,18 +8,20 @@ use crate::schema::{disk, instance}; use chrono::{DateTime, Utc}; use db_macros::Resource; use nexus_types::external_api::params; -use nexus_types::identity::Resource; -use omicron_common::address::PROPOLIS_PORT; -use omicron_common::api::external; -use omicron_common::api::internal; use serde::Deserialize; use serde::Serialize; -use std::net::SocketAddr; use uuid::Uuid; /// An Instance (VM). #[derive( - Queryable, Insertable, Debug, Selectable, Resource, Serialize, Deserialize, + Clone, + Debug, + Queryable, + Insertable, + Selectable, + Resource, + Serialize, + Deserialize, )] #[diesel(table_name = instance)] pub struct Instance { @@ -32,25 +34,54 @@ pub struct Instance { /// user data for instance initialization systems (e.g. cloud-init) pub user_data: Vec, - /// runtime state of the Instance + /// The number of vCPUs (i.e., virtual logical processors) to allocate for + /// this instance. + #[diesel(column_name = ncpus)] + pub ncpus: InstanceCpuCount, + + /// The amount of guest memory to allocate for this instance. + #[diesel(column_name = memory)] + pub memory: ByteCount, + + /// The instance's hostname. + // TODO-cleanup: Different type? + #[diesel(column_name = hostname)] + pub hostname: String, + + #[diesel(column_name = boot_on_fault)] + pub boot_on_fault: bool, + #[diesel(embed)] pub runtime_state: InstanceRuntimeState, } impl Instance { + /// Constructs a new instance record with no VMM that will initially appear + /// to be in the Creating state. pub fn new( instance_id: Uuid, project_id: Uuid, params: ¶ms::InstanceCreate, - runtime: InstanceRuntimeState, ) -> Self { let identity = InstanceIdentity::new(instance_id, params.identity.clone()); + + let runtime_state = InstanceRuntimeState::new( + InstanceState::new( + omicron_common::api::external::InstanceState::Creating, + ), + identity.time_modified, + ); + Self { identity, project_id, user_data: params.user_data.clone(), - runtime_state: runtime, + ncpus: params.ncpus.into(), + memory: params.memory.into(), + hostname: params.hostname.clone(), + boot_on_fault: false, + runtime_state, } } @@ -59,20 +90,6 @@ impl Instance { } } -/// Conversion to the external API type. -impl Into for Instance { - fn into(self) -> external::Instance { - external::Instance { - identity: self.identity(), - project_id: self.project_id, - ncpus: self.runtime().ncpus.into(), - memory: self.runtime().memory.into(), - hostname: self.runtime().hostname.clone(), - runtime: self.runtime().clone().into(), - } - } -} - impl DatastoreAttachTargetConfig for Instance { type Id = Uuid; @@ -103,153 +120,95 @@ impl DatastoreAttachTargetConfig for Instance { // `diesel::prelude::AsChangeset`. #[diesel(table_name = instance, treat_none_as_null = true)] pub struct InstanceRuntimeState { - /// The instance's current user-visible instance state. + /// The instance state to fall back on if asked to compute this instance's + /// state while it has no active VMM. /// /// This field is guarded by the instance's `gen` field. #[diesel(column_name = state)] - pub state: InstanceState, + pub nexus_state: InstanceState, + /// The time at which the runtime state was last updated. 
This is distinct /// from the time the record was last modified, because some updates don't /// modify the runtime state. #[diesel(column_name = time_state_updated)] pub time_updated: DateTime, - /// The generation number for the instance's user-visible state. Each - /// successive state update from a single incarnation of an instance must - /// bear a new generation number. + + /// The generation number for the information stored in this structure, + /// including the fallback state, the instance's active Propolis ID, and its + /// migration IDs. #[diesel(column_name = state_generation)] pub gen: Generation, - /// The ID of the sled hosting the current incarnation of this instance. - /// - /// This field is guarded by the instance's `propolis_gen`. - // - // TODO(#2315): This should be optional so that it can be cleared when the - // instance is not active. - #[diesel(column_name = active_sled_id)] - pub sled_id: Uuid, + /// The ID of the Propolis server hosting the current incarnation of this - /// instance. + /// instance, or None if the instance has no active VMM. /// - /// This field is guarded by the instance's `propolis_gen`. + /// This field is guarded by the instance's `gen`. #[diesel(column_name = active_propolis_id)] - pub propolis_id: Uuid, - /// The IP of the instance's current Propolis server. - /// - /// This field is guarded by the instance's `propolis_gen`. - #[diesel(column_name = active_propolis_ip)] - pub propolis_ip: Option, + pub propolis_id: Option, + /// If a migration is in progress, the ID of the Propolis server that is - /// the migration target. Note that the target's sled agent will have a - /// runtime state where `propolis_id` and `dst_propolis_id` are equal. + /// the migration target. /// - /// This field is guarded by the instance's `propolis_gen`. + /// This field is guarded by the instance's `gen`. #[diesel(column_name = target_propolis_id)] pub dst_propolis_id: Option, + /// If a migration is in progress, a UUID identifying that migration. This /// can be used to provide mutual exclusion between multiple attempts to /// migrate and between an attempt to migrate an attempt to mutate an /// instance in a way that's incompatible with migration. /// - /// This field is guarded by the instance's `propolis_gen`. + /// This field is guarded by the instance's `gen`. #[diesel(column_name = migration_id)] pub migration_id: Option, - /// A generation number protecting the instance's "location" information: - /// its sled ID, Propolis ID and IP, and migration information. Each state - /// update that updates one or more of these fields must bear a new - /// Propolis generation. - /// - /// Records with new Propolis generations supersede records with older - /// generations irrespective of their state generations. That is, a record - /// with Propolis generation 4 and state generation 1 is "newer" than - /// a record with Propolis generation 3 and state generation 5. - #[diesel(column_name = propolis_generation)] - pub propolis_gen: Generation, - /// The number of vCPUs (i.e., virtual logical processors) to allocate for - /// this instance. - #[diesel(column_name = ncpus)] - pub ncpus: InstanceCpuCount, - /// The amount of guest memory to allocate for this instance. - #[diesel(column_name = memory)] - pub memory: ByteCount, - /// The instance's hostname. - // TODO-cleanup: Different type? 
- #[diesel(column_name = hostname)] - pub hostname: String, - #[diesel(column_name = boot_on_fault)] - pub boot_on_fault: bool, } -impl From - for sled_agent_client::types::InstanceRuntimeState -{ - fn from(s: InstanceRuntimeState) -> Self { +impl InstanceRuntimeState { + fn new(initial_state: InstanceState, creation_time: DateTime) -> Self { Self { - run_state: s.state.into(), - sled_id: s.sled_id, - propolis_id: s.propolis_id, - dst_propolis_id: s.dst_propolis_id, - propolis_addr: s - .propolis_ip - .map(|ip| SocketAddr::new(ip.ip(), PROPOLIS_PORT).to_string()), - migration_id: s.migration_id, - propolis_gen: s.propolis_gen.into(), - ncpus: s.ncpus.into(), - memory: s.memory.into(), - hostname: s.hostname, - gen: s.gen.into(), - time_updated: s.time_updated, + nexus_state: initial_state, + time_updated: creation_time, + propolis_id: None, + dst_propolis_id: None, + migration_id: None, + gen: Generation::new(), } } } -/// Conversion to the external API type. -impl Into for InstanceRuntimeState { - fn into(self) -> external::InstanceRuntimeState { - external::InstanceRuntimeState { - run_state: *self.state.state(), - time_run_state_updated: self.time_updated, - } - } -} +impl From + for InstanceRuntimeState +{ + fn from( + state: omicron_common::api::internal::nexus::InstanceRuntimeState, + ) -> Self { + let nexus_state = if state.propolis_id.is_some() { + omicron_common::api::external::InstanceState::Running + } else { + omicron_common::api::external::InstanceState::Stopped + }; -/// Conversion from the internal API type. -impl From for InstanceRuntimeState { - fn from(state: internal::nexus::InstanceRuntimeState) -> Self { Self { - state: InstanceState::new(state.run_state), - sled_id: state.sled_id, + nexus_state: InstanceState::new(nexus_state), + time_updated: state.time_updated, + gen: state.gen.into(), propolis_id: state.propolis_id, dst_propolis_id: state.dst_propolis_id, - propolis_ip: state.propolis_addr.map(|addr| addr.ip().into()), migration_id: state.migration_id, - propolis_gen: state.propolis_gen.into(), - ncpus: state.ncpus.into(), - memory: state.memory.into(), - hostname: state.hostname, - gen: state.gen.into(), - time_updated: state.time_updated, - boot_on_fault: false, } } } -/// Conversion to the internal API type. 
-impl Into for InstanceRuntimeState { - fn into(self) -> internal::nexus::InstanceRuntimeState { - internal::nexus::InstanceRuntimeState { - run_state: *self.state.state(), - sled_id: self.sled_id, - propolis_id: self.propolis_id, - dst_propolis_id: self.dst_propolis_id, - propolis_addr: self - .propolis_ip - .map(|ip| SocketAddr::new(ip.ip(), PROPOLIS_PORT)), - propolis_gen: self.propolis_gen.into(), - migration_id: self.migration_id, - ncpus: self.ncpus.into(), - memory: self.memory.into(), - hostname: self.hostname, - gen: self.gen.into(), - time_updated: self.time_updated, +impl From + for sled_agent_client::types::InstanceRuntimeState +{ + fn from(state: InstanceRuntimeState) -> Self { + Self { + dst_propolis_id: state.dst_propolis_id, + gen: state.gen.into(), + migration_id: state.migration_id, + propolis_id: state.propolis_id, + time_updated: state.time_updated, } } } diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 334dedad9f..f1447fc503 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -77,6 +77,7 @@ mod update_artifact; mod user_builtin; mod virtual_provisioning_collection; mod virtual_provisioning_resource; +mod vmm; mod vni; mod volume; mod vpc; @@ -156,6 +157,7 @@ pub use update_artifact::*; pub use user_builtin::*; pub use virtual_provisioning_collection::*; pub use virtual_provisioning_resource::*; +pub use vmm::*; pub use vni::*; pub use volume::*; pub use vpc::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 0165ab1568..2d6970452d 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -344,19 +344,30 @@ table! { time_deleted -> Nullable, project_id -> Uuid, user_data -> Binary, + ncpus -> Int8, + memory -> Int8, + hostname -> Text, + boot_on_fault -> Bool, state -> crate::InstanceStateEnum, time_state_updated -> Timestamptz, state_generation -> Int8, - active_sled_id -> Uuid, - active_propolis_id -> Uuid, - active_propolis_ip -> Nullable, + active_propolis_id -> Nullable, target_propolis_id -> Nullable, migration_id -> Nullable, - propolis_generation -> Int8, - ncpus -> Int8, - memory -> Int8, - hostname -> Text, - boot_on_fault -> Bool, + } +} + +table! { + vmm (id) { + id -> Uuid, + time_created -> Timestamptz, + time_deleted -> Nullable, + instance_id -> Uuid, + sled_id -> Uuid, + propolis_ip -> Inet, + state -> crate::InstanceStateEnum, + time_state_updated -> Timestamptz, + state_generation -> Int8, } } @@ -1168,6 +1179,7 @@ allow_tables_to_appear_in_same_query!( sled, sled_resource, router_route, + vmm, volume, vpc, vpc_subnet, diff --git a/nexus/db-model/src/vmm.rs b/nexus/db-model/src/vmm.rs new file mode 100644 index 0000000000..fe1158d5bb --- /dev/null +++ b/nexus/db-model/src/vmm.rs @@ -0,0 +1,137 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Defines database model types for the Vmm table. +//! +//! A row in the Vmm table stores information about a single Propolis VMM +//! running on a specific sled that incarnates a specific instance. A VMM's +//! instance ID, sled assignment, and Propolis server IP are all fixed for the +//! lifetime of the VMM. As with instances, the VMM's lifecycle-related state is +//! broken out into a separate type that allows sled agent and Nexus to send VMM +//! state updates to each other without sending parameters that are useless to +//! 
sled agent or that sled agent will never update (like the sled ID). + +use super::{Generation, InstanceState}; +use crate::schema::vmm; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// An individual VMM process that incarnates a specific instance. +#[derive( + Clone, Queryable, Debug, Selectable, Serialize, Deserialize, Insertable, +)] +#[diesel(table_name = vmm)] +pub struct Vmm { + /// This VMM's primary ID, referred to by an `Instance`'s `propolis_id` or + /// `target_propolis_id` fields. + pub id: Uuid, + + /// The time this VMM record was created. + pub time_created: DateTime, + + /// The time this VMM was destroyed. + pub time_deleted: Option>, + + /// The ID of the `Instance` that owns this VMM. + pub instance_id: Uuid, + + /// The sled assigned to the care and feeding of this VMM. + pub sled_id: Uuid, + + /// The IP address at which this VMM is serving the Propolis server API. + pub propolis_ip: ipnetwork::IpNetwork, + + /// Runtime state for the VMM. + #[diesel(embed)] + pub runtime: VmmRuntimeState, +} + +/// The set of states that a VMM can have when it is created. +pub enum VmmInitialState { + Starting, + Migrating, +} + +impl Vmm { + /// Creates a new VMM record. + pub fn new( + id: Uuid, + instance_id: Uuid, + sled_id: Uuid, + propolis_ip: ipnetwork::IpNetwork, + initial_state: VmmInitialState, + ) -> Self { + use omicron_common::api::external::InstanceState as ApiInstanceState; + + let now = Utc::now(); + let api_state = match initial_state { + VmmInitialState::Starting => ApiInstanceState::Starting, + VmmInitialState::Migrating => ApiInstanceState::Migrating, + }; + + Self { + id, + time_created: now, + time_deleted: None, + instance_id, + sled_id, + propolis_ip, + runtime: VmmRuntimeState { + state: InstanceState::new(api_state), + time_state_updated: now, + gen: Generation::new(), + }, + } + } +} + +/// Runtime state for a VMM, owned by the sled where that VMM is running. +#[derive( + Clone, + Debug, + AsChangeset, + Selectable, + Insertable, + Queryable, + Serialize, + Deserialize, +)] +#[diesel(table_name = vmm)] +pub struct VmmRuntimeState { + /// The state of this VMM. If this VMM is the active VMM for a given + /// instance, this state is the instance's logical state. + pub state: InstanceState, + + /// The time at which this state was most recently updated. + pub time_state_updated: DateTime, + + /// The generation number protecting this VMM's state and update time. 
+ #[diesel(column_name = state_generation)] + pub gen: Generation, +} + +impl From + for VmmRuntimeState +{ + fn from( + value: omicron_common::api::internal::nexus::VmmRuntimeState, + ) -> Self { + Self { + state: InstanceState::new(value.state), + time_state_updated: value.time_updated, + gen: value.gen.into(), + } + } +} + +impl From for sled_agent_client::types::VmmRuntimeState { + fn from(s: Vmm) -> Self { + Self { + gen: s.runtime.gen.into(), + state: s.runtime.state.into(), + time_updated: s.runtime.time_state_updated, + } + } +} diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index 80f72c1e18..a0d9bf12c3 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -190,7 +190,9 @@ impl DataStore { authz_instance.id(), authz_disk.id(), instance::table.into_boxed().filter( - instance::dsl::state.eq_any(ok_to_attach_instance_states), + instance::dsl::state + .eq_any(ok_to_attach_instance_states) + .and(instance::dsl::active_propolis_id.is_null()), ), disk::table.into_boxed().filter( disk::dsl::disk_state.eq_any(ok_to_attach_disk_state_labels), @@ -230,7 +232,15 @@ impl DataStore { // why we did not attach. api::external::DiskState::Creating | api::external::DiskState::Detached => { - match collection.runtime_state.state.state() { + if collection.runtime_state.propolis_id.is_some() { + return Err( + Error::invalid_request( + "cannot attach disk: instance is not \ + fully stopped" + ) + ); + } + match collection.runtime_state.nexus_state.state() { // Ok-to-be-attached instance states: api::external::InstanceState::Creating | api::external::InstanceState::Stopped => { @@ -254,7 +264,7 @@ impl DataStore { _ => { Err(Error::invalid_request(&format!( "cannot attach disk to instance in {} state", - collection.runtime_state.state.state(), + collection.runtime_state.nexus_state.state(), ))) } } @@ -320,7 +330,9 @@ impl DataStore { authz_disk.id(), instance::table .into_boxed() - .filter(instance::dsl::state.eq_any(ok_to_detach_instance_states)), + .filter(instance::dsl::state + .eq_any(ok_to_detach_instance_states) + .and(instance::dsl::active_propolis_id.is_null())), disk::table .into_boxed() .filter(disk::dsl::disk_state.eq_any(ok_to_detach_disk_state_labels)), @@ -361,7 +373,15 @@ impl DataStore { // Ok-to-detach disk states: Inspect the state to infer // why we did not detach. 
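Both the attach path above and the detach path that follows apply the same two-part test: the instance must be in an acceptable state (Creating or Stopped) and must have no active Propolis VMM. A minimal, self-contained sketch of that predicate is below; the enum, struct, and function names are stand-ins invented for illustration, not the real Nexus model types or datastore code.

use uuid::Uuid;

// Simplified stand-ins for the instance model; illustrative only.
#[derive(Clone, Copy, PartialEq, Debug)]
enum SimpleInstanceState {
    Creating,
    Stopped,
    Running,
}

struct SimpleInstanceRuntime {
    nexus_state: SimpleInstanceState,
    active_propolis_id: Option<Uuid>,
}

// Returns true only if the instance is "fully stopped": in an allowed
// state *and* with no active VMM referenced by `active_propolis_id`.
fn ok_to_modify_attachments(rt: &SimpleInstanceRuntime) -> bool {
    matches!(
        rt.nexus_state,
        SimpleInstanceState::Creating | SimpleInstanceState::Stopped
    ) && rt.active_propolis_id.is_none()
}

The query-level filters (`state.eq_any(...)` combined with `active_propolis_id.is_null()`) enforce this rule inside the database; the match arms here only classify why an attach or detach was rejected.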
api::external::DiskState::Attached(id) if id == authz_instance.id() => { - match collection.runtime_state.state.state() { + if collection.runtime_state.propolis_id.is_some() { + return Err( + Error::invalid_request( + "cannot attach disk: instance is not \ + fully stopped" + ) + ); + } + match collection.runtime_state.nexus_state.state() { // Ok-to-be-detached instance states: api::external::InstanceState::Creating | api::external::InstanceState::Stopped => { @@ -375,7 +395,7 @@ impl DataStore { _ => { Err(Error::invalid_request(&format!( "cannot detach disk from instance in {} state", - collection.runtime_state.state.state(), + collection.runtime_state.nexus_state.state(), ))) } } diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 46ca07a74a..188f5c30c9 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -21,12 +21,14 @@ use crate::db::model::Instance; use crate::db::model::InstanceRuntimeState; use crate::db::model::Name; use crate::db::model::Project; +use crate::db::model::Vmm; use crate::db::pagination::paginated; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_model::VmmRuntimeState; use omicron_common::api; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; @@ -40,6 +42,68 @@ use omicron_common::bail_unless; use ref_cast::RefCast; use uuid::Uuid; +/// Wraps a record of an `Instance` along with its active `Vmm`, if it has one. +#[derive(Clone, Debug)] +pub struct InstanceAndActiveVmm { + instance: Instance, + vmm: Option, +} + +impl InstanceAndActiveVmm { + pub fn instance(&self) -> &Instance { + &self.instance + } + + pub fn vmm(&self) -> &Option { + &self.vmm + } + + pub fn sled_id(&self) -> Option { + self.vmm.as_ref().map(|v| v.sled_id) + } + + pub fn effective_state( + &self, + ) -> omicron_common::api::external::InstanceState { + if let Some(vmm) = &self.vmm { + vmm.runtime.state.0 + } else { + self.instance.runtime().nexus_state.0 + } + } +} + +impl From<(Instance, Option)> for InstanceAndActiveVmm { + fn from(value: (Instance, Option)) -> Self { + Self { instance: value.0, vmm: value.1 } + } +} + +impl From for omicron_common::api::external::Instance { + fn from(value: InstanceAndActiveVmm) -> Self { + let (run_state, time_run_state_updated) = if let Some(vmm) = value.vmm { + (vmm.runtime.state, vmm.runtime.time_state_updated) + } else { + ( + value.instance.runtime_state.nexus_state.clone(), + value.instance.runtime_state.time_updated, + ) + }; + + Self { + identity: value.instance.identity(), + project_id: value.instance.project_id, + ncpus: value.instance.ncpus.into(), + memory: value.instance.memory.into(), + hostname: value.instance.hostname, + runtime: omicron_common::api::external::InstanceRuntimeState { + run_state: *run_state.state(), + time_run_state_updated, + }, + } + } +} + impl DataStore { /// Idempotently insert a database record for an Instance /// @@ -97,10 +161,10 @@ impl DataStore { })?; bail_unless!( - instance.runtime().state.state() + instance.runtime().nexus_state.state() == &api::external::InstanceState::Creating, "newly-created Instance has unexpected state: {:?}", - instance.runtime().state + instance.runtime().nexus_state ); bail_unless!( instance.runtime().gen == gen, @@ -115,11 +179,12 @@ impl DataStore { opctx: &OpContext, 
authz_project: &authz::Project, pagparams: &PaginatedBy<'_>, - ) -> ListResultVec { + ) -> ListResultVec { opctx.authorize(authz::Action::ListChildren, authz_project).await?; use db::schema::instance::dsl; - match pagparams { + use db::schema::vmm::dsl as vmm_dsl; + Ok(match pagparams { PaginatedBy::Id(pagparams) => { paginated(dsl::instance, dsl::id, &pagparams) } @@ -131,10 +196,21 @@ impl DataStore { } .filter(dsl::project_id.eq(authz_project.id())) .filter(dsl::time_deleted.is_null()) - .select(Instance::as_select()) - .load_async::(&*self.pool_connection_authorized(opctx).await?) + .left_join( + vmm_dsl::vmm.on(vmm_dsl::id + .nullable() + .eq(dsl::active_propolis_id) + .and(vmm_dsl::time_deleted.is_null())), + ) + .select((Instance::as_select(), Option::::as_select())) + .load_async::<(Instance, Option)>( + &*self.pool_connection_authorized(opctx).await?, + ) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? + .into_iter() + .map(|(instance, vmm)| InstanceAndActiveVmm { instance, vmm }) + .collect()) } /// Fetches information about an Instance that the caller has previously @@ -160,22 +236,29 @@ impl DataStore { Ok(db_instance) } - /// Fetches information about a deleted instance. This can be used to - /// query the properties an instance had at the time it was deleted, which - /// can be useful when cleaning up a deleted instance. - pub async fn instance_fetch_deleted( + pub async fn instance_fetch_with_vmm( &self, opctx: &OpContext, authz_instance: &authz::Instance, - ) -> LookupResult { + ) -> LookupResult { opctx.authorize(authz::Action::Read, authz_instance).await?; - use db::schema::instance::dsl; - let instance = dsl::instance - .filter(dsl::id.eq(authz_instance.id())) - .filter(dsl::time_deleted.is_not_null()) - .select(Instance::as_select()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) + use db::schema::instance::dsl as instance_dsl; + use db::schema::vmm::dsl as vmm_dsl; + + let (instance, vmm) = instance_dsl::instance + .filter(instance_dsl::id.eq(authz_instance.id())) + .filter(instance_dsl::time_deleted.is_null()) + .left_join( + vmm_dsl::vmm.on(vmm_dsl::id + .nullable() + .eq(instance_dsl::active_propolis_id) + .and(vmm_dsl::time_deleted.is_null())), + ) + .select((Instance::as_select(), Option::::as_select())) + .get_result_async::<(Instance, Option)>( + &*self.pool_connection_authorized(opctx).await?, + ) .await .map_err(|e| { public_error_from_diesel( @@ -187,7 +270,7 @@ impl DataStore { ) })?; - Ok(instance) + Ok(InstanceAndActiveVmm { instance, vmm }) } // TODO-design It's tempting to return the updated state of the Instance @@ -211,15 +294,7 @@ impl DataStore { // - the active Propolis ID will not change, the state generation // increased, and the Propolis generation will not change, or // - the Propolis generation increased. - .filter( - (dsl::active_propolis_id - .eq(new_runtime.propolis_id) - .and(dsl::state_generation.lt(new_runtime.gen)) - .and( - dsl::propolis_generation.eq(new_runtime.propolis_gen), - )) - .or(dsl::propolis_generation.lt(new_runtime.propolis_gen)), - ) + .filter(dsl::state_generation.lt(new_runtime.gen)) .set(new_runtime.clone()) .check_if_exists::(*instance_id) .execute_and_check(&*self.pool_connection_unauthorized().await?) @@ -241,6 +316,69 @@ impl DataStore { Ok(updated) } + /// Updates an instance record and a VMM record with a single database + /// command. 
+ /// + /// This is intended to be used to apply updates from sled agent that + /// may change a VMM's runtime state (e.g. moving an instance from Running + /// to Stopped) and its corresponding instance's state (e.g. changing the + /// active Propolis ID to reflect a completed migration) in a single + /// transaction. The caller is responsible for ensuring the instance and + /// VMM states are consistent with each other before calling this routine. + /// + /// # Arguments + /// + /// - instance_id: The ID of the instance to update. + /// - new_instance: The new instance runtime state to try to write. + /// - vmm_id: The ID of the VMM to update. + /// - new_vmm: The new VMM runtime state to try to write. + /// + /// # Return value + /// + /// - `Ok((instance_updated, vmm_updated))` if the query was issued + /// successfully. `instance_updated` and `vmm_updated` are each true if + /// the relevant item was updated and false otherwise. Note that an update + /// can fail because it was inapplicable (i.e. the database has state with + /// a newer generation already) or because the relevant record was not + /// found. + /// - `Err` if another error occurred while accessing the database. + pub async fn instance_and_vmm_update_runtime( + &self, + instance_id: &Uuid, + new_instance: &InstanceRuntimeState, + vmm_id: &Uuid, + new_vmm: &VmmRuntimeState, + ) -> Result<(bool, bool), Error> { + let query = crate::db::queries::instance::InstanceAndVmmUpdate::new( + *instance_id, + new_instance.clone(), + *vmm_id, + new_vmm.clone(), + ); + + // The InstanceAndVmmUpdate query handles and indicates failure to find + // either the instance or the VMM, so a query failure here indicates + // some kind of internal error and not a failed lookup. + let result = query + .execute_and_check(&*self.pool_connection_unauthorized().await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + let instance_updated = match result.instance_status { + Some(UpdateStatus::Updated) => true, + Some(UpdateStatus::NotUpdatedButExists) => false, + None => false, + }; + + let vmm_updated = match result.vmm_status { + Some(UpdateStatus::Updated) => true, + Some(UpdateStatus::NotUpdatedButExists) => false, + None => false, + }; + + Ok((instance_updated, vmm_updated)) + } + pub async fn project_delete_instance( &self, opctx: &OpContext, @@ -270,7 +408,9 @@ impl DataStore { let _instance = Instance::detach_resources( authz_instance.id(), instance::table.into_boxed().filter( - instance::dsl::state.eq_any(ok_to_delete_instance_states), + instance::dsl::state + .eq_any(ok_to_delete_instance_states) + .and(instance::dsl::active_propolis_id.is_null()), ), disk::table.into_boxed().filter( disk::dsl::disk_state.eq_any(ok_to_detach_disk_state_labels), @@ -295,7 +435,14 @@ impl DataStore { &authz_instance.id(), ), DetachManyError::NoUpdate { collection } => { - let instance_state = collection.runtime_state.state.state(); + if collection.runtime_state.propolis_id.is_some() { + return Error::invalid_request( + "cannot delete instance: instance is running or has \ + not yet fully stopped", + ); + } + let instance_state = + collection.runtime_state.nexus_state.state(); match instance_state { api::external::InstanceState::Stopped | api::external::InstanceState::Failed => { diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 7d5e32cad9..a77e20647a 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -82,6 +82,7 @@ mod switch_interface; mod switch_port; mod update; mod virtual_provisioning_collection; +mod vmm; mod volume; mod vpc; mod zpool; @@ -91,6 +92,7 @@ pub use db_metadata::{ all_sql_for_version_migration, EARLIEST_SUPPORTED_VERSION, }; pub use dns::DnsVersionUpdateBuilder; +pub use instance::InstanceAndActiveVmm; pub use rack::RackInit; pub use silo::Discoverability; pub use switch_port::SwitchPortSettingsCombinedResult; diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 4a46b23529..06550e9439 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -471,12 +471,11 @@ impl DataStore { let conn = self.pool_connection_authorized(opctx).await?; if primary { conn.transaction_async(|conn| async move { - let instance_state = instance_query - .get_result_async(&conn) - .await? - .runtime_state - .state; - if instance_state != stopped { + let instance_runtime = + instance_query.get_result_async(&conn).await?.runtime_state; + if instance_runtime.propolis_id.is_some() + || instance_runtime.nexus_state != stopped + { return Err(TxnError::CustomError( NetworkInterfaceUpdateError::InstanceNotStopped, )); @@ -515,12 +514,11 @@ impl DataStore { // we're only hitting a single row. Note that we still need to // verify the instance is stopped. conn.transaction_async(|conn| async move { - let instance_state = instance_query - .get_result_async(&conn) - .await? 
- .runtime_state - .state; - if instance_state != stopped { + let instance_state = + instance_query.get_result_async(&conn).await?.runtime_state; + if instance_state.propolis_id.is_some() + || instance_state.nexus_state != stopped + { return Err(TxnError::CustomError( NetworkInterfaceUpdateError::InstanceNotStopped, )); diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index a52d1b7772..f4f5188057 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -115,6 +115,7 @@ impl DataStore { resource_dsl::hardware_threads::NAME )) + resources.hardware_threads) .le(sled_dsl::usable_hardware_threads); + // This answers the boolean question: // "Does the SUM of all RAM usage, plus the one we're trying // to allocate, consume less RAM than exists on the sled?" @@ -125,6 +126,15 @@ impl DataStore { )) + resources.rss_ram) .le(sled_dsl::usable_physical_ram); + // Determine whether adding this service's reservoir allocation + // to what's allocated on the sled would avoid going over quota. + let sled_has_space_in_reservoir = + (diesel::dsl::sql::(&format!( + "COALESCE(SUM(CAST({} as INT8)), 0)", + resource_dsl::reservoir_ram::NAME + )) + resources.reservoir_ram) + .le(sled_dsl::reservoir_size); + // Generate a query describing all of the sleds that have space // for this reservation. let mut sled_targets = sled_dsl::sled @@ -134,8 +144,9 @@ impl DataStore { ) .group_by(sled_dsl::id) .having( - sled_has_space_for_threads.and(sled_has_space_for_rss), - // TODO: We should also validate the reservoir space, when it exists. + sled_has_space_for_threads + .and(sled_has_space_for_rss) + .and(sled_has_space_in_reservoir), ) .filter(sled_dsl::time_deleted.is_null()) .select(sled_dsl::id) diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs new file mode 100644 index 0000000000..18afde84f0 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -0,0 +1,161 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! [`DataStore`] helpers for working with VMM records. + +use super::DataStore; +use crate::authz; +use crate::context::OpContext; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::model::Vmm; +use crate::db::model::VmmRuntimeState; +use crate::db::schema::vmm::dsl; +use crate::db::update_and_check::UpdateAndCheck; +use crate::db::update_and_check::UpdateStatus; +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::Utc; +use diesel::prelude::*; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::Error; +use omicron_common::api::external::LookupResult; +use omicron_common::api::external::LookupType; +use omicron_common::api::external::ResourceType; +use omicron_common::api::external::UpdateResult; +use uuid::Uuid; + +impl DataStore { + pub async fn vmm_insert( + &self, + opctx: &OpContext, + vmm: Vmm, + ) -> CreateResult { + let vmm = diesel::insert_into(dsl::vmm) + .values(vmm) + .on_conflict(dsl::id) + .do_update() + .set(dsl::time_state_updated.eq(dsl::time_state_updated)) + .returning(Vmm::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(vmm) + } + + pub async fn vmm_mark_deleted( + &self, + opctx: &OpContext, + vmm_id: &Uuid, + ) -> UpdateResult { + use crate::db::model::InstanceState as DbInstanceState; + use omicron_common::api::external::InstanceState as ApiInstanceState; + + let valid_states = vec![ + DbInstanceState::new(ApiInstanceState::Destroyed), + DbInstanceState::new(ApiInstanceState::Failed), + ]; + + let updated = diesel::update(dsl::vmm) + .filter(dsl::id.eq(*vmm_id)) + .filter(dsl::state.eq_any(valid_states)) + .set(dsl::time_deleted.eq(Utc::now())) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Vmm, + LookupType::ById(*vmm_id), + ), + ) + })?; + + Ok(updated != 0) + } + + pub async fn vmm_fetch( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + vmm_id: &Uuid, + ) -> LookupResult { + opctx.authorize(authz::Action::Read, authz_instance).await?; + + let vmm = dsl::vmm + .filter(dsl::id.eq(*vmm_id)) + .filter(dsl::instance_id.eq(authz_instance.id())) + .filter(dsl::time_deleted.is_null()) + .select(Vmm::as_select()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Vmm, + LookupType::ById(*vmm_id), + ), + ) + })?; + + Ok(vmm) + } + + pub async fn vmm_update_runtime( + &self, + vmm_id: &Uuid, + new_runtime: &VmmRuntimeState, + ) -> Result { + let updated = diesel::update(dsl::vmm) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(*vmm_id)) + .filter(dsl::state_generation.lt(new_runtime.gen)) + .set(new_runtime.clone()) + .check_if_exists::(*vmm_id) + .execute_and_check(&*self.pool_connection_unauthorized().await?) + .await + .map(|r| match r.status { + UpdateStatus::Updated => true, + UpdateStatus::NotUpdatedButExists => false, + }) + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Vmm, + LookupType::ById(*vmm_id), + ), + ) + })?; + + Ok(updated) + } + + /// Forcibly overwrites the Propolis IP in the supplied VMM's record with + /// the supplied Propolis IP. + /// + /// This is used in tests to overwrite the IP for a VMM that is backed by a + /// mock Propolis server that serves on localhost but has its Propolis IP + /// allocated by the instance start procedure. (Unfortunately, this can't be + /// marked #[cfg(test)] because the integration tests require this + /// functionality.) + pub async fn vmm_overwrite_ip_for_test( + &self, + opctx: &OpContext, + vmm_id: &Uuid, + new_ip: ipnetwork::IpNetwork, + ) -> UpdateResult { + let vmm = diesel::update(dsl::vmm) + .filter(dsl::id.eq(*vmm_id)) + .set(dsl::propolis_ip.eq(new_ip)) + .returning(Vmm::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(vmm) + } +} diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 46c3d2504e..14886ba018 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -572,7 +572,7 @@ impl DataStore { // Sleds to notify when firewall rules change. 
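The `vmm_update_runtime` query above, like the simplified `instance_update_runtime` earlier in this patch, relies on an optimistic-concurrency rule: a write only lands if it carries a newer state generation than the stored row (`state_generation.lt(new_runtime.gen)`). A reduced sketch of that rule over plain in-memory values follows; `Guarded` and `apply_if_newer` are illustrative names, not types from this codebase, and the real check happens inside the database query rather than in Rust.

// Illustrative stand-in for a generation-guarded record.
struct Guarded<T> {
    gen: u64,
    value: T,
}

// Applies `new_value` only if `new_gen` is strictly newer than what is
// stored. Returning true corresponds to UpdateStatus::Updated; returning
// false corresponds to UpdateStatus::NotUpdatedButExists.
fn apply_if_newer<T>(stored: &mut Guarded<T>, new_gen: u64, new_value: T) -> bool {
    if new_gen > stored.gen {
        stored.gen = new_gen;
        stored.value = new_value;
        true
    } else {
        false
    }
}

The `InstanceAndVmmUpdate` CTE introduced below applies this same generation filter to the instance table and the vmm table in a single statement.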
use db::schema::{ instance, instance_network_interface, service, - service_network_interface, sled, + service_network_interface, sled, vmm, }; let instance_query = instance_network_interface::table @@ -581,10 +581,15 @@ impl DataStore { .on(instance::id .eq(instance_network_interface::instance_id)), ) - .inner_join(sled::table.on(sled::id.eq(instance::active_sled_id))) + .inner_join( + vmm::table + .on(vmm::id.nullable().eq(instance::active_propolis_id)), + ) + .inner_join(sled::table.on(sled::id.eq(vmm::sled_id))) .filter(instance_network_interface::vpc_id.eq(vpc_id)) .filter(instance_network_interface::time_deleted.is_null()) .filter(instance::time_deleted.is_null()) + .filter(vmm::time_deleted.is_null()) .select(Sled::as_select()); let service_query = service_network_interface::table diff --git a/nexus/db-queries/src/db/queries/instance.rs b/nexus/db-queries/src/db/queries/instance.rs new file mode 100644 index 0000000000..ea40877450 --- /dev/null +++ b/nexus/db-queries/src/db/queries/instance.rs @@ -0,0 +1,255 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implement a query for updating an instance and VMM in a single CTE. + +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::QueryResult; +use diesel::query_builder::{Query, QueryFragment, QueryId}; +use diesel::result::Error as DieselError; +use diesel::sql_types::{Nullable, Uuid as SqlUuid}; +use diesel::{pg::Pg, query_builder::AstPass}; +use diesel::{Column, ExpressionMethods, QueryDsl, RunQueryDsl}; +use nexus_db_model::{ + schema::{instance::dsl as instance_dsl, vmm::dsl as vmm_dsl}, + InstanceRuntimeState, VmmRuntimeState, +}; +use uuid::Uuid; + +use crate::db::pool::DbConnection; +use crate::db::update_and_check::UpdateStatus; + +/// A CTE that checks and updates the instance and VMM tables in a single +/// atomic operation. +// +// The single-table update-and-check CTE has the following form: +// +// WITH found AS (SELECT FROM T WHERE ) +// updated AS (UPDATE T SET RETURNING *) +// SELECT +// found. +// updated. +// found.* +// FROM +// found +// LEFT JOIN +// updated +// ON +// found. = updated.; +// +// The idea behind this query is to have separate "found" and "updated" +// subqueries for the instance and VMM tables, then use those to create two more +// subqueries that perform the joins and yield the results, along the following +// lines: +// +// WITH vmm_found AS (SELECT(SELECT id FROM vmm WHERE vmm.id = id) AS id), +// vmm_updated AS (UPDATE vmm SET ... RETURNING *), +// instance_found AS (SELECT( +// SELECT id FROM instance WHERE instance.id = id +// ) AS id), +// instance_updated AS (UPDATE instance SET ... RETURNING *), +// vmm_result AS ( +// SELECT vmm_found.id AS found, vmm_updated.id AS updated +// FROM vmm_found +// LEFT JOIN vmm_updated +// ON vmm_found.id = vmm_updated.id +// ), +// instance_result AS ( +// SELECT instance_found.id AS found, instance_updated.id AS updated +// FROM instance_found +// LEFT JOIN instance_updated +// ON instance_found.id = instance_updated.id +// ) +// SELECT vmm_result.found, vmm_result.updated, instance_result.found, +// instance_result.updated +// FROM vmm_result, instance_result; +// +// The "wrapper" SELECTs when finding instances and VMMs are used to get a NULL +// result in the final output instead of failing the entire query if the target +// object is missing. 
This maximizes Nexus's flexibility when dealing with +// updates from sled agent that refer to one valid and one deleted object. (This +// can happen if, e.g., sled agent sends a message indicating that a retired VMM +// has finally been destroyed when its instance has since been deleted.) +pub struct InstanceAndVmmUpdate { + instance_find: Box + Send>, + vmm_find: Box + Send>, + instance_update: Box + Send>, + vmm_update: Box + Send>, +} + +/// Contains the result of a combined instance-and-VMM update operation. +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct InstanceAndVmmUpdateResult { + /// `Some(status)` if the target instance was found; the wrapped + /// `UpdateStatus` indicates whether the row was updated. `None` if the + /// instance was not found. + pub instance_status: Option, + + /// `Some(status)` if the target VMM was found; the wrapped `UpdateStatus` + /// indicates whether the row was updated. `None` if the VMM was not found. + pub vmm_status: Option, +} + +/// Computes the update status to return from the results of queries that find +/// and update an object with an ID of type `T`. +fn compute_update_status( + found: Option, + updated: Option, +) -> Option +where + T: PartialEq + std::fmt::Display, +{ + match (found, updated) { + // If both the "find" and "update" prongs returned an ID, the row was + // updated. The IDs should match in this case (if they don't then the + // query was constructed very strangely!). + (Some(found_id), Some(updated_id)) if found_id == updated_id => { + Some(UpdateStatus::Updated) + } + // If the "find" prong returned an ID but the "update" prong didn't, the + // row exists but wasn't updated. + (Some(_), None) => Some(UpdateStatus::NotUpdatedButExists), + // If neither prong returned anything, indicate the row is missing. + (None, None) => None, + // If both prongs returned an ID, but they don't match, something + // terrible has happened--the prongs must have referred to different + // IDs! + (Some(found_id), Some(mismatched_id)) => unreachable!( + "updated ID {} didn't match found ID {}", + mismatched_id, found_id + ), + // Similarly, if the target ID was not found but something was updated + // anyway, then something is wrong with the update query--either it has + // the wrong ID or did not filter rows properly. 
+ (None, Some(updated_id)) => unreachable!( + "ID {} was updated but no found ID was supplied", + updated_id + ), + } +} + +impl InstanceAndVmmUpdate { + pub fn new( + instance_id: Uuid, + new_instance_runtime_state: InstanceRuntimeState, + vmm_id: Uuid, + new_vmm_runtime_state: VmmRuntimeState, + ) -> Self { + let instance_find = Box::new( + instance_dsl::instance + .filter(instance_dsl::id.eq(instance_id)) + .select(instance_dsl::id), + ); + + let vmm_find = Box::new( + vmm_dsl::vmm.filter(vmm_dsl::id.eq(vmm_id)).select(vmm_dsl::id), + ); + + let instance_update = Box::new( + diesel::update(instance_dsl::instance) + .filter(instance_dsl::time_deleted.is_null()) + .filter(instance_dsl::id.eq(instance_id)) + .filter( + instance_dsl::state_generation + .lt(new_instance_runtime_state.gen), + ) + .set(new_instance_runtime_state), + ); + + let vmm_update = Box::new( + diesel::update(vmm_dsl::vmm) + .filter(vmm_dsl::time_deleted.is_null()) + .filter(vmm_dsl::id.eq(vmm_id)) + .filter(vmm_dsl::state_generation.lt(new_vmm_runtime_state.gen)) + .set(new_vmm_runtime_state), + ); + + Self { instance_find, vmm_find, instance_update, vmm_update } + } + + pub async fn execute_and_check( + self, + conn: &(impl async_bb8_diesel::AsyncConnection + Sync), + ) -> Result { + let (vmm_found, vmm_updated, instance_found, instance_updated) = + self.get_result_async::<(Option, + Option, + Option, + Option)>(conn).await?; + + let instance_status = + compute_update_status(instance_found, instance_updated); + let vmm_status = compute_update_status(vmm_found, vmm_updated); + + Ok(InstanceAndVmmUpdateResult { instance_status, vmm_status }) + } +} + +impl QueryId for InstanceAndVmmUpdate { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl Query for InstanceAndVmmUpdate { + type SqlType = ( + Nullable, + Nullable, + Nullable, + Nullable, + ); +} + +impl RunQueryDsl for InstanceAndVmmUpdate {} + +impl QueryFragment for InstanceAndVmmUpdate { + fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, Pg>) -> QueryResult<()> { + out.push_sql("WITH instance_found AS (SELECT ("); + self.instance_find.walk_ast(out.reborrow())?; + out.push_sql(") AS id), "); + + out.push_sql("vmm_found AS (SELECT ("); + self.vmm_find.walk_ast(out.reborrow())?; + out.push_sql(") AS id), "); + + out.push_sql("instance_updated AS ("); + self.instance_update.walk_ast(out.reborrow())?; + out.push_sql(" RETURNING id), "); + + out.push_sql("vmm_updated AS ("); + self.vmm_update.walk_ast(out.reborrow())?; + out.push_sql(" RETURNING id), "); + + out.push_sql("vmm_result AS ("); + out.push_sql("SELECT vmm_found."); + out.push_identifier(vmm_dsl::id::NAME)?; + out.push_sql(" AS found, vmm_updated."); + out.push_identifier(vmm_dsl::id::NAME)?; + out.push_sql(" AS updated"); + out.push_sql(" FROM vmm_found LEFT JOIN vmm_updated ON vmm_found."); + out.push_identifier(vmm_dsl::id::NAME)?; + out.push_sql(" = vmm_updated."); + out.push_identifier(vmm_dsl::id::NAME)?; + out.push_sql("), "); + + out.push_sql("instance_result AS ("); + out.push_sql("SELECT instance_found."); + out.push_identifier(instance_dsl::id::NAME)?; + out.push_sql(" AS found, instance_updated."); + out.push_identifier(instance_dsl::id::NAME)?; + out.push_sql(" AS updated"); + out.push_sql( + " FROM instance_found LEFT JOIN instance_updated ON instance_found.", + ); + out.push_identifier(instance_dsl::id::NAME)?; + out.push_sql(" = instance_updated."); + out.push_identifier(instance_dsl::id::NAME)?; + out.push_sql(") "); + + out.push_sql("SELECT vmm_result.found, 
vmm_result.updated, "); + out.push_sql("instance_result.found, instance_result.updated "); + out.push_sql("FROM vmm_result, instance_result;"); + + Ok(()) + } +} diff --git a/nexus/db-queries/src/db/queries/mod.rs b/nexus/db-queries/src/db/queries/mod.rs index f91b54fb69..cd48be61e3 100644 --- a/nexus/db-queries/src/db/queries/mod.rs +++ b/nexus/db-queries/src/db/queries/mod.rs @@ -7,6 +7,7 @@ pub mod disk; pub mod external_ip; +pub mod instance; pub mod ip_pool; #[macro_use] mod next_item; diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index bac2610b41..84a81a7b7a 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -60,6 +60,11 @@ lazy_static::lazy_static! { static ref INSTANCE_DESTROYED: db::model::InstanceState = db::model::InstanceState(external::InstanceState::Destroyed); + // A sentinel value for the instance state when the instance has an active + // VMM, irrespective of that VMM's actual state. + static ref INSTANCE_RUNNING: db::model::InstanceState = + db::model::InstanceState(external::InstanceState::Running); + static ref NO_INSTANCE_SENTINEL_STRING: String = String::from(NO_INSTANCE_SENTINEL); @@ -1273,7 +1278,10 @@ const INSTANCE_FROM_CLAUSE: InstanceFromClause = InstanceFromClause::new(); // -- Identify the state of the instance // ( // SELECT -// state +// CASE +// WHEN active_propolis_id IS NULL THEN state +// ELSE 'running' +// END // FROM // instance // WHERE @@ -1291,9 +1299,19 @@ const INSTANCE_FROM_CLAUSE: InstanceFromClause = InstanceFromClause::new(); // ``` // // This uses the familiar cast-fail trick to select the instance's UUID if the -// instance is in a state that can be altered, or a sentinel of `'running'` if -// not. It also ensures the instance exists at all with the sentinel -// `'no-instance'`. +// instance is in a state that allows network interfaces to be altered or +// produce a cast error if they cannot. The COALESCE statement and its innards +// yield the following state string: +// +// - 'destroyed' if the instance is not found at all +// - 'running' if the instance is found and has an active VMM (this forbids +// network interface changes irrespective of that VMM's actual state) +// - the instance's `state` otherwise +// +// If this produces 'stopped', 'creating', or (if applicable) 'failed', the +// outer CASE returns the instance ID as a string, which casts to a UUID. The +// 'destroyed' and 'bad-state' cases return non-UUID strings that cause a cast +// failure that can be caught and interpreted as a specific class of error. // // 'failed' is conditionally an accepted state: it would not be accepted as part // of InsertQuery, but it should be as part of DeleteQuery (for example if the @@ -1301,10 +1319,10 @@ const INSTANCE_FROM_CLAUSE: InstanceFromClause = InstanceFromClause::new(); // // Note that 'stopped', 'failed', and 'creating' are considered valid states. // 'stopped' is used for most situations, especially client-facing, but -// 'creating' is critical for the instance-creation saga. When we first -// provision the instance, its in the 'creating' state until a sled agent -// responds telling us that the instance has actually been launched. This -// additional case supports adding interfaces during that provisioning process. +// 'creating' is critical for the instance-creation saga. 
When an instance is +// first provisioned, it remains in the 'creating' state until provisioning is +// copmleted and it transitions to 'stopped'; it is permissible to add +// interfaces during that provisioning process. fn push_instance_state_verification_subquery<'a>( instance_id: &'a Uuid, @@ -1313,7 +1331,13 @@ fn push_instance_state_verification_subquery<'a>( failed_ok: bool, ) -> QueryResult<()> { out.push_sql("CAST(CASE COALESCE((SELECT "); + out.push_sql("CASE WHEN "); + out.push_identifier(db::schema::instance::dsl::active_propolis_id::NAME)?; + out.push_sql(" IS NULL THEN "); out.push_identifier(db::schema::instance::dsl::state::NAME)?; + out.push_sql(" ELSE "); + out.push_bind_param::(&INSTANCE_RUNNING)?; + out.push_sql(" END "); out.push_sql(" FROM "); INSTANCE_FROM_CLAUSE.walk_ast(out.reborrow())?; out.push_sql(" WHERE "); @@ -1662,7 +1686,6 @@ mod tests { use crate::db::model::Project; use crate::db::model::VpcSubnet; use async_bb8_diesel::AsyncRunQueryDsl; - use chrono::Utc; use dropshot::test_util::LogContext; use ipnetwork::Ipv4Network; use ipnetwork::Ipv6Network; @@ -1674,14 +1697,11 @@ mod tests { use omicron_common::api::external; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Error; - use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::InstanceCpuCount; - use omicron_common::api::external::InstanceState; use omicron_common::api::external::Ipv4Net; use omicron_common::api::external::Ipv6Net; use omicron_common::api::external::MacAddr; - use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_test_utils::dev; use omicron_test_utils::dev::db::CockroachInstance; use std::convert::TryInto; @@ -1716,25 +1736,8 @@ mod tests { disks: vec![], start: true, }; - let runtime = InstanceRuntimeState { - run_state: InstanceState::Creating, - sled_id: Uuid::new_v4(), - propolis_id: Uuid::new_v4(), - dst_propolis_id: None, - propolis_addr: Some(std::net::SocketAddr::new( - "::1".parse().unwrap(), - 12400, - )), - migration_id: None, - propolis_gen: Generation::new(), - hostname: params.hostname.clone(), - memory: params.memory, - ncpus: params.ncpus, - gen: Generation::new(), - time_updated: Utc::now(), - }; - let instance = - Instance::new(instance_id, project_id, ¶ms, runtime.into()); + + let instance = Instance::new(instance_id, project_id, ¶ms); let (.., authz_project) = LookupPath::new(&opctx, &db_datastore) .project_id(project_id) @@ -1768,14 +1771,41 @@ mod tests { state: external::InstanceState, ) -> Instance { let new_runtime = model::InstanceRuntimeState { - state: model::InstanceState::new(state), + nexus_state: model::InstanceState::new(state), + gen: instance.runtime_state.gen.next().into(), + ..instance.runtime_state.clone() + }; + let res = db_datastore + .instance_update_runtime(&instance.id(), &new_runtime) + .await; + assert!(matches!(res, Ok(true)), "Failed to change instance state"); + instance.runtime_state = new_runtime; + instance + } + + /// Sets or clears the active Propolis ID in the supplied instance record. + /// This can be used to exercise the "does this instance have an active + /// VMM?" test that determines in part whether an instance's network + /// interfaces can change. + /// + /// Note that this routine does not construct a VMM record for the + /// corresponding ID, so any functions that expect such a record to exist + /// will fail in strange and exciting ways. 
+ async fn instance_set_active_vmm( + db_datastore: &DataStore, + mut instance: Instance, + propolis_id: Option, + ) -> Instance { + let new_runtime = model::InstanceRuntimeState { + propolis_id, gen: instance.runtime_state.gen.next().into(), ..instance.runtime_state.clone() }; + let res = db_datastore .instance_update_runtime(&instance.id(), &new_runtime) .await; - assert!(matches!(res, Ok(true)), "Failed to stop instance"); + assert!(matches!(res, Ok(true)), "Failed to change instance VMM ref"); instance.runtime_state = new_runtime; instance } @@ -1900,10 +1930,7 @@ mod tests { self.logctx.cleanup_successful(); } - async fn create_instance( - &self, - state: external::InstanceState, - ) -> Instance { + async fn create_stopped_instance(&self) -> Instance { instance_set_state( &self.db_datastore, create_instance( @@ -1912,7 +1939,28 @@ mod tests { &self.db_datastore, ) .await, - state, + external::InstanceState::Stopped, + ) + .await + } + + async fn create_running_instance(&self) -> Instance { + let instance = instance_set_state( + &self.db_datastore, + create_instance( + &self.opctx, + self.project_id, + &self.db_datastore, + ) + .await, + external::InstanceState::Starting, + ) + .await; + + instance_set_active_vmm( + &self.db_datastore, + instance, + Some(Uuid::new_v4()), ) .await } @@ -1922,8 +1970,7 @@ mod tests { async fn test_insert_running_instance_fails() { let context = TestContext::new("test_insert_running_instance_fails", 2).await; - let instance = - context.create_instance(external::InstanceState::Running).await; + let instance = context.create_running_instance().await; let instance_id = instance.id(); let requested_ip = "172.30.0.5".parse().unwrap(); let interface = IncompleteNetworkInterface::new_instance( @@ -1952,8 +1999,7 @@ mod tests { #[tokio::test] async fn test_insert_request_exact_ip() { let context = TestContext::new("test_insert_request_exact_ip", 2).await; - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; let instance_id = instance.id(); let requested_ip = "172.30.0.5".parse().unwrap(); let interface = IncompleteNetworkInterface::new_instance( @@ -2024,8 +2070,7 @@ mod tests { .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES); for (i, expected_address) in addresses.take(2).enumerate() { - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; let interface = IncompleteNetworkInterface::new_instance( Uuid::new_v4(), instance.id(), @@ -2063,10 +2108,8 @@ mod tests { let context = TestContext::new("test_insert_request_same_ip_fails", 2).await; - let instance = - context.create_instance(external::InstanceState::Stopped).await; - let new_instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; + let new_instance = context.create_stopped_instance().await; // Insert an interface on the first instance. 
let interface = IncompleteNetworkInterface::new_instance( @@ -2194,8 +2237,7 @@ mod tests { async fn test_insert_with_duplicate_name_fails() { let context = TestContext::new("test_insert_with_duplicate_name_fails", 2).await; - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; let interface = IncompleteNetworkInterface::new_instance( Uuid::new_v4(), instance.id(), @@ -2244,8 +2286,7 @@ mod tests { async fn test_insert_same_vpc_subnet_fails() { let context = TestContext::new("test_insert_same_vpc_subnet_fails", 2).await; - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; let interface = IncompleteNetworkInterface::new_instance( Uuid::new_v4(), instance.id(), @@ -2288,8 +2329,7 @@ mod tests { async fn test_insert_same_interface_fails() { let context = TestContext::new("test_insert_same_interface_fails", 2).await; - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; let interface = IncompleteNetworkInterface::new_instance( Uuid::new_v4(), instance.id(), @@ -2330,8 +2370,7 @@ mod tests { async fn test_insert_multiple_vpcs_fails() { let context = TestContext::new("test_insert_multiple_vpcs_fails", 2).await; - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; let interface = IncompleteNetworkInterface::new_instance( Uuid::new_v4(), instance.id(), @@ -2384,8 +2423,7 @@ mod tests { let context = TestContext::new("test_detect_ip_exhaustion", 2).await; let n_interfaces = context.net1.available_ipv4_addresses()[0]; for _ in 0..n_interfaces { - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; let interface = IncompleteNetworkInterface::new_instance( Uuid::new_v4(), instance.id(), @@ -2443,8 +2481,7 @@ mod tests { let context = TestContext::new("test_insert_multiple_vpc_subnets_succeeds", 2) .await; - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; for (i, subnet) in context.net1.subnets.iter().enumerate() { let interface = IncompleteNetworkInterface::new_instance( Uuid::new_v4(), @@ -2509,8 +2546,7 @@ mod tests { MAX_NICS as u8 + 1, ) .await; - let instance = - context.create_instance(external::InstanceState::Stopped).await; + let instance = context.create_stopped_instance().await; for slot in 0..MAX_NICS { let subnet = &context.net1.subnets[slot]; let interface = IncompleteNetworkInterface::new_instance( diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index f07ceae4a0..592e1f0492 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -19,9 +19,9 @@ use futures::{FutureExt, SinkExt, StreamExt}; use nexus_db_model::IpKind; use nexus_db_queries::authn; use nexus_db_queries::authz; -use nexus_db_queries::authz::ApiResource; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; +use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; @@ -47,11 +47,11 @@ use propolis_client::support::InstanceSerialConsoleHelper; use propolis_client::support::WSClientOffset; use 
propolis_client::support::WebSocketStream; use sled_agent_client::types::InstanceMigrationSourceParams; +use sled_agent_client::types::InstanceMigrationTargetParams; +use sled_agent_client::types::InstanceProperties; use sled_agent_client::types::InstancePutMigrationIdsBody; use sled_agent_client::types::InstancePutStateBody; -use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; -use sled_agent_client::Client as SledAgentClient; use std::net::SocketAddr; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite}; @@ -59,9 +59,39 @@ use uuid::Uuid; const MAX_KEYS_PER_INSTANCE: u32 = 8; -pub(crate) enum WriteBackUpdatedInstance { - WriteBack, - Drop, +/// The kinds of state changes that can be requested of an instance's current +/// VMM (i.e. the VMM pointed to be the instance's `propolis_id` field). +pub(crate) enum InstanceStateChangeRequest { + Run, + Reboot, + Stop, + Migrate(InstanceMigrationTargetParams), +} + +impl From + for sled_agent_client::types::InstanceStateRequested +{ + fn from(value: InstanceStateChangeRequest) -> Self { + match value { + InstanceStateChangeRequest::Run => Self::Running, + InstanceStateChangeRequest::Reboot => Self::Reboot, + InstanceStateChangeRequest::Stop => Self::Stopped, + InstanceStateChangeRequest::Migrate(params) => { + Self::MigrationTarget(params) + } + } + } +} + +/// The actions that can be taken in response to an +/// [`InstanceStateChangeRequest`]. +enum InstanceStateChangeRequestAction { + /// The instance is already in the correct state, so no action is needed. + AlreadyDone, + + /// Request the appropriate state change from the sled with the specified + /// UUID. + SendToSled(Uuid), } impl super::Nexus { @@ -109,7 +139,7 @@ impl super::Nexus { opctx: &OpContext, project_lookup: &lookup::Project<'_>, params: ¶ms::InstanceCreate, - ) -> CreateResult { + ) -> CreateResult { let (.., authz_project) = project_lookup.lookup_for(authz::Action::CreateChild).await?; @@ -190,7 +220,7 @@ impl super::Nexus { }); } - // Reject instances where the memory is greated than the limit + // Reject instances where the memory is greater than the limit if params.memory.to_bytes() > MAX_MEMORY_BYTES_PER_INSTANCE { return Err(Error::InvalidValue { label: String::from("size"), @@ -221,46 +251,38 @@ impl super::Nexus { .map_err(|e| Error::internal_error(&format!("{:#}", &e))) .internal_context("looking up output from instance create saga")?; - // TODO-correctness TODO-robustness TODO-design It's not quite correct - // to take this instance id and look it up again. It's possible that - // it's been modified or even deleted since the saga executed. In that - // case, we might return a different state of the Instance than the one - // that the user created or even fail with a 404! Both of those are - // wrong behavior -- we should be returning the very instance that the - // user created. - // - // How can we fix this? Right now we have internal representations like - // Instance and analaogous end-user-facing representations like - // Instance. The former is not even serializable. The saga - // _could_ emit the View version, but that's not great for two (related) - // reasons: (1) other sagas might want to provision instances and get - // back the internal representation to do other things with the - // newly-created instance, and (2) even within a saga, it would be - // useful to pass a single Instance representation along the saga, - // but they probably would want the internal representation, not the - // view. 
- // - // The saga could emit an Instance directly. Today, Instance - // etc. aren't supposed to even be serializable -- we wanted to be able - // to have other datastore state there if needed. We could have a third - // InstanceInternalView...but that's starting to feel pedantic. We - // could just make Instance serializable, store that, and call it a - // day. Does it matter that we might have many copies of the same - // objects in memory? - // - // If we make these serializable, it would be nice if we could leverage - // the type system to ensure that we never accidentally send them out a - // dropshot endpoint. (On the other hand, maybe we _do_ want to do - // that, for internal interfaces! Can we do this on a - // per-dropshot-server-basis?) + // If the caller asked to start the instance, kick off that saga. + // There's a window in which the instance is stopped and can be deleted, + // so this is not guaranteed to succeed, and its result should not + // affect the result of the attempt to create the instance. + if params.start { + let lookup = LookupPath::new(opctx, &self.db_datastore) + .instance_id(instance_id); + + let start_result = self.instance_start(opctx, &lookup).await; + if let Err(e) = start_result { + info!(self.log, "failed to start newly-created instance"; + "instance_id" => %instance_id, + "error" => ?e); + } + } + + // TODO: This operation should return the instance as it was created. + // Refetching the instance state here won't return that version of the + // instance if its state changed between the time the saga finished and + // the time this lookup was performed. // - // TODO Even worse, post-authz, we do two lookups here instead of one. - // Maybe sagas should be able to emit `authz::Instance`-type objects. - let (.., db_instance) = LookupPath::new(opctx, &self.db_datastore) + // Because the create saga has to synthesize an instance record (and + // possibly a VMM record), and these are serializable, it should be + // possible to yank the outputs out of the appropriate saga steps and + // return them here. 
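The `instance_fetch_with_vmm` call below returns the instance together with its active VMM (if any), and the externally visible run state is derived from that pair rather than from a single column on the instance. The sketch below mirrors the `effective_state` helper added earlier in this patch, but over local stand-in types; `SimpleInstance`, `SimpleVmm`, and the free function are invented for illustration only.

// Local stand-ins; illustrative only.
#[derive(Clone, Copy, Debug, PartialEq)]
enum SimpleState {
    Creating,
    Stopped,
    Starting,
    Running,
}

struct SimpleInstance {
    nexus_state: SimpleState,
}

struct SimpleVmm {
    state: SimpleState,
}

// With an active VMM, the VMM's state is authoritative; with no active
// VMM, the instance's own nexus_state (e.g. Stopped) is reported.
fn effective_state(instance: &SimpleInstance, vmm: Option<&SimpleVmm>) -> SimpleState {
    match vmm {
        Some(vmm) => vmm.state,
        None => instance.nexus_state,
    }
}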
+ + let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) .instance_id(instance_id) - .fetch() + .lookup_for(authz::Action::Read) .await?; - Ok(db_instance) + + self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await } pub(crate) async fn instance_list( @@ -268,7 +290,7 @@ impl super::Nexus { opctx: &OpContext, project_lookup: &lookup::Project<'_>, pagparams: &PaginatedBy<'_>, - ) -> ListResultVec { + ) -> ListResultVec { let (.., authz_project) = project_lookup.lookup_for(authz::Action::ListChildren).await?; self.db_datastore.instance_list(opctx, &authz_project, pagparams).await @@ -314,30 +336,40 @@ impl super::Nexus { opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, params: params::InstanceMigrate, - ) -> UpdateResult { - let (.., authz_instance, db_instance) = - instance_lookup.fetch_for(authz::Action::Modify).await?; + ) -> UpdateResult { + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + let state = self + .db_datastore + .instance_fetch_with_vmm(opctx, &authz_instance) + .await?; + let (instance, vmm) = (state.instance(), state.vmm()); - if db_instance.runtime().state.0 != InstanceState::Running { + if vmm.is_none() + || vmm.as_ref().unwrap().runtime.state.0 != InstanceState::Running + { return Err(Error::invalid_request( "instance must be running before it can migrate", )); } - if db_instance.runtime().sled_id == params.dst_sled_id { + let vmm = vmm.as_ref().unwrap(); + if vmm.sled_id == params.dst_sled_id { return Err(Error::invalid_request( "instance is already running on destination sled", )); } - if db_instance.runtime().migration_id.is_some() { + if instance.runtime().migration_id.is_some() { return Err(Error::unavail("instance is already migrating")); } // Kick off the migration saga let saga_params = sagas::instance_migrate::Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), - instance: db_instance, + instance: instance.clone(), + src_vmm: vmm.clone(), migrate_params: params, }; self.execute_saga::( @@ -348,7 +380,7 @@ impl super::Nexus { // TODO correctness TODO robustness TODO design // Should we lookup the instance again here? // See comment in project_create_instance. - self.db_datastore.instance_refetch(opctx, &authz_instance).await + self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await } /// Attempts to set the migration IDs for the supplied instance via the @@ -370,23 +402,24 @@ impl super::Nexus { &self, opctx: &OpContext, instance_id: Uuid, - db_instance: &db::model::Instance, + sled_id: Uuid, + prev_instance_runtime: &db::model::InstanceRuntimeState, migration_params: InstanceMigrationSourceParams, ) -> UpdateResult { - assert!(db_instance.runtime().migration_id.is_none()); - assert!(db_instance.runtime().dst_propolis_id.is_none()); + assert!(prev_instance_runtime.migration_id.is_none()); + assert!(prev_instance_runtime.dst_propolis_id.is_none()); let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore) .instance_id(instance_id) .lookup_for(authz::Action::Modify) .await?; - let sa = self.instance_sled(&db_instance).await?; + let sa = self.sled_client(&sled_id).await?; let instance_put_result = sa .instance_put_migration_ids( &instance_id, &InstancePutMigrationIdsBody { - old_runtime: db_instance.runtime().clone().into(), + old_runtime: prev_instance_runtime.clone().into(), migration_params: Some(migration_params), }, ) @@ -397,8 +430,12 @@ impl super::Nexus { // outright fails, this operation fails. 
If the operation nominally // succeeds but nothing was updated, this action is outdated and the // caller should not proceed with migration. - let updated = self - .handle_instance_put_result(&db_instance, instance_put_result) + let (updated, _) = self + .handle_instance_put_result( + &instance_id, + prev_instance_runtime, + instance_put_result.map(|state| state.map(Into::into)), + ) .await?; if updated { @@ -431,25 +468,30 @@ impl super::Nexus { pub(crate) async fn instance_clear_migration_ids( &self, instance_id: Uuid, - db_instance: &db::model::Instance, + sled_id: Uuid, + prev_instance_runtime: &db::model::InstanceRuntimeState, ) -> Result<(), Error> { - assert!(db_instance.runtime().migration_id.is_some()); - assert!(db_instance.runtime().dst_propolis_id.is_some()); + assert!(prev_instance_runtime.migration_id.is_some()); + assert!(prev_instance_runtime.dst_propolis_id.is_some()); - let sa = self.instance_sled(&db_instance).await?; + let sa = self.sled_client(&sled_id).await?; let instance_put_result = sa .instance_put_migration_ids( &instance_id, &InstancePutMigrationIdsBody { - old_runtime: db_instance.runtime().clone().into(), + old_runtime: prev_instance_runtime.clone().into(), migration_params: None, }, ) .await .map(|res| Some(res.into_inner())); - self.handle_instance_put_result(&db_instance, instance_put_result) - .await?; + self.handle_instance_put_result( + &instance_id, + prev_instance_runtime, + instance_put_result.map(|state| state.map(Into::into)), + ) + .await?; Ok(()) } @@ -459,16 +501,24 @@ impl super::Nexus { &self, opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, - ) -> UpdateResult { - let (.., authz_instance, db_instance) = instance_lookup.fetch().await?; + ) -> UpdateResult { + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + let state = self + .db_datastore + .instance_fetch_with_vmm(opctx, &authz_instance) + .await?; + self.instance_request_state( opctx, &authz_instance, - &db_instance, - InstanceStateRequested::Reboot, + state.instance(), + state.vmm(), + InstanceStateChangeRequest::Reboot, ) .await?; - self.db_datastore.instance_refetch(opctx, &authz_instance).await + self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await } /// Attempts to start an instance if it is currently stopped. @@ -476,42 +526,53 @@ impl super::Nexus { self: &Arc, opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, - ) -> UpdateResult { - let (.., authz_instance, db_instance) = - instance_lookup.fetch_for(authz::Action::Modify).await?; + ) -> UpdateResult { + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; - // If the instance is already starting or running, succeed immediately - // for idempotency. If the instance is stopped, try to start it. In all - // other cases return an error describing the state conflict. - // - // The "Creating" state is not permitted here (even though a request to - // create can include a request to start the instance) because an - // instance that is still being created may not be ready to start yet - // (e.g. its disks may not yet be attached). - // - // If the instance is stopped, the start saga will try to change the - // instance's state to Starting and increment the instance's state - // generation number. If this increment fails (because someone else has - // changed the state), the saga fails. See the saga comments for more - // details on how this synchronization works. 
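The rewritten start path below replaces the old check of a single instance state with a decision over the instance's optional active VMM. A reduced sketch of that decision follows; `SimpleVmmState`, `StartDecision`, and `decide_start` are names invented for illustration, and the real code consults the db::model types and returns richer errors.

// Local stand-ins for the VMM states that matter to a start request.
#[derive(Clone, Copy, Debug, PartialEq)]
enum SimpleVmmState {
    Starting,
    Running,
    Rebooting,
    Stopped,
    Other,
}

enum StartDecision {
    // The instance already has an active VMM that is running or coming up;
    // succeed idempotently without starting a saga.
    AlreadyActive,
    // A stopped VMM should no longer be the active VMM, so this indicates
    // an internal inconsistency.
    InternalError,
    // Any other active-VMM state (stopping, migrating, failed, ...)
    // conflicts with a start request.
    Conflict,
    // No active VMM: it is safe to kick off the start saga.
    RunStartSaga,
}

fn decide_start(active_vmm_state: Option<SimpleVmmState>) -> StartDecision {
    match active_vmm_state {
        Some(
            SimpleVmmState::Starting
            | SimpleVmmState::Running
            | SimpleVmmState::Rebooting,
        ) => StartDecision::AlreadyActive,
        Some(SimpleVmmState::Stopped) => StartDecision::InternalError,
        Some(_) => StartDecision::Conflict,
        None => StartDecision::RunStartSaga,
    }
}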
- match db_instance.runtime_state.state.0 { - InstanceState::Starting | InstanceState::Running => { - return Ok(db_instance) - } - InstanceState::Stopped => {} - _ => { - return Err(Error::conflict(&format!( - "instance is in state {} but must be {} to be started", - db_instance.runtime_state.state.0, - InstanceState::Stopped - ))) + let state = self + .db_datastore + .instance_fetch_with_vmm(opctx, &authz_instance) + .await?; + let (instance, vmm) = (state.instance(), state.vmm()); + + if let Some(vmm) = vmm { + match vmm.runtime.state.0 { + InstanceState::Starting + | InstanceState::Running + | InstanceState::Rebooting => { + debug!(self.log, "asked to start an active instance"; + "instance_id" => %authz_instance.id()); + + return Ok(state); + } + InstanceState::Stopped => { + let propolis_id = instance + .runtime() + .propolis_id + .expect("needed a VMM ID to fetch a VMM record"); + error!(self.log, + "instance is stopped but still has an active VMM"; + "instance_id" => %authz_instance.id(), + "propolis_id" => %propolis_id); + + return Err(Error::internal_error( + "instance is stopped but still has an active VMM", + )); + } + _ => { + return Err(Error::conflict(&format!( + "instance is in state {} but must be {} to be started", + vmm.runtime.state.0, + InstanceState::Stopped + ))); + } } } let saga_params = sagas::instance_start::Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), - instance: db_instance, - ensure_network: true, + db_instance: instance.clone(), }; self.execute_saga::( @@ -519,7 +580,7 @@ impl super::Nexus { ) .await?; - self.db_datastore.instance_refetch(opctx, &authz_instance).await + self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await } /// Make sure the given Instance is stopped. @@ -527,16 +588,25 @@ impl super::Nexus { &self, opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, - ) -> UpdateResult { - let (.., authz_instance, db_instance) = instance_lookup.fetch().await?; + ) -> UpdateResult { + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + let state = self + .db_datastore + .instance_fetch_with_vmm(opctx, &authz_instance) + .await?; + self.instance_request_state( opctx, &authz_instance, - &db_instance, - InstanceStateRequested::Stopped, + state.instance(), + state.vmm(), + InstanceStateChangeRequest::Stop, ) .await?; - self.db_datastore.instance_refetch(opctx, &authz_instance).await + + self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await } /// Idempotently ensures that the sled specified in `db_instance` does not @@ -546,76 +616,165 @@ impl super::Nexus { &self, opctx: &OpContext, authz_instance: &authz::Instance, - db_instance: &db::model::Instance, - write_back: WriteBackUpdatedInstance, + sled_id: &Uuid, + prev_instance_runtime: &db::model::InstanceRuntimeState, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; - let sa = self.instance_sled(&db_instance).await?; + let sa = self.sled_client(&sled_id).await?; let result = sa - .instance_unregister(&db_instance.id()) + .instance_unregister(&authz_instance.id()) .await .map(|res| res.into_inner().updated_runtime); - match write_back { - WriteBackUpdatedInstance::WriteBack => self - .handle_instance_put_result(db_instance, result) - .await - .map(|_| ()), - WriteBackUpdatedInstance::Drop => { - result?; - Ok(()) - } - } + self.handle_instance_put_result( + &authz_instance.id(), + prev_instance_runtime, + result.map(|state| state.map(Into::into)), + ) + .await + .map(|_| 
()) } - /// Returns the SledAgentClient for the host where this Instance is running. - pub(crate) async fn instance_sled( + /// Determines the action to take on an instance's active VMM given a + /// request to change its state. + /// + /// # Arguments + /// + /// - instance_state: The prior state of the instance as recorded in CRDB + /// and obtained by the caller. + /// - vmm_state: The prior state of the instance's active VMM as recorded in + /// CRDB and obtained by the caller. `None` if the instance has no active + /// VMM. + /// - requested: The state change being requested. + /// + /// # Return value + /// + /// - `Ok(action)` if the request is allowed to proceed. The result payload + /// specifies how to handle the request. + /// - `Err` if the request should be denied. + fn select_runtime_change_action( &self, - instance: &db::model::Instance, - ) -> Result, Error> { - let sa_id = &instance.runtime().sled_id; - self.sled_client(&sa_id).await - } + instance_state: &db::model::Instance, + vmm_state: &Option, + requested: &InstanceStateChangeRequest, + ) -> Result { + let effective_state = if let Some(vmm) = vmm_state { + vmm.runtime.state.0 + } else { + instance_state.runtime().nexus_state.0 + }; - fn check_runtime_change_allowed( - &self, - runtime: &nexus::InstanceRuntimeState, - requested: &InstanceStateRequested, - ) -> Result<(), Error> { - // Users are allowed to request a start or stop even if the instance is - // already in the desired state (or moving to it), and we will issue a - // request to the SA to make the state change in these cases in case the - // runtime state we saw here was stale. - // - // Users cannot change the state of a failed or destroyed instance. - // TODO(#2825): Failed instances should be allowed to stop. - // - // Migrating instances can't change state until they're done migrating, - // but for idempotency, a request to make an incarnation of an instance - // into a migration target is allowed if the incarnation is already a - // migration target. - let allowed = match runtime.run_state { - InstanceState::Creating => true, - InstanceState::Starting => true, - InstanceState::Running => true, - InstanceState::Stopping => true, - InstanceState::Stopped => true, - InstanceState::Rebooting => true, - InstanceState::Migrating => { - matches!(requested, InstanceStateRequested::MigrationTarget(_)) + // Requests that operate on active instances have to be directed to the + // instance's current sled agent. If there is none, the request needs to + // be handled specially based on its type. + let sled_id = if let Some(vmm) = vmm_state { + vmm.sled_id + } else { + match effective_state { + // If there's no active sled because the instance is stopped, + // allow requests to stop to succeed silently for idempotency, + // but reject requests to do anything else. 
+ InstanceState::Stopped => match requested { + InstanceStateChangeRequest::Run => { + return Err(Error::invalid_request(&format!( + "cannot run an instance in state {} with no VMM", + effective_state + ))) + } + InstanceStateChangeRequest::Stop => { + return Ok(InstanceStateChangeRequestAction::AlreadyDone); + } + InstanceStateChangeRequest::Reboot => { + return Err(Error::invalid_request(&format!( + "cannot reboot an instance in state {} with no VMM", + effective_state + ))) + } + InstanceStateChangeRequest::Migrate(_) => { + return Err(Error::invalid_request(&format!( + "cannot migrate an instance in state {} with no VMM", + effective_state + ))) + } + }, + + // If the instance is still being created (such that it hasn't + // even begun to start yet), no runtime state change is valid. + // Return a specific error message explaining the problem. + InstanceState::Creating => { + return Err(Error::invalid_request( + "cannot change instance state while it is \ + still being created" + )) + } + + // If the instance has no sled beacuse it's been destroyed or + // has fallen over, reject the state change. + // + // TODO(#2825): Failed instances should be allowed to stop, but + // this requires a special action because there is no sled to + // send the request to. + InstanceState::Failed | InstanceState::Destroyed => { + return Err(Error::invalid_request(&format!( + "instance state cannot be changed from {}", + effective_state + ))) + } + + // In other states, the instance should have a sled, and an + // internal invariant has been violated if it doesn't have one. + _ => { + error!(self.log, "instance has no sled but isn't halted"; + "instance_id" => %instance_state.id(), + "state" => ?effective_state); + + return Err(Error::internal_error( + "instance is active but not resident on a sled" + )); + } } - InstanceState::Repairing => false, - InstanceState::Failed => false, - InstanceState::Destroyed => false, + }; + + // The instance has an active sled. Allow the sled agent to decide how + // to handle the request unless the instance is being recovered or the + // underlying VMM has been destroyed. + // + // TODO(#2825): Failed instances should be allowed to stop. See above. 
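    // The match below on `requested`, and the early returns above, use two
    // helper types whose definitions are not part of this diff. A minimal
    // sketch consistent with their usage in `select_runtime_change_action` and
    // `instance_request_state` follows; the exact names and the payload of the
    // `Migrate` variant are assumptions for illustration.

    /// What the caller is asking the instance to do.
    pub(crate) enum InstanceStateChangeRequest {
        Run,
        Reboot,
        Stop,
        /// Carries the parameters of the requested migration (payload assumed).
        Migrate(params::InstanceMigrate),
    }

    /// How Nexus should act on a state change request.
    enum InstanceStateChangeRequestAction {
        /// The instance is already in the requested state; no action is needed.
        AlreadyDone,
        /// Forward the request to the sled agent managing the active VMM on the
        /// sled with this ID.
        SendToSled(Uuid),
    }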
+ let allowed = match requested { + InstanceStateChangeRequest::Run + | InstanceStateChangeRequest::Reboot + | InstanceStateChangeRequest::Stop => match effective_state { + InstanceState::Creating + | InstanceState::Starting + | InstanceState::Running + | InstanceState::Stopping + | InstanceState::Stopped + | InstanceState::Rebooting + | InstanceState::Migrating => true, + InstanceState::Repairing | InstanceState::Failed => false, + InstanceState::Destroyed => false, + }, + InstanceStateChangeRequest::Migrate(_) => match effective_state { + InstanceState::Running + | InstanceState::Rebooting + | InstanceState::Migrating => true, + InstanceState::Creating + | InstanceState::Starting + | InstanceState::Stopping + | InstanceState::Stopped + | InstanceState::Repairing + | InstanceState::Failed + | InstanceState::Destroyed => false, + }, }; if allowed { - Ok(()) + Ok(InstanceStateChangeRequestAction::SendToSled(sled_id)) } else { Err(Error::InvalidRequest { message: format!( "instance state cannot be changed from state \"{}\"", - runtime.run_state + effective_state ), }) } @@ -625,27 +784,39 @@ impl super::Nexus { &self, opctx: &OpContext, authz_instance: &authz::Instance, - db_instance: &db::model::Instance, - requested: InstanceStateRequested, + prev_instance_state: &db::model::Instance, + prev_vmm_state: &Option, + requested: InstanceStateChangeRequest, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; - self.check_runtime_change_allowed( - &db_instance.runtime().clone().into(), - &requested, - )?; + let instance_id = authz_instance.id(); - let sa = self.instance_sled(&db_instance).await?; - let instance_put_result = sa - .instance_put_state( - &db_instance.id(), - &InstancePutStateBody { state: requested }, - ) - .await - .map(|res| res.into_inner().updated_runtime); + match self.select_runtime_change_action( + prev_instance_state, + prev_vmm_state, + &requested, + )? { + InstanceStateChangeRequestAction::AlreadyDone => Ok(()), + InstanceStateChangeRequestAction::SendToSled(sled_id) => { + let sa = self.sled_client(&sled_id).await?; + let instance_put_result = sa + .instance_put_state( + &instance_id, + &InstancePutStateBody { state: requested.into() }, + ) + .await + .map(|res| res.into_inner().updated_runtime) + .map(|state| state.map(Into::into)); - self.handle_instance_put_result(db_instance, instance_put_result) - .await - .map(|_| ()) + self.handle_instance_put_result( + &instance_id, + prev_instance_state.runtime(), + instance_put_result, + ) + .await + .map(|_| ()) + } + } } /// Modifies the runtime state of the Instance as requested. This generally @@ -655,6 +826,8 @@ impl super::Nexus { opctx: &OpContext, authz_instance: &authz::Instance, db_instance: &db::model::Instance, + propolis_id: &Uuid, + initial_vmm: &db::model::Vmm, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; @@ -684,7 +857,7 @@ impl super::Nexus { error!(self.log, "attached disk has no PCI slot assignment"; "disk_id" => %disk.id(), "disk_name" => disk.name().to_string(), - "instance" => ?disk.runtime_state.attach_instance_id); + "instance_id" => ?disk.runtime_state.attach_instance_id); return Err(Error::internal_error(&format!( "disk {} is attached but has no PCI slot assignment", @@ -805,9 +978,11 @@ impl super::Nexus { // beat us to it. 
let instance_hardware = sled_agent_client::types::InstanceHardware { - runtime: sled_agent_client::types::InstanceRuntimeState::from( - db_instance.runtime().clone(), - ), + properties: InstanceProperties { + ncpus: db_instance.ncpus.into(), + memory: db_instance.memory.into(), + hostname: db_instance.hostname.clone(), + }, nics, source_nat, external_ips, @@ -819,21 +994,32 @@ impl super::Nexus { )), }; - let sa = self.instance_sled(&db_instance).await?; - + let sa = self.sled_client(&initial_vmm.sled_id).await?; let instance_register_result = sa .instance_register( &db_instance.id(), &sled_agent_client::types::InstanceEnsureBody { - initial: instance_hardware, + hardware: instance_hardware, + instance_runtime: db_instance.runtime().clone().into(), + vmm_runtime: initial_vmm.clone().into(), + propolis_id: *propolis_id, + propolis_addr: SocketAddr::new( + initial_vmm.propolis_ip.ip(), + PROPOLIS_PORT, + ) + .to_string(), }, ) .await .map(|res| Some(res.into_inner())); - self.handle_instance_put_result(db_instance, instance_register_result) - .await - .map(|_| ()) + self.handle_instance_put_result( + &db_instance.id(), + db_instance.runtime(), + instance_register_result.map(|state| state.map(Into::into)), + ) + .await + .map(|_| ()) } /// Updates an instance's CRDB record based on the result of a call to sled @@ -860,34 +1046,38 @@ impl super::Nexus { /// error while trying to update CRDB. async fn handle_instance_put_result( &self, - db_instance: &db::model::Instance, + instance_id: &Uuid, + prev_instance_runtime: &db::model::InstanceRuntimeState, result: Result< - Option, + Option, sled_agent_client::Error, >, - ) -> Result { + ) -> Result<(bool, bool), Error> { slog::debug!(&self.log, "Handling sled agent instance PUT result"; + "instance_id" => %instance_id, "result" => ?result); match result { - Ok(Some(new_runtime)) => { - let new_runtime: nexus::InstanceRuntimeState = - new_runtime.into(); - + Ok(Some(new_state)) => { let update_result = self .db_datastore - .instance_update_runtime( - &db_instance.id(), - &new_runtime.into(), + .instance_and_vmm_update_runtime( + instance_id, + &new_state.instance_state.into(), + &new_state.propolis_id, + &new_state.vmm_state.into(), ) .await; slog::debug!(&self.log, "Attempted DB update after instance PUT"; + "instance_id" => %instance_id, + "propolis_id" => %new_state.propolis_id, "result" => ?update_result); + update_result } - Ok(None) => Ok(false), + Ok(None) => Ok((false, false)), Err(e) => { // The sled-agent has told us that it can't do what we // requested, but does that mean a failure? One example would be @@ -898,13 +1088,15 @@ impl super::Nexus { // // Without a richer error type, let the sled-agent tell Nexus // what to do with status codes. - error!(self.log, "saw {} from instance_put!", e); + error!(self.log, "received error from instance PUT"; + "instance_id" => %instance_id, + "error" => ?e); // Convert to the Omicron API error type. // - // N.B. The match below assumes that this conversion will turn - // any 400-level error status from sled agent into an - // `Error::InvalidRequest`. + // TODO(#3238): This is an extremely lossy conversion: if the + // operation failed without getting a response from sled agent, + // this unconditionally converts to Error::InternalError. let e = e.into(); match &e { @@ -914,28 +1106,41 @@ impl super::Nexus { // Internal server error (or anything else) should change // the instance state to failed, we don't know what state // the instance is in. 
+ // + // TODO(#4226): This logic needs to be revisited: + // - Some errors that don't get classified as + // Error::InvalidRequest (timeouts, disconnections due to + // network weather, etc.) are not necessarily fatal to the + // instance and shouldn't mark it as Failed. + // - If the instance still has a running VMM, this operation + // won't terminate it or reclaim its resources. (The + // resources will be reclaimed if the sled later reports + // that the VMM is gone, however.) _ => { let new_runtime = db::model::InstanceRuntimeState { - state: db::model::InstanceState::new( + nexus_state: db::model::InstanceState::new( InstanceState::Failed, ), - gen: db_instance.runtime_state.gen.next().into(), - ..db_instance.runtime_state.clone() + + // TODO(#4226): Clearing the Propolis ID is required + // to allow the instance to be deleted, but this + // doesn't actually terminate the VMM (see above). + propolis_id: None, + gen: prev_instance_runtime.gen.next().into(), + ..prev_instance_runtime.clone() }; // XXX what if this fails? let result = self .db_datastore - .instance_update_runtime( - &db_instance.id(), - &new_runtime, - ) + .instance_update_runtime(&instance_id, &new_runtime) .await; error!( self.log, - "saw {:?} from setting InstanceState::Failed after bad instance_put", - result, + "attempted to set instance to Failed after bad put"; + "instance_id" => %instance_id, + "result" => ?result, ); Err(e) @@ -983,10 +1188,11 @@ impl super::Nexus { .await?; // TODO-v1: Write test to verify this case - // Because both instance and disk can be provided by ID it's possible for someone - // to specify resources from different projects. The lookups would resolve the resources - // (assuming the user had sufficient permissions on both) without verifying the shared hierarchy. - // To mitigate that we verify that their parent projects have the same ID. + // Because both instance and disk can be provided by ID it's possible + // for someone to specify resources from different projects. The lookups + // would resolve the resources (assuming the user had sufficient + // permissions on both) without verifying the shared hierarchy. To + // mitigate that we verify that their parent projects have the same ID. if authz_project.id() != authz_project_disk.id() { return Err(Error::InvalidRequest { message: "disk must be in the same project as the instance" @@ -1066,91 +1272,111 @@ impl super::Nexus { pub(crate) async fn notify_instance_updated( &self, opctx: &OpContext, - id: &Uuid, - new_runtime_state: &nexus::InstanceRuntimeState, + instance_id: &Uuid, + new_runtime_state: &nexus::SledInstanceState, ) -> Result<(), Error> { let log = &self.log; + let propolis_id = new_runtime_state.propolis_id; - slog::debug!(log, "received new runtime state from sled agent"; - "instance_id" => %id, - "runtime_state" => ?new_runtime_state); + info!(log, "received new runtime state from sled agent"; + "instance_id" => %instance_id, + "instance_state" => ?new_runtime_state.instance_state, + "propolis_id" => %propolis_id, + "vmm_state" => ?new_runtime_state.vmm_state); - // If the new state has a newer Propolis ID generation than the current - // instance state in CRDB, notify interested parties of this change. + // Update OPTE and Dendrite if the instance's active sled assignment + // changed or a migration was retired. If these actions fail, sled agent + // is expected to retry this update. 
// - // The synchronization rules here are as follows: + // This configuration must be updated before updating any state in CRDB + // so that, if the instance was migrating or has shut down, it will not + // appear to be able to migrate or start again until the appropriate + // networking state has been written. Without this interlock, another + // thread or another Nexus can race with this routine to write + // conflicting configuration. // - // - Sled agents own an instance's runtime state while an instance is - // running on a sled. Each sled agent prevents concurrent conflicting - // Propolis identifier updates from being sent until previous updates - // are processed. - // - Operations that can dispatch an instance to a brand-new sled (e.g. - // live migration) can only start if the appropriate instance runtime - // state fields are cleared in CRDB. For example, while a live - // migration is in progress, the instance's `migration_id` field will - // be non-NULL, and a new migration cannot start until it is cleared. - // This routine must notify recipients before writing new records - // back to CRDB so that these "locks" remain held until all - // notifications have been sent. Otherwise, Nexus might allow new - // operations to proceed that will produce system updates that might - // race with this one. - // - This work is not done in a saga. The presumption is instead that - // if any of these operations fail, the entire update will fail, and - // sled agent will retry the update. Unwinding on failure isn't needed - // because (a) any partially-applied configuration is correct - // configuration, (b) if the instance is migrating, it can't migrate - // again until this routine successfully updates configuration and - // writes an update back to CRDB, and (c) sled agent won't process any - // new instance state changes (e.g. a change that stops an instance) - // until this state change is successfully committed. - let (.., db_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(*id) - .fetch_for(authz::Action::Read) - .await?; + // In the future, this should be replaced by a call to trigger a + // networking state update RPW. + let (.., authz_instance, db_instance) = + LookupPath::new(&opctx, &self.db_datastore) + .instance_id(*instance_id) + .fetch() + .await?; - if new_runtime_state.propolis_gen > *db_instance.runtime().propolis_gen - { - self.handle_instance_propolis_gen_change( - opctx, - new_runtime_state, - &db_instance, - ) - .await?; - } + self.ensure_updated_instance_network_config( + opctx, + &authz_instance, + db_instance.runtime(), + &new_runtime_state.instance_state, + ) + .await?; + // Write the new instance and VMM states back to CRDB. This needs to be + // done before trying to clean up the VMM, since the datastore will only + // allow a VMM to be marked as deleted if it is already in a terminal + // state. let result = self .db_datastore - .instance_update_runtime(id, &(new_runtime_state.clone().into())) + .instance_and_vmm_update_runtime( + instance_id, + &db::model::InstanceRuntimeState::from( + new_runtime_state.instance_state.clone(), + ), + &propolis_id, + &db::model::VmmRuntimeState::from( + new_runtime_state.vmm_state.clone(), + ), + ) .await; - match result { - Ok(true) => { - info!(log, "instance updated by sled agent"; - "instance_id" => %id, - "propolis_id" => %new_runtime_state.propolis_id, - "new_state" => %new_runtime_state.run_state); - Ok(()) + // If the VMM is now in a terminal state, make sure its resources get + // cleaned up. 
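    // The `new_runtime_state` consulted below is the sled agent's combined
    // report of instance and VMM state. Its definition is not part of this
    // diff; a minimal sketch consistent with the fields accessed in this
    // function (`instance_state`, `vmm_state`, `propolis_id`) is shown below.
    // Field types are assumptions based on how they are converted and logged
    // here.

    pub struct SledInstanceState {
        /// The sled-independent instance runtime state.
        pub instance_state: nexus::InstanceRuntimeState,
        /// The runtime state of the active VMM (Propolis) on the reporting sled.
        pub vmm_state: nexus::VmmRuntimeState,
        /// The ID of the Propolis whose state is being reported.
        pub propolis_id: Uuid,
    }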
+ if let Ok((_, true)) = result { + let propolis_terminated = matches!( + new_runtime_state.vmm_state.state, + InstanceState::Destroyed | InstanceState::Failed + ); + + if propolis_terminated { + info!(log, "vmm is terminated, cleaning up resources"; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id); + + self.db_datastore + .sled_reservation_delete(opctx, propolis_id) + .await?; + + if !self + .db_datastore + .vmm_mark_deleted(opctx, &propolis_id) + .await? + { + warn!(log, "failed to mark vmm record as deleted"; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id, + "vmm_state" => ?new_runtime_state.vmm_state); + } } + } - Ok(false) => { - info!(log, "instance update from sled agent ignored (old)"; - "instance_id" => %id, - "propolis_id" => %new_runtime_state.propolis_id, - "requested_state" => %new_runtime_state.run_state); + match result { + Ok((instance_updated, vmm_updated)) => { + info!(log, "instance and vmm updated by sled agent"; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id, + "instance_updated" => instance_updated, + "vmm_updated" => vmm_updated); Ok(()) } - // If the instance doesn't exist, swallow the error -- there's - // nothing to do here. - // TODO-robustness This could only be possible if we've removed an - // Instance from the datastore altogether. When would we do that? - // We don't want to do it as soon as something's destroyed, I think, - // and in that case, we'd need some async task for cleaning these - // up. + // The update command should swallow object-not-found errors and + // return them back as failures to update, so this error case is + // unexpected. There's no work to do if this occurs, however. Err(Error::ObjectNotFound { .. }) => { - warn!(log, "non-existent instance updated by sled agent"; - "instance_id" => %id, - "new_state" => %new_runtime_state.run_state); + error!(log, "instance/vmm update unexpectedly returned \ + an object not found error"; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id); Ok(()) } @@ -1160,83 +1386,28 @@ impl super::Nexus { // different from Error with an Into. Err(error) => { warn!(log, "failed to update instance from sled agent"; - "instance_id" => %id, - "new_state" => %new_runtime_state.run_state, - "error" => ?error); + "instance_id" => %instance_id, + "propolis_id" => %propolis_id, + "error" => ?error); Err(error) } } } - async fn handle_instance_propolis_gen_change( - &self, - opctx: &OpContext, - new_runtime: &nexus::InstanceRuntimeState, - db_instance: &nexus_db_model::Instance, - ) -> Result<(), Error> { - let log = &self.log; - let instance_id = db_instance.id(); - - info!(log, - "updating configuration after Propolis generation change"; - "instance_id" => %instance_id, - "new_sled_id" => %new_runtime.sled_id, - "old_sled_id" => %db_instance.runtime().sled_id); - - // Push updated V2P mappings to all interested sleds. This needs to be - // done irrespective of whether the sled ID actually changed, because - // merely creating the target Propolis on the target sled will create - // XDE devices for its NICs, and creating an XDE device for a virtual IP - // creates a V2P mapping that maps that IP to that sled. This is fine if - // migration succeeded, but if it failed, the instance is running on the - // source sled, and the incorrect mapping needs to be replaced. - // - // TODO(#3107): When XDE no longer creates mappings implicitly, this - // can be restricted to cases where an instance's sled has actually - // changed. 
- self.create_instance_v2p_mappings( - opctx, - instance_id, - new_runtime.sled_id, - ) - .await?; - - let (.., sled) = LookupPath::new(opctx, &self.db_datastore) - .sled_id(new_runtime.sled_id) - .fetch() - .await?; - - let boundary_switches = - self.boundary_switches(&self.opctx_alloc).await?; - - for switch in &boundary_switches { - let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { - Error::internal_error(&format!( - "could not find dpd client for {switch}" - )) - })?; - self.instance_ensure_dpd_config( - opctx, - db_instance.id(), - &sled.address(), - None, - dpd_client, - ) - .await?; - } - - Ok(()) - } - /// Returns the requested range of serial console output bytes, /// provided they are still in the propolis-server's cache. pub(crate) async fn instance_serial_console_data( &self, + opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, params: ¶ms::InstanceSerialConsoleRequest, ) -> Result { let client = self - .propolis_client_for_instance(instance_lookup, authz::Action::Read) + .propolis_client_for_instance( + opctx, + instance_lookup, + authz::Action::Read, + ) .await?; let mut request = client.instance_serial_history_get(); if let Some(max_bytes) = params.max_bytes { @@ -1251,10 +1422,12 @@ impl super::Nexus { let data = request .send() .await - .map_err(|_| { - Error::internal_error( - "websocket connection to instance's serial port failed", - ) + .map_err(|e| { + Error::internal_error(&format!( + "websocket connection to instance's serial port failed: \ + {:?}", + e, + )) })? .into_inner(); Ok(params::InstanceSerialConsoleData { @@ -1265,12 +1438,17 @@ impl super::Nexus { pub(crate) async fn instance_serial_console_stream( &self, + opctx: &OpContext, mut client_stream: WebSocketStream, instance_lookup: &lookup::Instance<'_>, params: ¶ms::InstanceSerialConsoleStreamRequest, ) -> Result<(), Error> { let client_addr = match self - .propolis_addr_for_instance(instance_lookup, authz::Action::Modify) + .propolis_addr_for_instance( + opctx, + instance_lookup, + authz::Action::Modify, + ) .await { Ok(x) => x, @@ -1322,48 +1500,64 @@ impl super::Nexus { async fn propolis_addr_for_instance( &self, + opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, action: authz::Action, ) -> Result { - let (.., authz_instance, instance) = - instance_lookup.fetch_for(action).await?; - match instance.runtime_state.state.0 { - InstanceState::Running - | InstanceState::Rebooting - | InstanceState::Migrating - | InstanceState::Repairing => { - let ip_addr = instance - .runtime_state - .propolis_ip - .ok_or_else(|| { - Error::internal_error( - "instance's hypervisor IP address not found", - ) - })? 
- .ip(); - Ok(SocketAddr::new(ip_addr, PROPOLIS_PORT)) + let (.., authz_instance) = instance_lookup.lookup_for(action).await?; + + let state = self + .db_datastore + .instance_fetch_with_vmm(opctx, &authz_instance) + .await?; + + let (instance, vmm) = (state.instance(), state.vmm()); + if let Some(vmm) = vmm { + match vmm.runtime.state.0 { + InstanceState::Running + | InstanceState::Rebooting + | InstanceState::Migrating + | InstanceState::Repairing => { + Ok(SocketAddr::new(vmm.propolis_ip.ip(), PROPOLIS_PORT)) + } + InstanceState::Creating + | InstanceState::Starting + | InstanceState::Stopping + | InstanceState::Stopped + | InstanceState::Failed => Err(Error::ServiceUnavailable { + internal_message: format!( + "cannot connect to serial console of instance in state \ + {:?}", + vmm.runtime.state.0 + ), + }), + InstanceState::Destroyed => Err(Error::ServiceUnavailable { + internal_message: format!( + "cannot connect to serial console of instance in state \ + {:?}", + InstanceState::Stopped), + }), } - InstanceState::Creating - | InstanceState::Starting - | InstanceState::Stopping - | InstanceState::Stopped - | InstanceState::Failed => Err(Error::ServiceUnavailable { + } else { + Err(Error::ServiceUnavailable { internal_message: format!( - "Cannot connect to hypervisor of instance in state {:?}", - instance.runtime_state.state - ), - }), - InstanceState::Destroyed => Err(authz_instance.not_found()), + "instance is in state {:?} and has no active serial console \ + server", + instance.runtime().nexus_state + ) + }) } } async fn propolis_client_for_instance( &self, + opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, action: authz::Action, ) -> Result { - let client_addr = - self.propolis_addr_for_instance(instance_lookup, action).await?; + let client_addr = self + .propolis_addr_for_instance(opctx, instance_lookup, action) + .await?; Ok(propolis_client::Client::new(&format!("http://{}", client_addr))) } diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index c383840d38..0f52cbd260 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -7,10 +7,12 @@ use crate::app::sagas::retry_until_known_result; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db; use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; +use omicron_common::api::internal::nexus; use omicron_common::api::internal::shared::SwitchLocation; use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost; use sled_agent_client::types::SetVirtualNetworkInterfaceHost; @@ -474,4 +476,212 @@ impl super::Nexus { Ok(()) } + + /// Deletes an instance's OPTE V2P mappings and the boundary switch NAT + /// entries for its external IPs. + /// + /// This routine returns immediately upon encountering any errors (and will + /// not try to destroy any more objects after the point of failure). 
+ async fn clear_instance_networking_state( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + ) -> Result<(), Error> { + self.delete_instance_v2p_mappings(opctx, authz_instance.id()).await?; + + let external_ips = self + .datastore() + .instance_lookup_external_ips(opctx, authz_instance.id()) + .await?; + + let boundary_switches = self.boundary_switches(opctx).await?; + for external_ip in external_ips { + for switch in &boundary_switches { + debug!(&self.log, "deleting instance nat mapping"; + "instance_id" => %authz_instance.id(), + "switch" => switch.to_string(), + "entry" => #?external_ip); + + let dpd_client = + self.dpd_clients.get(switch).ok_or_else(|| { + Error::internal_error(&format!( + "unable to find dendrite client for {switch}" + )) + })?; + + dpd_client + .ensure_nat_entry_deleted( + &self.log, + external_ip.ip, + *external_ip.first_port, + ) + .await + .map_err(|e| { + Error::internal_error(&format!( + "failed to delete nat entry via dpd: {e}" + )) + })?; + } + } + + Ok(()) + } + + /// Given old and new instance runtime states, determines the desired + /// networking configuration for a given instance and ensures it has been + /// propagated to all relevant sleds. + /// + /// # Arguments + /// + /// - opctx: An operation context for this operation. + /// - authz_instance: A resolved authorization context for the instance of + /// interest. + /// - prev_instance_state: The most-recently-recorded instance runtime + /// state for this instance. + /// - new_instance_state: The instance state that the caller of this routine + /// has observed and that should be used to set up this instance's + /// networking state. + /// + /// # Return value + /// + /// `Ok(())` if this routine completed all the operations it wanted to + /// complete, or an appropriate `Err` otherwise. + pub(crate) async fn ensure_updated_instance_network_config( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + prev_instance_state: &db::model::InstanceRuntimeState, + new_instance_state: &nexus::InstanceRuntimeState, + ) -> Result<(), Error> { + let log = &self.log; + let instance_id = authz_instance.id(); + + // If this instance update is stale, do nothing, since the superseding + // update may have allowed the instance's location to change further. + if prev_instance_state.gen >= new_instance_state.gen.into() { + debug!(log, + "instance state generation already advanced, \ + won't touch network config"; + "instance_id" => %instance_id); + + return Ok(()); + } + + // If this update will retire the instance's active VMM, delete its + // networking state. It will be re-established the next time the + // instance starts. + if new_instance_state.propolis_id.is_none() { + info!(log, + "instance cleared its Propolis ID, cleaning network config"; + "instance_id" => %instance_id, + "propolis_id" => ?prev_instance_state.propolis_id); + + self.clear_instance_networking_state(opctx, authz_instance).await?; + return Ok(()); + } + + // If the instance still has a migration in progress, don't change + // any networking state until an update arrives that retires that + // migration. + // + // This is needed to avoid the following race: + // + // 1. Migration from S to T completes. + // 2. Migration source sends an update that changes the instance's + // active VMM but leaves the migration ID in place. + // 3. Meanwhile, migration target sends an update that changes the + // instance's active VMM and clears the migration ID. + // 4. 
The migration target's call updates networking state and commits + // the new instance record. + // 5. The instance migrates from T to T' and Nexus applies networking + // configuration reflecting that the instance is on T'. + // 6. The update in step 2 applies configuration saying the instance + // is on sled T. + if new_instance_state.migration_id.is_some() { + debug!(log, + "instance still has a migration in progress, won't touch \ + network config"; + "instance_id" => %instance_id, + "migration_id" => ?new_instance_state.migration_id); + + return Ok(()); + } + + let new_propolis_id = new_instance_state.propolis_id.unwrap(); + + // Updates that end live migration need to push OPTE V2P state even if + // the instance's active sled did not change (see below). + let migration_retired = prev_instance_state.migration_id.is_some() + && new_instance_state.migration_id.is_none(); + + if (prev_instance_state.propolis_id == new_instance_state.propolis_id) + && !migration_retired + { + debug!(log, "instance didn't move, won't touch network config"; + "instance_id" => %instance_id); + + return Ok(()); + } + + // Either the instance moved from one sled to another, or it attempted + // to migrate and failed. Ensure the correct networking configuration + // exists for its current home. + // + // TODO(#3107) This is necessary even if the instance didn't move, + // because registering a migration target on a sled creates OPTE ports + // for its VNICs, and that creates new V2P mappings on that sled that + // place the relevant virtual IPs on the local sled. Once OPTE stops + // creating these mappings, this path only needs to be taken if an + // instance has changed sleds. + let new_sled_id = match self + .db_datastore + .vmm_fetch(&opctx, authz_instance, &new_propolis_id) + .await + { + Ok(vmm) => vmm.sled_id, + + // A VMM in the active position should never be destroyed. If the + // sled sending this message is the owner of the instance's last + // active VMM and is destroying it, it should also have retired that + // VMM. + Err(Error::ObjectNotFound { .. 
}) => { + error!(log, "instance's active vmm unexpectedly not found"; + "instance_id" => %instance_id, + "propolis_id" => %new_propolis_id); + + return Ok(()); + } + + Err(e) => return Err(e), + }; + + self.create_instance_v2p_mappings(opctx, instance_id, new_sled_id) + .await?; + + let (.., sled) = LookupPath::new(opctx, &self.db_datastore) + .sled_id(new_sled_id) + .fetch() + .await?; + + let boundary_switches = + self.boundary_switches(&self.opctx_alloc).await?; + + for switch in &boundary_switches { + let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { + Error::internal_error(&format!( + "could not find dpd client for {switch}" + )) + })?; + self.instance_ensure_dpd_config( + opctx, + instance_id, + &sled.address(), + None, + dpd_client, + ) + .await?; + } + + Ok(()) + } } diff --git a/nexus/src/app/sagas/finalize_disk.rs b/nexus/src/app/sagas/finalize_disk.rs index 859cc5a237..d4f6fc39aa 100644 --- a/nexus/src/app/sagas/finalize_disk.rs +++ b/nexus/src/app/sagas/finalize_disk.rs @@ -79,7 +79,7 @@ impl NexusSaga for SagaFinalizeDisk { silo_id: params.silo_id, project_id: params.project_id, disk_id: params.disk_id, - use_the_pantry: true, + attached_instance_and_sled: None, create_params: params::SnapshotCreate { identity: external::IdentityMetadataCreateParams { name: snapshot_name.clone(), diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs new file mode 100644 index 0000000000..438b92cb84 --- /dev/null +++ b/nexus/src/app/sagas/instance_common.rs @@ -0,0 +1,135 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common helper functions for instance-related sagas. + +use std::net::{IpAddr, Ipv6Addr}; + +use crate::Nexus; +use chrono::Utc; +use nexus_db_model::{ByteCount, SledReservationConstraints, SledResource}; +use nexus_db_queries::{context::OpContext, db, db::DataStore}; +use omicron_common::api::external::InstanceState; +use steno::ActionError; +use uuid::Uuid; + +/// Reserves resources for a new VMM whose instance has `ncpus` guest logical +/// processors and `guest_memory` bytes of guest RAM. The selected sled is +/// random within the set of sleds allowed by the supplied `constraints`. +/// +/// This function succeeds idempotently if called repeatedly with the same +/// `propolis_id`. +pub async fn reserve_vmm_resources( + nexus: &Nexus, + propolis_id: Uuid, + ncpus: u32, + guest_memory: ByteCount, + constraints: SledReservationConstraints, +) -> Result { + // ALLOCATION POLICY + // + // NOTE: This policy can - and should! - be changed. + // + // See https://rfd.shared.oxide.computer/rfd/0205 for a more complete + // discussion. + // + // Right now, allocate an instance to any random sled agent. This has a few + // problems: + // + // - There's no consideration for "health of the sled" here, other than + // "time_deleted = Null". If the sled is rebooting, in a known unhealthy + // state, etc, we'd currently provision it here. I don't think this is a + // trivial fix, but it's work we'll need to account for eventually. + // + // - This is selecting a random sled from all sleds in the cluster. For + // multi-rack, this is going to fling the sled to an arbitrary system. + // Maybe that's okay, but worth knowing about explicitly. 
+ // + // - This doesn't take into account anti-affinity - users will want to + // schedule instances that belong to a cluster on different failure + // domains. See https://github.com/oxidecomputer/omicron/issues/1705. + let resources = db::model::Resources::new( + ncpus, + ByteCount::try_from(0i64).unwrap(), + guest_memory, + ); + + let resource = nexus + .reserve_on_random_sled( + propolis_id, + nexus_db_model::SledResourceKind::Instance, + resources, + constraints, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(resource) +} + +/// Creates a new VMM record from the supplied IDs and stores it in the supplied +/// datastore. +/// +/// This function succeeds idempotently if called repeatedly with the same +/// parameters, provided that the VMM record was not mutated by some other actor +/// after the calling saga inserted it. +pub async fn create_and_insert_vmm_record( + datastore: &DataStore, + opctx: &OpContext, + instance_id: Uuid, + propolis_id: Uuid, + sled_id: Uuid, + propolis_ip: Ipv6Addr, + initial_state: nexus_db_model::VmmInitialState, +) -> Result { + let vmm = db::model::Vmm::new( + propolis_id, + instance_id, + sled_id, + IpAddr::V6(propolis_ip).into(), + initial_state, + ); + + let vmm = datastore + .vmm_insert(&opctx, vmm) + .await + .map_err(ActionError::action_failed)?; + + Ok(vmm) +} + +/// Given a previously-inserted VMM record, set its state to Destroyed and then +/// delete it. +/// +/// This function succeeds idempotently if called with the same parameters, +/// provided that the VMM record was not changed by some other actor after the +/// calling saga inserted it. +pub async fn destroy_vmm_record( + datastore: &DataStore, + opctx: &OpContext, + prev_record: &db::model::Vmm, +) -> Result<(), anyhow::Error> { + let new_runtime = db::model::VmmRuntimeState { + state: db::model::InstanceState(InstanceState::Destroyed), + time_state_updated: Utc::now(), + gen: prev_record.runtime.gen.next().into(), + }; + + datastore.vmm_update_runtime(&prev_record.id, &new_runtime).await?; + datastore.vmm_mark_deleted(&opctx, &prev_record.id).await?; + Ok(()) +} + +/// Allocates a new IPv6 address for a service that will run on the supplied +/// sled. +pub(super) async fn allocate_sled_ipv6( + opctx: &OpContext, + datastore: &DataStore, + sled_uuid: Uuid, +) -> Result { + datastore + .next_ipv6_address(opctx, sled_uuid) + .await + .map_err(ActionError::action_failed) +} diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 2762ecaff3..5d55aaf0fe 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -3,18 +3,14 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use super::{NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID}; -use crate::app::instance::WriteBackUpdatedInstance; use crate::app::sagas::declare_saga_actions; use crate::app::sagas::disk_create::{self, SagaDiskCreate}; -use crate::app::sagas::retry_until_known_result; use crate::app::{ MAX_DISKS_PER_INSTANCE, MAX_EXTERNAL_IPS_PER_INSTANCE, MAX_NICS_PER_INSTANCE, }; use crate::external_api::params; -use chrono::Utc; use nexus_db_model::NetworkInterfaceKind; -use nexus_db_queries::context::OpContext; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::ByteCount as DbByteCount; @@ -23,20 +19,16 @@ use nexus_db_queries::{authn, authz, db}; use nexus_defaults::DEFAULT_PRIMARY_NIC_NAME; use nexus_types::external_api::params::InstanceDiskAttachment; use omicron_common::api::external::Error; -use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; -use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::shared::SwitchLocation; use serde::Deserialize; use serde::Serialize; -use sled_agent_client::types::InstanceStateRequested; use slog::warn; use std::collections::HashSet; use std::convert::TryFrom; use std::fmt::Debug; -use std::net::Ipv6Addr; use steno::ActionError; use steno::Node; use steno::{DagBuilder, SagaName}; @@ -83,18 +75,11 @@ struct DiskAttachParams { declare_saga_actions! { instance_create; - ALLOC_SERVER -> "server_id" { - + sic_alloc_server - - sic_alloc_server_undo - } VIRTUAL_RESOURCES_ACCOUNT -> "no_result" { + sic_account_virtual_resources - sic_account_virtual_resources_undo } - ALLOC_PROPOLIS_IP -> "propolis_ip" { - + sic_allocate_propolis_ip - } - CREATE_INSTANCE_RECORD -> "instance_name" { + CREATE_INSTANCE_RECORD -> "instance_record" { + sic_create_instance_record - sic_delete_instance_record } @@ -114,23 +99,8 @@ declare_saga_actions! { + sic_attach_disk_to_instance - sic_attach_disk_to_instance_undo } - CONFIGURE_ASIC -> "configure_asic" { - + sic_add_network_config - - sic_remove_network_config - } - V2P_ENSURE_UNDO -> "v2p_ensure_undo" { - + sic_noop - - sic_v2p_ensure_undo - } - V2P_ENSURE -> "v2p_ensure" { - + sic_v2p_ensure - } - INSTANCE_ENSURE_REGISTERED -> "instance_ensure_registered" { - + sic_instance_ensure_registered - - sic_instance_ensure_registered_undo - } - INSTANCE_ENSURE_RUNNING -> "instance_ensure_running" { - + sic_instance_ensure_running + MOVE_TO_STOPPED -> "stopped_instance" { + + sic_move_to_stopped } } @@ -161,15 +131,7 @@ impl NexusSaga for SagaInstanceCreate { })?, )); - builder.append(Node::action( - "propolis_id", - "GeneratePropolisId", - ACTION_GENERATE_ID.as_ref(), - )); - - builder.append(alloc_server_action()); builder.append(virtual_resources_account_action()); - builder.append(alloc_propolis_ip_action()); builder.append(create_instance_record_action()); // Helper function for appending subsagas to our parent saga. @@ -280,7 +242,8 @@ impl NexusSaga for SagaInstanceCreate { )?; } - // Appends the disk create saga as a subsaga directly to the instance create builder. + // Appends the disk create saga as a subsaga directly to the instance + // create builder. 
for (i, disk) in params.create_params.disks.iter().enumerate() { if let InstanceDiskAttachment::Create(create_disk) = disk { let subsaga_name = @@ -301,8 +264,8 @@ impl NexusSaga for SagaInstanceCreate { } } - // Attaches all disks included in the instance create request, including those which were previously created - // by the disk create subsagas. + // Attaches all disks included in the instance create request, including + // those which were previously created by the disk create subsagas. for (i, disk_attach) in params.create_params.disks.iter().enumerate() { let subsaga_name = SagaName::new(&format!("instance-attach-disk-{i}")); @@ -327,230 +290,11 @@ impl NexusSaga for SagaInstanceCreate { )?; } - // If a primary NIC exists, create a NAT entry for the default external IP, - // as well as additional NAT entries for each requested ephemeral IP - for i in 0..(params.create_params.external_ips.len() + 1) { - for &switch_location in ¶ms.boundary_switches { - let subsaga_name = SagaName::new(&format!( - "instance-configure-nat-{i}-{switch_location}" - )); - let mut subsaga_builder = DagBuilder::new(subsaga_name); - - let basename = format!("ConfigureAsic-{i}-{switch_location}"); - subsaga_builder.append(Node::action( - "configure_asic", - &basename, - CONFIGURE_ASIC.as_ref(), - )); - let net_params = NetworkConfigParams { - saga_params: params.clone(), - instance_id, - which: i, - switch_location, - }; - subsaga_append( - basename, - subsaga_builder.build()?, - &mut builder, - net_params, - i, - )?; - } - } - - // creating instance v2p mappings is not atomic - there are many calls - // to different sled agents that occur. for this to unwind correctly - // given a partial success of the ensure node, the undo node must be - // prior to the ensure node as a separate action. - builder.append(v2p_ensure_undo_action()); - builder.append(v2p_ensure_action()); - - builder.append(instance_ensure_registered_action()); - if params.create_params.start { - builder.append(instance_ensure_running_action()); - } + builder.append(move_to_stopped_action()); Ok(builder.build()?) } } -async fn sic_add_network_config( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let net_params = sagactx.saga_params::()?; - let which = net_params.which; - let instance_id = net_params.instance_id; - let params = net_params.saga_params; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let osagactx = sagactx.user_data(); - let datastore = osagactx.datastore(); - let switch = net_params.switch_location; - let dpd_client = - osagactx.nexus().dpd_clients.get(&switch).ok_or_else(|| { - ActionError::action_failed(Error::internal_error(&format!( - "unable to find client for switch {switch}" - ))) - })?; - - let (.., db_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(instance_id) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - // Read the sled record from the database. This needs to use the instance- - // create context (and not the regular saga context) to leverage its fleet- - // read permissions. - let sled_uuid = db_instance.runtime_state.sled_id; - let (.., sled) = LookupPath::new(&osagactx.nexus().opctx_alloc, &datastore) - .sled_id(sled_uuid) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - // Set up Dendrite configuration using the saga context, which supplies - // access to the instance's device configuration. 
- osagactx - .nexus() - .instance_ensure_dpd_config( - &opctx, - instance_id, - &sled.address(), - Some(which), - dpd_client, - ) - .await - .map_err(ActionError::action_failed) -} - -async fn sic_remove_network_config( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let net_params = sagactx.saga_params::()?; - let which = net_params.which; - let instance_id = net_params.instance_id; - let switch = net_params.switch_location; - let params = net_params.saga_params; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let osagactx = sagactx.user_data(); - let dpd_client = - osagactx.nexus().dpd_clients.get(&switch).ok_or_else(|| { - Error::internal_error(&format!( - "unable to find client for switch {switch}" - )) - })?; - let datastore = &osagactx.datastore(); - let log = sagactx.user_data().log(); - - debug!(log, "fetching external ip addresses"); - - let target_ip = &datastore - .instance_lookup_external_ips(&opctx, instance_id) - .await - .map_err(ActionError::action_failed)? - .get(which) - .ok_or_else(|| { - ActionError::action_failed(Error::internal_error(&format!( - "failed to find external ip address at index: {which}" - ))) - })? - .to_owned(); - - debug!(log, "deleting nat mapping for entry: {target_ip:#?}"); - - let result = retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry_deleted(log, target_ip.ip, *target_ip.first_port) - .await - }) - .await; - - match result { - Ok(_) => { - debug!(log, "deletion of nat entry successful for: {target_ip:#?}"); - Ok(()) - } - Err(e) => Err(Error::internal_error(&format!( - "failed to delete nat entry via dpd: {e}" - ))), - }?; - - Ok(()) -} - -async fn sic_alloc_server( - sagactx: NexusActionContext, -) -> Result { - let osagactx = sagactx.user_data(); - - // ALLOCATION POLICY - // - // NOTE: This policy can - and should! - be changed. - // - // See https://rfd.shared.oxide.computer/rfd/0205 for a more complete - // discussion. - // - // Right now, allocate an instance to any random sled agent. This has a few - // problems: - // - // - There's no consideration for "health of the sled" here, other than - // "time_deleted = Null". If the sled is rebooting, in a known unhealthy - // state, etc, we'd currently provision it here. I don't think this is a - // trivial fix, but it's work we'll need to account for eventually. - // - // - This is selecting a random sled from all sleds in the cluster. For - // multi-rack, this is going to fling the sled to an arbitrary system. - // Maybe that's okay, but worth knowing about explicitly. - // - // - This doesn't take into account anti-affinity - users will want to - // schedule instances that belong to a cluster on different failure - // domains. See https://github.com/oxidecomputer/omicron/issues/1705. - - // TODO: Fix these values. They're wrong now, but they let us move - // forward with plumbing. - let params = sagactx.saga_params::()?; - let hardware_threads = params.create_params.ncpus.0; - let rss_ram = params.create_params.memory; - let reservoir_ram = omicron_common::api::external::ByteCount::from(0); - - // Use the instance's Propolis ID as its resource key, since each unique - // Propolis consumes its own resources, and an instance can have multiple - // Propolises during a live migration. 
- let propolis_id = sagactx.lookup::("propolis_id")?; - let resources = db::model::Resources::new( - hardware_threads.into(), - rss_ram.into(), - reservoir_ram.into(), - ); - - let resource = osagactx - .nexus() - .reserve_on_random_sled( - propolis_id, - db::model::SledResourceKind::Instance, - resources, - db::model::SledReservationConstraints::none(), - ) - .await - .map_err(ActionError::action_failed)?; - Ok(resource.sled_id) -} - -async fn sic_alloc_server_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let osagactx = sagactx.user_data(); - let propolis_id = sagactx.lookup::("propolis_id")?; - - osagactx.nexus().delete_sled_reservation(propolis_id).await?; - Ok(()) -} - /// Create a network interface for an instance, using the parameters at index /// `nic_index`, returning the UUID for the NIC (or None). async fn sic_create_network_interface( @@ -984,24 +728,6 @@ async fn ensure_instance_disk_attach_state( Ok(()) } -/// Helper function to allocate a new IPv6 address for an Oxide service running -/// on the provided sled. -/// -/// `sled_id_name` is the name of the serialized output containing the UUID for -/// the targeted sled. -pub(super) async fn allocate_sled_ipv6( - opctx: &OpContext, - sagactx: NexusActionContext, - sled_uuid: Uuid, -) -> Result { - let osagactx = sagactx.user_data(); - osagactx - .datastore() - .next_ipv6_address(opctx, sled_uuid) - .await - .map_err(ActionError::action_failed) -} - async fn sic_account_virtual_resources( sagactx: NexusActionContext, ) -> Result<(), ActionError> { @@ -1052,56 +778,21 @@ async fn sic_account_virtual_resources_undo( Ok(()) } -// Allocate an IP address on the destination sled for the Propolis server -async fn sic_allocate_propolis_ip( - sagactx: NexusActionContext, -) -> Result { - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let sled_uuid = sagactx.lookup::("server_id")?; - allocate_sled_ipv6(&opctx, sagactx, sled_uuid).await -} - async fn sic_create_instance_record( sagactx: NexusActionContext, -) -> Result { +) -> Result { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - let sled_uuid = sagactx.lookup::("server_id")?; let instance_id = sagactx.lookup::("instance_id")?; - let propolis_uuid = sagactx.lookup::("propolis_id")?; - let propolis_addr = sagactx.lookup::("propolis_ip")?; - - let runtime = InstanceRuntimeState { - run_state: InstanceState::Creating, - sled_id: sled_uuid, - propolis_id: propolis_uuid, - dst_propolis_id: None, - propolis_addr: Some(std::net::SocketAddr::new( - propolis_addr.into(), - 12400, - )), - migration_id: None, - propolis_gen: Generation::new(), - hostname: params.create_params.hostname.clone(), - memory: params.create_params.memory, - ncpus: params.create_params.ncpus, - gen: Generation::new(), - time_updated: Utc::now(), - }; let new_instance = db::model::Instance::new( instance_id, params.project_id, ¶ms.create_params, - runtime.into(), ); let (.., authz_project) = LookupPath::new(&opctx, &osagactx.datastore()) @@ -1116,7 +807,7 @@ async fn sic_create_instance_record( .await .map_err(ActionError::action_failed)?; - Ok(instance.name().clone().into()) + Ok(instance) } async fn sic_delete_instance_record( @@ -1130,7 +821,11 @@ async fn sic_delete_instance_record( ¶ms.serialized_authn, ); let instance_id = sagactx.lookup::("instance_id")?; - let instance_name = 
sagactx.lookup::("instance_name")?; + let instance_name = sagactx + .lookup::("instance_record")? + .name() + .clone() + .into(); // We currently only support deleting an instance if it is stopped or // failed, so update the state accordingly to allow deletion. @@ -1156,7 +851,7 @@ async fn sic_delete_instance_record( }; let runtime_state = db::model::InstanceRuntimeState { - state: db::model::InstanceState::new(InstanceState::Failed), + nexus_state: db::model::InstanceState::new(InstanceState::Failed), // Must update the generation, or the database query will fail. // // The runtime state of the instance record is only changed as a result @@ -1183,186 +878,43 @@ async fn sic_delete_instance_record( Ok(()) } -async fn sic_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { - Ok(()) -} - -/// Ensure that the necessary v2p mappings exist for this instance -async fn sic_v2p_ensure( +async fn sic_move_to_stopped( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let instance_id = sagactx.lookup::("instance_id")?; - let sled_id = sagactx.lookup::("server_id")?; - - osagactx - .nexus() - .create_instance_v2p_mappings(&opctx, instance_id, sled_id) - .await - .map_err(ActionError::action_failed)?; - - Ok(()) -} - -async fn sic_v2p_ensure_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); let instance_id = sagactx.lookup::("instance_id")?; + let instance_record = + sagactx.lookup::("instance_record")?; + + // Create a new generation of the isntance record with the Stopped state and + // try to write it back to the database. If this node is replayed, or the + // instance has already changed state by the time this step is reached, this + // update will (correctly) be ignored because its generation number is out + // of date. + let new_state = db::model::InstanceRuntimeState { + nexus_state: db::model::InstanceState::new(InstanceState::Stopped), + gen: db::model::Generation::from( + instance_record.runtime_state.gen.next(), + ), + ..instance_record.runtime_state + }; - osagactx - .nexus() - .delete_instance_v2p_mappings(&opctx, instance_id) - .await - .map_err(ActionError::action_failed)?; - - Ok(()) -} - -async fn sic_instance_ensure_registered( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let datastore = osagactx.datastore(); - - // TODO-correctness TODO-security It's not correct to re-resolve the - // instance name now. See oxidecomputer/omicron#1536. - let instance_name = sagactx.lookup::("instance_name")?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) - .project_id(params.project_id) - .instance_name(&instance_name) - .fetch() + // If this node is being replayed, this instance may already have been + // deleted, so ignore object-not-found errors. 
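The replay safety described in these comments rests on generation numbers: the
new runtime state is derived from the record this saga already read, with the
generation bumped, so a stale or repeated write is simply not applied. A toy
sketch of that guard, using a hypothetical Record type rather than the real
InstanceRuntimeState and instance_update_runtime query:

    #[derive(Clone, Debug, PartialEq)]
    struct Record {
        gen: u64,
        state: &'static str,
    }

    // Apply `new` only if it is strictly newer than what is stored. Replaying
    // the same update is a no-op, which is what makes the saga node idempotent.
    fn try_update(current: &mut Record, new: Record) -> bool {
        if new.gen > current.gen {
            *current = new;
            true
        } else {
            false
        }
    }

    fn main() {
        let mut db = Record { gen: 1, state: "creating" };

        // A saga node computes its update from the record it previously read.
        let update = Record { gen: db.gen + 1, state: "stopped" };
        assert!(try_update(&mut db, update.clone()));

        // If the node is replayed, the recomputed update carries the same
        // generation; it is now out of date and is (correctly) ignored.
        assert!(!try_update(&mut db, update));
        assert_eq!(db, Record { gen: 2, state: "stopped" });
        println!("final record: {db:?}");
    }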
+ if let Err(e) = osagactx + .datastore() + .instance_update_runtime(&instance_id, &new_state) .await - .map_err(ActionError::action_failed)?; - - if !params.create_params.start { - let instance_id = db_instance.id(); - // If we don't need to start the instance, we can skip the ensure - // and just update the instance runtime state to `Stopped`. - // - // TODO-correctness: This is dangerous if this step is replayed, since - // a user can discover this instance and ask to start it in between - // attempts to run this step. One way to fix this is to avoid refetching - // the previous runtime state each time this step is taken, such that - // once this update is applied once, subsequent attempts to apply it - // will have an already-used generation number. - let runtime_state = db::model::InstanceRuntimeState { - state: db::model::InstanceState::new(InstanceState::Stopped), - // Must update the generation, or the database query will fail. - // - // The runtime state of the instance record is only changed as a - // result of the successful completion of the saga (i.e. after - // ensure which we're skipping in this case) or during saga - // unwinding. So we're guaranteed that the cached generation in the - // saga log is the most recent in the database. - gen: db::model::Generation::from( - db_instance.runtime_state.gen.next(), - ), - ..db_instance.runtime_state - }; - - let updated = datastore - .instance_update_runtime(&instance_id, &runtime_state) - .await - .map_err(ActionError::action_failed)?; - - if !updated { - warn!( - osagactx.log(), - "failed to update instance runtime state from creating to stopped", - ); + { + match e { + Error::ObjectNotFound { .. } => return Ok(()), + e => return Err(ActionError::action_failed(e)), } - } else { - osagactx - .nexus() - .instance_ensure_registered(&opctx, &authz_instance, &db_instance) - .await - .map_err(ActionError::action_failed)?; } Ok(()) } -async fn sic_instance_ensure_registered_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let datastore = osagactx.datastore(); - let instance_id = sagactx.lookup::("instance_id")?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(instance_id) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - osagactx - .nexus() - .instance_ensure_unregistered( - &opctx, - &authz_instance, - &db_instance, - WriteBackUpdatedInstance::WriteBack, - ) - .await - .map_err(ActionError::action_failed)?; - - Ok(()) -} - -async fn sic_instance_ensure_running( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let datastore = osagactx.datastore(); - let instance_id = sagactx.lookup::("instance_id")?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(instance_id) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - osagactx - .nexus() - .instance_request_state( - &opctx, - &authz_instance, - &db_instance, - InstanceStateRequested::Running, - ) - .await - .map_err(ActionError::action_failed)?; - - Ok(()) -} - #[cfg(test)] pub mod test { use crate::{ diff --git a/nexus/src/app/sagas/instance_delete.rs 
b/nexus/src/app/sagas/instance_delete.rs index 005e9724a6..7da497136e 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -8,9 +8,7 @@ use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; use crate::app::sagas::declare_saga_actions; -use nexus_db_queries::db; -use nexus_db_queries::db::lookup::LookupPath; -use nexus_db_queries::{authn, authz}; +use nexus_db_queries::{authn, authz, db}; use nexus_types::identity::Resource; use omicron_common::api::external::{Error, ResourceType}; use omicron_common::api::internal::shared::SwitchLocation; @@ -21,7 +19,7 @@ use steno::ActionError; // instance delete saga: input parameters #[derive(Debug, Deserialize, Serialize)] -pub(crate) struct Params { +pub struct Params { pub serialized_authn: authn::saga::Serialized, pub authz_instance: authz::Instance, pub instance: db::model::Instance, @@ -32,19 +30,10 @@ pub(crate) struct Params { declare_saga_actions! { instance_delete; - V2P_ENSURE_UNDO -> "v2p_ensure_undo" { - + sid_noop - - sid_v2p_ensure_undo - } - V2P_ENSURE -> "v2p_ensure" { - + sid_v2p_ensure - } + INSTANCE_DELETE_RECORD -> "no_result1" { + sid_delete_instance_record } - DELETE_ASIC_CONFIGURATION -> "delete_asic_configuration" { - + sid_delete_network_config - } DELETE_NETWORK_INTERFACES -> "no_result2" { + sid_delete_network_interfaces } @@ -54,15 +43,12 @@ declare_saga_actions! { VIRTUAL_RESOURCES_ACCOUNT -> "no_result4" { + sid_account_virtual_resources } - SLED_RESOURCES_ACCOUNT -> "no_result5" { - + sid_account_sled_resources - } } // instance delete saga: definition #[derive(Debug)] -pub(crate) struct SagaInstanceDelete; +pub struct SagaInstanceDelete; impl NexusSaga for SagaInstanceDelete { const NAME: &'static str = "instance-delete"; type Params = Params; @@ -75,91 +61,16 @@ impl NexusSaga for SagaInstanceDelete { _params: &Self::Params, mut builder: steno::DagBuilder, ) -> Result { - builder.append(v2p_ensure_undo_action()); - builder.append(v2p_ensure_action()); - builder.append(delete_asic_configuration_action()); builder.append(instance_delete_record_action()); builder.append(delete_network_interfaces_action()); builder.append(deallocate_external_ip_action()); builder.append(virtual_resources_account_action()); - builder.append(sled_resources_account_action()); Ok(builder.build()?) 
} } // instance delete saga: action implementations -async fn sid_noop(_sagactx: NexusActionContext) -> Result<(), ActionError> { - Ok(()) -} - -/// Ensure that the v2p mappings for this instance are deleted -async fn sid_v2p_ensure( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - osagactx - .nexus() - .delete_instance_v2p_mappings(&opctx, params.authz_instance.id()) - .await - .map_err(ActionError::action_failed)?; - - Ok(()) -} - -/// During unwind, ensure that v2p mappings are created again -async fn sid_v2p_ensure_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let (.., db_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(params.authz_instance.id()) - .fetch_for(authz::Action::Read) - .await?; - - osagactx - .nexus() - .create_instance_v2p_mappings( - &opctx, - params.authz_instance.id(), - db_instance.runtime().sled_id, - ) - .await - .map_err(ActionError::action_failed)?; - - Ok(()) -} - -async fn sid_delete_network_config( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let authz_instance = ¶ms.authz_instance; - let osagactx = sagactx.user_data(); - - osagactx - .nexus() - .instance_delete_dpd_config(&opctx, authz_instance) - .await - .map_err(ActionError::action_failed) -} - async fn sid_delete_instance_record( sagactx: NexusActionContext, ) -> Result<(), ActionError> { @@ -240,50 +151,14 @@ async fn sid_account_virtual_resources( &opctx, params.instance.id(), params.instance.project_id, - i64::from(params.instance.runtime_state.ncpus.0 .0), - params.instance.runtime_state.memory, + i64::from(params.instance.ncpus.0 .0), + params.instance.memory, ) .await .map_err(ActionError::action_failed)?; Ok(()) } -async fn sid_account_sled_resources( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - // Fetch the previously-deleted instance record to get its Propolis ID. It - // is safe to fetch the ID at this point because the instance is already - // deleted and so cannot change anymore. - // - // TODO(#2315): This prevents the garbage collection of soft-deleted - // instance records. A better method is to remove a Propolis's reservation - // once an instance no longer refers to it (e.g. when it has stopped or - // been removed from the instance's migration information) and then make - // this saga check that the instance has no active Propolises before it is - // deleted. This logic should be part of the logic needed to stop an - // instance and release its Propolis reservation; when that is added this - // step can be removed. 
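Most of the actions added or removed in these sagas come in forward/undo
pairs, and a failed node causes the completed nodes' undos to run in reverse.
A toy executor sketches that contract; the Action struct and run_saga function
below are illustrative only and are not steno's API.

    struct Action {
        name: &'static str,
        forward: fn() -> Result<(), String>,
        undo: fn(),
    }

    // Run actions in order; if one fails, run the undo of every action that
    // already completed, newest first, then report the failure.
    fn run_saga(actions: &[Action]) -> Result<(), String> {
        let mut done: Vec<&Action> = Vec::new();
        for action in actions {
            match (action.forward)() {
                Ok(()) => done.push(action),
                Err(e) => {
                    for completed in done.iter().rev() {
                        println!("unwinding {}", completed.name);
                        (completed.undo)();
                    }
                    return Err(format!("{} failed: {e}", action.name));
                }
            }
        }
        Ok(())
    }

    fn main() {
        let actions = [
            Action {
                name: "instance_delete_record",
                forward: || Ok(()),
                undo: || println!("  undo: restore instance record"),
            },
            Action {
                name: "delete_network_interfaces",
                forward: || Ok(()),
                undo: || println!("  undo: recreate network interfaces"),
            },
            Action {
                name: "deallocate_external_ip",
                forward: || Err("ip pool unavailable".to_string()),
                undo: || println!("  undo: reattach external ip"),
            },
        ];
        println!("saga result: {:?}", run_saga(&actions));
    }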
- let instance = osagactx - .datastore() - .instance_fetch_deleted(&opctx, ¶ms.authz_instance) - .await - .map_err(ActionError::action_failed)?; - - osagactx - .datastore() - .sled_reservation_delete(&opctx, instance.runtime().propolis_id) - .await - .map_err(ActionError::action_failed)?; - Ok(()) -} - #[cfg(test)] mod test { use crate::{ @@ -415,10 +290,20 @@ mod test { }; let project_lookup = nexus.project_lookup(&opctx, project_selector).unwrap(); - nexus + + let instance_state = nexus .project_create_instance(&opctx, &project_lookup, ¶ms) .await - .unwrap() + .unwrap(); + + let datastore = cptestctx.server.apictx().nexus.datastore().clone(); + let (.., db_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(instance_state.instance().id()) + .fetch() + .await + .expect("test instance should be present in datastore"); + + db_instance } #[nexus_test(server = crate::Server)] diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 5e9b8680bf..d32a20bc40 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -2,23 +2,22 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use super::instance_create::allocate_sled_ipv6; use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; -use crate::app::instance::WriteBackUpdatedInstance; -use crate::app::sagas::declare_saga_actions; +use crate::app::instance::InstanceStateChangeRequest; +use crate::app::sagas::{ + declare_saga_actions, instance_common::allocate_sled_ipv6, +}; use crate::external_api::params; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; use nexus_db_queries::{authn, authz, db}; -use omicron_common::api::external::InstanceState; -use omicron_common::api::internal::nexus::InstanceRuntimeState; +use omicron_common::address::PROPOLIS_PORT; use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::{ InstanceMigrationSourceParams, InstanceMigrationTargetParams, - InstanceStateRequested, }; use slog::warn; -use std::net::Ipv6Addr; +use std::net::{Ipv6Addr, SocketAddr}; use steno::ActionError; use steno::Node; use uuid::Uuid; @@ -26,40 +25,31 @@ use uuid::Uuid; // instance migrate saga: input parameters #[derive(Debug, Deserialize, Serialize)] -pub(crate) struct Params { +pub struct Params { pub serialized_authn: authn::saga::Serialized, pub instance: db::model::Instance, + pub src_vmm: db::model::Vmm, pub migrate_params: params::InstanceMigrate, } -// The migration saga is similar to the instance creation saga: get a -// destination sled, allocate a Propolis process on it, and send it a request to +// The migration saga is similar to the instance start saga: get a destination +// sled, allocate a Propolis process on it, and send that Propolis a request to // initialize via migration, then wait (outside the saga) for this to resolve. -// -// Most of the complexity in this saga comes from the fact that during -// migration, there are two sleds with their own instance runtime states, and -// both the saga and the work that happen after it have to specify carefully -// which of the two participating VMMs is actually running the VM once the -// migration is over. -// -// Only active instances can migrate. While an instance is active on some sled -// (and isn't migrating), that sled's sled agent maintains the instance's -// runtime state and sends updated state to Nexus when it changes. 
At the start -// of this saga, the participating sled agents and CRDB have the following -// runtime states (note that some fields, like the actual Propolis state, are -// not relevant to migration and are omitted here): -// -// | Item | Source | Dest | CRDB | -// |--------------|--------|------|------| -// | Propolis gen | G | None | G | -// | Propolis ID | P1 | None | P1 | -// | Sled ID | S1 | None | S1 | -// | Dst Prop. ID | None | None | None | -// | Migration ID | None | None | None | + declare_saga_actions! { instance_migrate; - RESERVE_RESOURCES -> "server_id" { + // In order to set up migration, the saga needs to construct the following: + // + // - A migration ID and destination Propolis ID (added to the DAG inline as + // ACTION_GENERATE_ID actions) + // - A sled ID + // - An IP address for the destination Propolis server + // + // The latter two pieces of information are used to create a VMM record for + // the new Propolis, which can then be written into the instance as a + // migration target. + RESERVE_RESOURCES -> "dst_sled_id" { + sim_reserve_sled_resources - sim_release_sled_resources } @@ -68,110 +58,47 @@ declare_saga_actions! { + sim_allocate_propolis_ip } - // This step sets the instance's migration ID and destination Propolis ID + CREATE_VMM_RECORD -> "dst_vmm_record" { + + sim_create_vmm_record + - sim_destroy_vmm_record + } + + // This step the instance's migration ID and destination Propolis ID // fields. Because the instance is active, its current sled agent maintains - // the most recent runtime state, so to update it, the saga calls into the - // sled and asks it to produce an updated record with the appropriate - // migration IDs and a new generation number. + // its most recent runtime state, so to update it, the saga calls into the + // sled and asks it to produce an updated instance record with the + // appropriate migration IDs and a new generation number. // - // Sled agent provides the synchronization here: while this operation is - // idempotent for any single transition between IDs, sled agent ensures that - // if multiple concurrent sagas try to set migration IDs at the same - // Propolis generation, then only one will win and get to proceed through - // the saga. - // - // Once this update completes, the sleds have the following states, and the - // source sled's state will be stored in CRDB: - // - // | Item | Source | Dest | CRDB | - // |--------------|--------|------|------| - // | Propolis gen | G+1 | None | G+1 | - // | Propolis ID | P1 | None | P1 | - // | Sled ID | S1 | None | S1 | - // | Dst Prop. ID | P2 | None | P2 | - // | Migration ID | M | None | M | - // - // Unwinding this step clears the migration IDs using the source sled: - // - // | Item | Source | Dest | CRDB | - // |--------------|--------|------|------| - // | Propolis gen | G+2 | None | G+2 | - // | Propolis ID | P1 | None | P1 | - // | Sled ID | S1 | None | S1 | - // | Dst Prop. ID | None | None | None | - // | Migration ID | None | None | None | + // The source sled agent synchronizes concurrent attempts to set these IDs. + // Setting a new migration ID and re-setting an existing ID are allowed, but + // trying to set an ID when a different ID is already present fails. 
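The rule spelled out above (a fresh ID or the same ID succeeds, a different
in-flight ID is a conflict) is a small compare-and-set. A self-contained
sketch, with u128 standing in for a real Uuid and no sled agent involved:

    #[derive(Debug, PartialEq)]
    enum SetIdError {
        AlreadyMigrating { existing: u128 },
    }

    // Idempotent setter: absent -> set it; same ID -> no-op; different ID -> error.
    fn set_migration_id(
        current: &mut Option<u128>,
        desired: u128,
    ) -> Result<(), SetIdError> {
        match *current {
            None => {
                *current = Some(desired);
                Ok(())
            }
            Some(existing) if existing == desired => Ok(()),
            Some(existing) => Err(SetIdError::AlreadyMigrating { existing }),
        }
    }

    fn main() {
        let mut migration_id: Option<u128> = None;
        assert_eq!(set_migration_id(&mut migration_id, 7), Ok(()));
        // Retrying the same saga node re-sends the same ID and is fine.
        assert_eq!(set_migration_id(&mut migration_id, 7), Ok(()));
        // A competing migration with a different ID is rejected.
        assert!(set_migration_id(&mut migration_id, 8).is_err());
    }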
SET_MIGRATION_IDS -> "set_migration_ids" { + sim_set_migration_ids - sim_clear_migration_ids } - // The instance state on the destination looks like the instance state on - // the source, except that it bears all of the destination's "location" - // information--its Propolis ID, sled ID, and Propolis IP--with the same - // Propolis generation number as the source set in the previous step. - CREATE_DESTINATION_STATE -> "dst_runtime_state" { - + sim_create_destination_state - } - - // Instantiate the new Propolis on the destination sled. This uses the - // record created in the previous step, so the sleds end up with the - // following state: - // - // | Item | Source | Dest | CRDB | - // |--------------|--------|------|------| - // | Propolis gen | G+1 | G+1 | G+1 | - // | Propolis ID | P1 | P2 | P1 | - // | Sled ID | S1 | S2 | S1 | - // | Dst Prop. ID | P2 | P2 | P2 | - // | Migration ID | M | M | M | - // - // Note that, because the source and destination have the same Propolis - // generation, the destination's record will not be written back to CRDB. - // - // Once the migration completes (whether successfully or not), the sled that - // ends up with the instance will publish an update that clears the - // generation numbers and (on success) updates the Propolis ID pointer. If - // migration succeeds, this produces the following: - // - // | Item | Source | Dest | CRDB | - // |--------------|--------|------|------| - // | Propolis gen | G+1 | G+2 | G+2 | - // | Propolis ID | P1 | P2 | P2 | - // | Sled ID | S1 | S2 | S2 | - // | Dst Prop. ID | P2 | None | None | - // | Migration ID | M | None | None | - // - // The undo step for this node requires special care. Unregistering a - // Propolis from a sled typically increments its Propolis generation number. - // (This is so that Nexus can rudely terminate a Propolis via unregistration - // and end up with the state it would have gotten if the Propolis had shut - // down normally.) If this step unwinds, this will produce the same state - // on the destination as in the previous table, even though no migration - // has started yet. If that update gets written back, then it will write - // Propolis generation G+2 to CRDB (as in the table above) with the wrong - // Propolis ID, and the subsequent request to clear migration IDs will not - // fix it (because the source sled's generation number is still at G+1 and - // will move to G+2, which is not recent enough to push another update). - // - // To avoid this problem, this undo step takes special care not to write - // back the updated record the destination sled returns to it. + // This step registers the instance with the destination sled. Care is + // needed at this point because there are two sleds that can send updates + // that affect the same instance record (though they have separate VMMs that + // update independently), and if the saga unwinds they need to ensure they + // cooperate to return the instance to the correct pre-migration state. ENSURE_DESTINATION_PROPOLIS -> "ensure_destination" { + sim_ensure_destination_propolis - sim_ensure_destination_propolis_undo } - // Note that this step only requests migration by sending a "migrate in" - // request to the destination sled. It does not wait for migration to - // finish. It cannot be unwound, either, because there is no way to cancel - // an in-progress migration (indeed, a requested migration might have - // finished entirely by the time the undo step runs). 
+ // Finally, this step requests migration by sending a "migrate in" request + // to the destination sled. It does not wait for migration to finish and + // cannot be allowed to unwind (if a migration has already started, it + // cannot be canceled and indeed may have completed by the time the undo + // step runs). INSTANCE_MIGRATE -> "instance_migrate" { + sim_instance_migrate } } #[derive(Debug)] -pub(crate) struct SagaInstanceMigrate; +pub struct SagaInstanceMigrate; impl NexusSaga for SagaInstanceMigrate { const NAME: &'static str = "instance-migrate"; type Params = Params; @@ -198,8 +125,8 @@ impl NexusSaga for SagaInstanceMigrate { builder.append(reserve_resources_action()); builder.append(allocate_propolis_ip_action()); + builder.append(create_vmm_record_action()); builder.append(set_migration_ids_action()); - builder.append(create_destination_state_action()); builder.append(ensure_destination_propolis_action()); builder.append(instance_migrate_action()); @@ -213,33 +140,23 @@ async fn sim_reserve_sled_resources( ) -> Result { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; + let propolis_id = sagactx.lookup::("dst_propolis_id")?; - // N.B. This assumes that the instance's shape (CPU/memory allotment) is - // immutable despite being in the instance's "runtime" state. - let resources = db::model::Resources::new( - params.instance.runtime_state.ncpus.0 .0.into(), - params.instance.runtime_state.memory, - // TODO(#2804): Properly specify reservoir size. - omicron_common::api::external::ByteCount::from(0).into(), - ); - - // Add a constraint that the only allowed sled is the one specified in the - // parameters. + // Add a constraint that requires the allocator to reserve on the + // migration's destination sled instead of a random sled. 
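The constraint added below narrows the candidate set to exactly the requested
destination sled. As a rough sketch of what a "must select from" constraint
means for candidate filtering (the Constraints builder here is hypothetical
and far simpler than the real SledReservationConstraintBuilder):

    #[derive(Default)]
    struct Constraints {
        // When set, only these sled IDs may satisfy the reservation.
        must_select_from: Option<Vec<u64>>,
    }

    impl Constraints {
        fn must_select_from(mut self, sleds: &[u64]) -> Self {
            self.must_select_from = Some(sleds.to_vec());
            self
        }

        fn allows(&self, sled_id: u64) -> bool {
            match &self.must_select_from {
                Some(allowed) => allowed.contains(&sled_id),
                None => true, // unconstrained: any sled is eligible
            }
        }
    }

    fn main() {
        let all_sleds = [1_u64, 2, 3, 4];

        // Unconstrained (e.g. instance start): every sled is a candidate.
        let any = Constraints::default();
        assert_eq!(all_sleds.iter().filter(|s| any.allows(**s)).count(), 4);

        // Migration: only the caller-specified destination sled qualifies.
        let dst_only = Constraints::default().must_select_from(&[3]);
        let candidates: Vec<u64> =
            all_sleds.into_iter().filter(|s| dst_only.allows(*s)).collect();
        assert_eq!(candidates, vec![3]);
    }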
let constraints = db::model::SledReservationConstraintBuilder::new() .must_select_from(&[params.migrate_params.dst_sled_id]) .build(); - let propolis_id = sagactx.lookup::("dst_propolis_id")?; - let resource = osagactx - .nexus() - .reserve_on_random_sled( - propolis_id, - db::model::SledResourceKind::Instance, - resources, - constraints, - ) - .await - .map_err(ActionError::action_failed)?; + let resource = super::instance_common::reserve_vmm_resources( + osagactx.nexus(), + propolis_id, + params.instance.ncpus.0 .0 as u32, + params.instance.memory, + constraints, + ) + .await?; + Ok(resource.sled_id) } @@ -248,6 +165,7 @@ async fn sim_release_sled_resources( ) -> Result<(), anyhow::Error> { let osagactx = sagactx.user_data(); let propolis_id = sagactx.lookup::("dst_propolis_id")?; + osagactx.nexus().delete_sled_reservation(propolis_id).await?; Ok(()) } @@ -261,7 +179,66 @@ async fn sim_allocate_propolis_ip( &sagactx, ¶ms.serialized_authn, ); - allocate_sled_ipv6(&opctx, sagactx, params.migrate_params.dst_sled_id).await + allocate_sled_ipv6( + &opctx, + sagactx.user_data().datastore(), + params.migrate_params.dst_sled_id, + ) + .await +} + +async fn sim_create_vmm_record( + sagactx: NexusActionContext, +) -> Result { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let instance_id = params.instance.id(); + let propolis_id = sagactx.lookup::("dst_propolis_id")?; + let sled_id = sagactx.lookup::("dst_sled_id")?; + let propolis_ip = sagactx.lookup::("dst_propolis_ip")?; + + info!(osagactx.log(), "creating vmm record for migration destination"; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id, + "sled_id" => %sled_id); + + super::instance_common::create_and_insert_vmm_record( + osagactx.datastore(), + &opctx, + instance_id, + propolis_id, + sled_id, + propolis_ip, + nexus_db_model::VmmInitialState::Migrating, + ) + .await +} + +async fn sim_destroy_vmm_record( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let vmm = sagactx.lookup::("dst_vmm_record")?; + info!(osagactx.log(), "destroying vmm record for migration unwind"; + "propolis_id" => %vmm.id); + + super::instance_common::destroy_vmm_record( + osagactx.datastore(), + &opctx, + &vmm, + ) + .await } async fn sim_set_migration_ids( @@ -275,14 +252,24 @@ async fn sim_set_migration_ids( ); let db_instance = ¶ms.instance; + let src_sled_id = params.src_vmm.sled_id; let migration_id = sagactx.lookup::("migrate_id")?; let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; + + info!(osagactx.log(), "setting migration IDs on migration source sled"; + "instance_id" => %db_instance.id(), + "sled_id" => %src_sled_id, + "migration_id" => %migration_id, + "dst_propolis_id" => %dst_propolis_id, + "prev_runtime_state" => ?db_instance.runtime()); + let updated_record = osagactx .nexus() .instance_set_migration_ids( &opctx, db_instance.id(), - db_instance, + src_sled_id, + db_instance.runtime(), InstanceMigrationSourceParams { dst_propolis_id, migration_id }, ) .await @@ -295,9 +282,16 @@ async fn sim_clear_migration_ids( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let src_sled_id = params.src_vmm.sled_id; let 
db_instance = sagactx.lookup::("set_migration_ids")?; + info!(osagactx.log(), "clearing migration IDs for saga unwind"; + "instance_id" => %db_instance.id(), + "sled_id" => %src_sled_id, + "prev_runtime_state" => ?db_instance.runtime()); + // Because the migration never actually started (and thus didn't finish), // the instance should be at the same Propolis generation as it was when // migration IDs were set, which means sled agent should accept a request to @@ -312,7 +306,11 @@ async fn sim_clear_migration_ids( // as failed. if let Err(e) = osagactx .nexus() - .instance_clear_migration_ids(db_instance.id(), &db_instance) + .instance_clear_migration_ids( + db_instance.id(), + src_sled_id, + db_instance.runtime(), + ) .await { warn!(osagactx.log(), @@ -324,28 +322,6 @@ async fn sim_clear_migration_ids( Ok(()) } -async fn sim_create_destination_state( - sagactx: NexusActionContext, -) -> Result { - let params = sagactx.saga_params::()?; - let mut db_instance = - sagactx.lookup::("set_migration_ids")?; - let dst_propolis_id = sagactx.lookup::("dst_propolis_id")?; - let dst_propolis_ip = sagactx.lookup::("dst_propolis_ip")?; - - // Update the runtime state to refer to the new Propolis. - let new_runtime = db::model::InstanceRuntimeState { - state: db::model::InstanceState::new(InstanceState::Creating), - sled_id: params.migrate_params.dst_sled_id, - propolis_id: dst_propolis_id, - propolis_ip: Some(ipnetwork::Ipv6Network::from(dst_propolis_ip).into()), - ..db_instance.runtime_state - }; - - db_instance.runtime_state = new_runtime; - Ok(db_instance) -} - async fn sim_ensure_destination_propolis( sagactx: NexusActionContext, ) -> Result<(), ActionError> { @@ -355,8 +331,16 @@ async fn sim_ensure_destination_propolis( &sagactx, ¶ms.serialized_authn, ); + + let vmm = sagactx.lookup::("dst_vmm_record")?; let db_instance = - sagactx.lookup::("dst_runtime_state")?; + sagactx.lookup::("set_migration_ids")?; + + info!(osagactx.log(), "ensuring migration destination vmm exists"; + "instance_id" => %db_instance.id(), + "dst_propolis_id" => %vmm.id, + "dst_vmm_state" => ?vmm); + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) .instance_id(db_instance.id()) .lookup_for(authz::Action::Modify) @@ -365,7 +349,13 @@ async fn sim_ensure_destination_propolis( osagactx .nexus() - .instance_ensure_registered(&opctx, &authz_instance, &db_instance) + .instance_ensure_registered( + &opctx, + &authz_instance, + &db_instance, + &vmm.id, + &vmm, + ) .await .map_err(ActionError::action_failed)?; @@ -381,27 +371,39 @@ async fn sim_ensure_destination_propolis_undo( &sagactx, ¶ms.serialized_authn, ); + + let dst_sled_id = sagactx.lookup::("dst_sled_id")?; let db_instance = - sagactx.lookup::("dst_runtime_state")?; + sagactx.lookup::("set_migration_ids")?; let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) .instance_id(db_instance.id()) .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; + info!(osagactx.log(), "unregistering destination vmm for migration unwind"; + "instance_id" => %db_instance.id(), + "sled_id" => %dst_sled_id, + "prev_runtime_state" => ?db_instance.runtime()); + // Ensure that the destination sled has no Propolis matching the description // the saga previously generated. // - // The updated instance record from this undo action must be dropped so - // that a later undo action (clearing migration IDs) can update the record - // instead. See the saga definition for more details. 
+ // Sled agent guarantees that if an instance is unregistered from a sled + // that does not believe it holds the "active" Propolis for the instance, + // then the sled's copy of the instance record will not change during + // unregistration. This precondition always holds here because the "start + // migration" step is not allowed to unwind once migration has possibly + // started. Not changing the instance is important here because the next + // undo step (clearing migration IDs) needs to advance the instance's + // generation number to succeed. osagactx .nexus() .instance_ensure_unregistered( &opctx, &authz_instance, - &db_instance, - WriteBackUpdatedInstance::Drop, + &dst_sled_id, + db_instance.runtime(), ) .await .map_err(ActionError::action_failed)?; @@ -418,19 +420,26 @@ async fn sim_instance_migrate( &sagactx, ¶ms.serialized_authn, ); - let src_runtime: InstanceRuntimeState = sagactx - .lookup::("set_migration_ids")? - .runtime() - .clone() - .into(); - let dst_db_instance = - sagactx.lookup::("dst_runtime_state")?; + + let db_instance = + sagactx.lookup::("set_migration_ids")?; + + let src_vmm_addr = + SocketAddr::new(params.src_vmm.propolis_ip.ip(), PROPOLIS_PORT); + + let src_propolis_id = db_instance.runtime().propolis_id.unwrap(); + let dst_vmm = sagactx.lookup::("dst_vmm_record")?; let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(dst_db_instance.id()) + .instance_id(db_instance.id()) .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; + info!(osagactx.log(), "initiating migration from destination sled"; + "instance_id" => %db_instance.id(), + "dst_vmm_record" => ?dst_vmm, + "src_propolis_id" => %src_propolis_id); + // TODO-correctness: This needs to be retried if a transient error occurs to // avoid a problem like the following: // @@ -450,14 +459,12 @@ async fn sim_instance_migrate( .instance_request_state( &opctx, &authz_instance, - &dst_db_instance, - InstanceStateRequested::MigrationTarget( + &db_instance, + &Some(dst_vmm), + InstanceStateChangeRequest::Migrate( InstanceMigrationTargetParams { - src_propolis_addr: src_runtime - .propolis_addr - .unwrap() - .to_string(), - src_propolis_id: src_runtime.propolis_id, + src_propolis_addr: src_vmm_addr.to_string(), + src_propolis_id, }, ), ) @@ -552,26 +559,8 @@ mod tests { .await } - async fn fetch_db_instance( - cptestctx: &ControlPlaneTestContext, - opctx: &nexus_db_queries::context::OpContext, - id: Uuid, - ) -> nexus_db_model::Instance { - let datastore = cptestctx.server.apictx().nexus.datastore().clone(); - let (.., db_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(id) - .fetch() - .await - .expect("test instance should be present in datastore"); - - info!(&cptestctx.logctx.log, "refetched instance from db"; - "instance" => ?db_instance); - - db_instance - } - fn select_first_alternate_sled( - db_instance: &db::model::Instance, + db_vmm: &db::model::Vmm, other_sleds: &[(Uuid, Server)], ) -> Uuid { let default_sled_uuid = @@ -584,7 +573,7 @@ mod tests { panic!("default test sled agent was in other_sleds"); } - if db_instance.runtime().sled_id == default_sled_uuid { + if db_vmm.sled_id == default_sled_uuid { other_sleds[0].0 } else { default_sled_uuid @@ -606,14 +595,14 @@ mod tests { // Poke the instance to get it into the Running state. 
test_helpers::instance_simulate(cptestctx, &instance.identity.id).await; - let db_instance = - fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; - let old_runtime = db_instance.runtime().clone(); - let dst_sled_id = - select_first_alternate_sled(&db_instance, &other_sleds); + let state = + test_helpers::instance_fetch(cptestctx, instance.identity.id).await; + let vmm = state.vmm().as_ref().unwrap(); + let dst_sled_id = select_first_alternate_sled(vmm, &other_sleds); let params = Params { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), - instance: db_instance, + instance: state.instance().clone(), + src_vmm: vmm.clone(), migrate_params: params::InstanceMigrate { dst_sled_id }, }; @@ -624,12 +613,13 @@ mod tests { // Merely running the migration saga (without simulating any completion // steps in the simulated agents) should not change where the instance // is running. - let new_db_instance = - fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; - assert_eq!(new_db_instance.runtime().sled_id, old_runtime.sled_id); + let new_state = + test_helpers::instance_fetch(cptestctx, state.instance().id()) + .await; + assert_eq!( - new_db_instance.runtime().propolis_id, - old_runtime.propolis_id + new_state.instance().runtime().propolis_id, + state.instance().runtime().propolis_id ); } @@ -649,26 +639,35 @@ mod tests { // Poke the instance to get it into the Running state. test_helpers::instance_simulate(cptestctx, &instance.identity.id).await; - let db_instance = - fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; - let old_runtime = db_instance.runtime().clone(); - let dst_sled_id = - select_first_alternate_sled(&db_instance, &other_sleds); - let make_params = || -> futures::future::BoxFuture<'_, Params> { Box::pin({ async { - let db_instance = fetch_db_instance( + let old_state = test_helpers::instance_fetch( cptestctx, - &opctx, instance.identity.id, ) .await; + + let old_instance = old_state.instance(); + let old_vmm = old_state + .vmm() + .as_ref() + .expect("instance should have a vmm before migrating"); + + let dst_sled_id = + select_first_alternate_sled(old_vmm, &other_sleds); + + info!(log, "setting up new migration saga"; + "old_instance" => ?old_instance, + "src_vmm" => ?old_vmm, + "dst_sled_id" => %dst_sled_id); + Params { serialized_authn: authn::saga::Serialized::for_opctx( &opctx, ), - instance: db_instance, + instance: old_instance.clone(), + src_vmm: old_vmm.clone(), migrate_params: params::InstanceMigrate { dst_sled_id }, } } @@ -681,25 +680,27 @@ mod tests { // Unwinding at any step should clear the migration IDs from // the instance record and leave the instance's location // otherwise untouched. 
- let new_db_instance = fetch_db_instance( + let new_state = test_helpers::instance_fetch( cptestctx, - &opctx, instance.identity.id, ) .await; - assert!(new_db_instance.runtime().migration_id.is_none()); - assert!(new_db_instance - .runtime() - .dst_propolis_id - .is_none()); + let new_instance = new_state.instance(); + let new_vmm = + new_state.vmm().as_ref().expect("vmm should be active"); + + assert!(new_instance.runtime().migration_id.is_none()); + assert!(new_instance.runtime().dst_propolis_id.is_none()); assert_eq!( - new_db_instance.runtime().sled_id, - old_runtime.sled_id + new_instance.runtime().propolis_id.unwrap(), + new_vmm.id ); - assert_eq!( - new_db_instance.runtime().propolis_id, - old_runtime.propolis_id + + info!( + &log, + "migration saga unwind: stopping instance after failed \ + saga" ); // Ensure the instance can stop. This helps to check that @@ -716,18 +717,28 @@ mod tests { &instance.identity.id, ) .await; - let new_db_instance = fetch_db_instance( + + let new_state = test_helpers::instance_fetch( cptestctx, - &opctx, instance.identity.id, ) .await; + + let new_instance = new_state.instance(); + let new_vmm = new_state.vmm().as_ref(); assert_eq!( - new_db_instance.runtime().state.0, - InstanceState::Stopped + new_instance.runtime().nexus_state.0, + omicron_common::api::external::InstanceState::Stopped ); + assert!(new_instance.runtime().propolis_id.is_none()); + assert!(new_vmm.is_none()); // Restart the instance for the next iteration. + info!( + &log, + "migration saga unwind: restarting instance after \ + failed saga" + ); test_helpers::instance_start( cptestctx, &instance.identity.id, diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 68e88b0d13..5d02d44b6b 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -4,39 +4,50 @@ //! Implements a saga that starts an instance. -use super::{NexusActionContext, NexusSaga, SagaInitError}; -use crate::app::{ - instance::WriteBackUpdatedInstance, - sagas::{declare_saga_actions, retry_until_known_result}, +use std::net::Ipv6Addr; + +use super::{ + instance_common::allocate_sled_ipv6, NexusActionContext, NexusSaga, + SagaInitError, ACTION_GENERATE_ID, }; +use crate::app::sagas::declare_saga_actions; +use chrono::Utc; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; use nexus_db_queries::{authn, authz, db}; use omicron_common::api::external::{Error, InstanceState}; use serde::{Deserialize, Serialize}; -use sled_agent_client::types::InstanceStateRequested; use slog::info; -use steno::ActionError; +use steno::{ActionError, Node}; +use uuid::Uuid; /// Parameters to the instance start saga. #[derive(Debug, Deserialize, Serialize)] pub(crate) struct Params { - pub instance: db::model::Instance, + pub db_instance: db::model::Instance, /// Authentication context to use to fetch the instance's current state from /// the database. pub serialized_authn: authn::saga::Serialized, - - /// True if the saga should configure Dendrite and OPTE configuration for - /// this instance. This allows the instance create saga to do this work - /// prior to invoking the instance start saga as a subsaga without repeating - /// these steps. - pub ensure_network: bool, } declare_saga_actions! 
{ instance_start; - MARK_AS_STARTING -> "starting_state" { + ALLOC_SERVER -> "sled_id" { + + sis_alloc_server + - sis_alloc_server_undo + } + + ALLOC_PROPOLIS_IP -> "propolis_ip" { + + sis_alloc_propolis_ip + } + + CREATE_VMM_RECORD -> "vmm_record" { + + sis_create_vmm_record + - sis_destroy_vmm_record + } + + MARK_AS_STARTING -> "started_record" { + sis_move_to_starting - sis_move_to_starting_undo } @@ -77,6 +88,15 @@ impl NexusSaga for SagaInstanceStart { _params: &Self::Params, mut builder: steno::DagBuilder, ) -> Result { + builder.append(Node::action( + "propolis_id", + "GeneratePropolisId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(alloc_server_action()); + builder.append(alloc_propolis_ip_action()); + builder.append(create_vmm_record_action()); builder.append(mark_as_starting_action()); builder.append(dpd_ensure_action()); builder.append(v2p_ensure_action()); @@ -86,118 +106,200 @@ impl NexusSaga for SagaInstanceStart { } } +async fn sis_alloc_server( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let hardware_threads = params.db_instance.ncpus.0; + let reservoir_ram = params.db_instance.memory; + let propolis_id = sagactx.lookup::("propolis_id")?; + + let resource = super::instance_common::reserve_vmm_resources( + osagactx.nexus(), + propolis_id, + hardware_threads.0 as u32, + reservoir_ram, + db::model::SledReservationConstraints::none(), + ) + .await?; + + Ok(resource.sled_id) +} + +async fn sis_alloc_server_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let propolis_id = sagactx.lookup::("propolis_id")?; + + osagactx.nexus().delete_sled_reservation(propolis_id).await?; + Ok(()) +} + +async fn sis_alloc_propolis_ip( + sagactx: NexusActionContext, +) -> Result { + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + let sled_uuid = sagactx.lookup::("sled_id")?; + allocate_sled_ipv6(&opctx, sagactx.user_data().datastore(), sled_uuid).await +} + +async fn sis_create_vmm_record( + sagactx: NexusActionContext, +) -> Result { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + let instance_id = params.db_instance.id(); + let propolis_id = sagactx.lookup::("propolis_id")?; + let sled_id = sagactx.lookup::("sled_id")?; + let propolis_ip = sagactx.lookup::("propolis_ip")?; + + super::instance_common::create_and_insert_vmm_record( + osagactx.datastore(), + &opctx, + instance_id, + propolis_id, + sled_id, + propolis_ip, + nexus_db_model::VmmInitialState::Starting, + ) + .await +} + +async fn sis_destroy_vmm_record( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let vmm = sagactx.lookup::("vmm_record")?; + super::instance_common::destroy_vmm_record( + osagactx.datastore(), + &opctx, + &vmm, + ) + .await +} + async fn sis_move_to_starting( sagactx: NexusActionContext, -) -> Result { +) -> Result { let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); - let instance_id = params.instance.id(); + let datastore = osagactx.datastore(); + let instance_id = params.db_instance.id(); + let propolis_id = 
sagactx.lookup::("propolis_id")?; info!(osagactx.log(), "moving instance to Starting state via saga"; - "instance_id" => %instance_id); + "instance_id" => %instance_id, + "propolis_id" => %propolis_id); let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - // The saga invoker needs to supply a prior state in which the instance can - // legally be started. This action will try to transition the instance to - // the Starting state; once this succeeds, the instance can't be deleted, so - // it is safe to program its network configuration (if required) and then - // try to start it. - // - // This interlock is not sufficient to handle multiple concurrent instance - // creation sagas. See below. - if !matches!( - params.instance.runtime_state.state.0, - InstanceState::Creating | InstanceState::Stopped, - ) { - return Err(ActionError::action_failed(Error::conflict(&format!( - "instance is in state {}, but must be one of {} or {} to be started", - params.instance.runtime_state.state.0, - InstanceState::Creating, - InstanceState::Stopped - )))); - } + // For idempotency, refetch the instance to see if this step already applied + // its desired update. + let (.., db_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(instance_id) + .fetch_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; - let new_runtime = db::model::InstanceRuntimeState { - state: db::model::InstanceState::new(InstanceState::Starting), - gen: params.instance.runtime_state.gen.next().into(), - ..params.instance.runtime_state - }; + match db_instance.runtime().propolis_id { + // If this saga's Propolis ID is already written to the record, then + // this step must have completed already and is being retried, so + // proceed. + Some(db_id) if db_id == propolis_id => { + info!(osagactx.log(), "start saga: Propolis ID already set"; + "instance_id" => %instance_id); - if !osagactx - .datastore() - .instance_update_runtime(&instance_id, &new_runtime) - .await - .map_err(ActionError::action_failed)? - { - // If the update was not applied, but the desired state is already - // what's in the database, proceed anyway. - // - // TODO(#2315) This logic is not completely correct. It provides - // idempotency in the case where this action moved the instance to - // Starting, but the action was then replayed. It does not handle the - // case where the conflict occurred because a different instance of this - // saga won the race to set the instance to Starting; this will lead to - // two sagas concurrently trying to start the instance. - // - // The correct way to handle this case is to use saga-generated Propolis - // IDs to distinguish between saga executions: the ID must be NULL in - // order to start the instance; if multiple saga executions race, only - // one will write its chosen ID to the record, allowing the sagas to - // determine a winner. - let (.., new_instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(instance_id) - .fetch() - .await - .map_err(ActionError::action_failed)?; + Ok(db_instance) + } - if new_instance.runtime_state.gen != new_runtime.gen - || !matches!( - new_instance.runtime_state.state.0, - InstanceState::Starting - ) - { + // If the instance has a different Propolis ID, a competing start saga + // must have started the instance already, so unwind. 
+ Some(_) => { return Err(ActionError::action_failed(Error::conflict( "instance changed state before it could be started", ))); } - info!(osagactx.log(), "start saga: instance was already starting"; - "instance_id" => %instance_id); + // If the instance has no Propolis ID, try to write this saga's chosen + // ID into the instance and put it in the Running state. (While the + // instance is still technically starting up, writing the Propolis ID at + // this point causes the VMM's state, which is Starting, to supersede + // the instance's state, so this won't cause the instance to appear to + // be running before Propolis thinks it has started.) + None => { + let new_runtime = db::model::InstanceRuntimeState { + nexus_state: db::model::InstanceState::new( + InstanceState::Running, + ), + propolis_id: Some(propolis_id), + time_updated: Utc::now(), + gen: db_instance.runtime().gen.next().into(), + ..db_instance.runtime_state + }; + + // Bail if another actor managed to update the instance's state in + // the meantime. + if !osagactx + .datastore() + .instance_update_runtime(&instance_id, &new_runtime) + .await + .map_err(ActionError::action_failed)? + { + return Err(ActionError::action_failed(Error::conflict( + "instance changed state before it could be started", + ))); + } + + let mut new_record = db_instance.clone(); + new_record.runtime_state = new_runtime; + Ok(new_record) + } } - - Ok(new_runtime) } async fn sis_move_to_starting_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { - let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); + let db_instance = + sagactx.lookup::("started_record")?; + let instance_id = db_instance.id(); info!(osagactx.log(), "start saga failed; returning instance to Stopped"; - "instance_id" => %params.instance.id()); - - let runtime_state = - sagactx.lookup::("starting_state")?; + "instance_id" => %instance_id); - // Don't just restore the old state; if the instance was being created, and - // starting it failed, the instance is now stopped, not creating. let new_runtime = db::model::InstanceRuntimeState { - state: db::model::InstanceState::new(InstanceState::Stopped), - gen: runtime_state.gen.next().into(), - ..runtime_state + nexus_state: db::model::InstanceState::new(InstanceState::Stopped), + propolis_id: None, + gen: db_instance.runtime_state.gen.next().into(), + ..db_instance.runtime_state }; if !osagactx .datastore() - .instance_update_runtime(¶ms.instance.id(), &new_runtime) + .instance_update_runtime(&instance_id, &new_runtime) .await? 
{ info!(osagactx.log(), "did not return instance to Stopped: old generation number"; - "instance_id" => %params.instance.id()); + "instance_id" => %instance_id); } Ok(()) @@ -208,25 +310,20 @@ async fn sis_dpd_ensure( ) -> Result<(), ActionError> { let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); - if !params.ensure_network { - info!(osagactx.log(), "start saga: skipping dpd_ensure by request"; - "instance_id" => %params.instance.id()); - - return Ok(()); - } + let db_instance = + sagactx.lookup::("started_record")?; + let instance_id = db_instance.id(); info!(osagactx.log(), "start saga: ensuring instance dpd configuration"; - "instance_id" => %params.instance.id()); + "instance_id" => %instance_id); let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); let datastore = osagactx.datastore(); - let runtime_state = - sagactx.lookup::("starting_state")?; - let sled_uuid = runtime_state.sled_id; + let sled_uuid = sagactx.lookup::("sled_id")?; let (.., sled) = LookupPath::new(&osagactx.nexus().opctx_alloc, &datastore) .sled_id(sled_uuid) .fetch() @@ -251,7 +348,7 @@ async fn sis_dpd_ensure( .nexus() .instance_ensure_dpd_config( &opctx, - params.instance.id(), + instance_id, &sled.address(), None, dpd_client, @@ -267,57 +364,27 @@ async fn sis_dpd_ensure_undo( sagactx: NexusActionContext, ) -> Result<(), anyhow::Error> { let params = sagactx.saga_params::()?; + let instance_id = params.db_instance.id(); let osagactx = sagactx.user_data(); let log = osagactx.log(); - if !params.ensure_network { - info!(log, - "start saga: didn't ensure dpd configuration, nothing to undo"; - "instance_id" => %params.instance.id()); - - return Ok(()); - } - - info!(log, "start saga: undoing dpd configuration"; - "instance_id" => %params.instance.id()); - - let datastore = &osagactx.datastore(); let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - let target_ips = &datastore - .instance_lookup_external_ips(&opctx, params.instance.id()) - .await?; + info!(log, "start saga: undoing dpd configuration"; + "instance_id" => %instance_id); - let boundary_switches = osagactx.nexus().boundary_switches(&opctx).await?; - for switch in boundary_switches { - let dpd_client = - osagactx.nexus().dpd_clients.get(&switch).ok_or_else(|| { - ActionError::action_failed(Error::internal_error(&format!( - "unable to find client for switch {switch}" - ))) - })?; + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; - for ip in target_ips { - let result = retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry_deleted(log, ip.ip, *ip.first_port) - .await - }) - .await; - - match result { - Ok(_) => { - debug!(log, "successfully deleted nat entry for {ip:#?}"); - Ok(()) - } - Err(e) => Err(Error::internal_error(&format!( - "failed to delete nat entry for {ip:#?} via dpd: {e}" - ))), - }?; - } - } + osagactx + .nexus() + .instance_delete_dpd_config(&opctx, &authz_instance) + .await?; Ok(()) } @@ -327,27 +394,20 @@ async fn sis_v2p_ensure( ) -> Result<(), ActionError> { let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); - if !params.ensure_network { - info!(osagactx.log(), "start saga: skipping v2p_ensure by request"; - "instance_id" => %params.instance.id()); - - return Ok(()); - } + let instance_id = params.db_instance.id(); info!(osagactx.log(), "start saga: 
ensuring v2p mappings are configured"; - "instance_id" => %params.instance.id()); + "instance_id" => %instance_id); let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, ); - let runtime_state = - sagactx.lookup::("starting_state")?; - let sled_uuid = runtime_state.sled_id; + let sled_uuid = sagactx.lookup::("sled_id")?; osagactx .nexus() - .create_instance_v2p_mappings(&opctx, params.instance.id(), sled_uuid) + .create_instance_v2p_mappings(&opctx, instance_id, sled_uuid) .await .map_err(ActionError::action_failed)?; @@ -359,17 +419,11 @@ async fn sis_v2p_ensure_undo( ) -> Result<(), anyhow::Error> { let params = sagactx.saga_params::()?; let osagactx = sagactx.user_data(); - if !params.ensure_network { - info!(osagactx.log(), - "start saga: didn't ensure v2p configuration, nothing to undo"; - "instance_id" => %params.instance.id()); - - return Ok(()); - } - - let instance_id = params.instance.id(); + let instance_id = params.db_instance.id(); + let sled_id = sagactx.lookup::("sled_id")?; info!(osagactx.log(), "start saga: undoing v2p configuration"; - "instance_id" => %instance_id); + "instance_id" => %instance_id, + "sled_id" => %sled_id); let opctx = crate::context::op_context_for_saga_action( &sagactx, @@ -394,29 +448,32 @@ async fn sis_ensure_registered( ¶ms.serialized_authn, ); let osagactx = sagactx.user_data(); + let db_instance = + sagactx.lookup::("started_record")?; + let instance_id = db_instance.id(); + let sled_id = sagactx.lookup::("sled_id")?; + let vmm_record = sagactx.lookup::("vmm_record")?; + let propolis_id = sagactx.lookup::("propolis_id")?; info!(osagactx.log(), "start saga: ensuring instance is registered on sled"; - "instance_id" => %params.instance.id(), - "sled_id" => %params.instance.runtime().sled_id); - - let (.., authz_instance, mut db_instance) = - LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(params.instance.id()) - .fetch_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + "instance_id" => %instance_id, + "sled_id" => %sled_id); - // The instance is not really being "created" (it already exists from - // the caller's perspective), but if it does not exist on its sled, the - // target sled agent will populate its instance manager with the - // contents of this modified record, and that record needs to allow a - // transition to the Starting state. 
- db_instance.runtime_state.state = - nexus_db_model::InstanceState(InstanceState::Creating); + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; osagactx .nexus() - .instance_ensure_registered(&opctx, &authz_instance, &db_instance) + .instance_ensure_registered( + &opctx, + &authz_instance, + &db_instance, + &propolis_id, + &vmm_record, + ) .await .map_err(ActionError::action_failed)?; @@ -429,7 +486,8 @@ async fn sis_ensure_registered_undo( let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; let datastore = osagactx.datastore(); - let instance_id = params.instance.id(); + let instance_id = params.db_instance.id(); + let sled_id = sagactx.lookup::("sled_id")?; let opctx = crate::context::op_context_for_saga_action( &sagactx, ¶ms.serialized_authn, @@ -437,8 +495,10 @@ async fn sis_ensure_registered_undo( info!(osagactx.log(), "start saga: unregistering instance from sled"; "instance_id" => %instance_id, - "sled_id" => %params.instance.runtime().sled_id); + "sled_id" => %sled_id); + // Fetch the latest record so that this callee can drive the instance into + // a Failed state if the unregister call fails. let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) .instance_id(instance_id) .fetch() @@ -450,8 +510,8 @@ async fn sis_ensure_registered_undo( .instance_ensure_unregistered( &opctx, &authz_instance, - &db_instance, - WriteBackUpdatedInstance::WriteBack, + &sled_id, + db_instance.runtime(), ) .await .map_err(ActionError::action_failed)?; @@ -470,12 +530,18 @@ async fn sis_ensure_running( ¶ms.serialized_authn, ); + let db_instance = + sagactx.lookup::("started_record")?; + let db_vmm = sagactx.lookup::("vmm_record")?; + let instance_id = params.db_instance.id(); + let sled_id = sagactx.lookup::("sled_id")?; info!(osagactx.log(), "start saga: ensuring instance is running"; - "instance_id" => %params.instance.id()); + "instance_id" => %instance_id, + "sled_id" => %sled_id); - let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) - .instance_id(params.instance.id()) - .fetch() + let (.., authz_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) .await .map_err(ActionError::action_failed)?; @@ -485,7 +551,8 @@ async fn sis_ensure_running( &opctx, &authz_instance, &db_instance, - InstanceStateRequested::Running, + &Some(db_vmm), + crate::app::instance::InstanceStateChangeRequest::Run, ) .await .map_err(ActionError::action_failed)?; @@ -495,11 +562,8 @@ async fn sis_ensure_running( #[cfg(test)] mod test { + use crate::app::{saga::create_saga_dag, sagas::test_helpers}; use crate::external_api::params; - use crate::{ - app::{saga::create_saga_dag, sagas::test_helpers}, - Nexus, TestInterfaces as _, - }; use dropshot::test_util::ClientTestContext; use nexus_db_queries::authn; use nexus_test_utils::resource_helpers::{ @@ -509,8 +573,6 @@ mod test { use omicron_common::api::external::{ ByteCount, IdentityMetadataCreateParams, InstanceCpuCount, }; - use sled_agent_client::TestInterfaces as _; - use std::sync::Arc; use uuid::Uuid; use super::*; @@ -553,35 +615,6 @@ mod test { .await } - async fn fetch_db_instance( - cptestctx: &ControlPlaneTestContext, - opctx: &nexus_db_queries::context::OpContext, - id: Uuid, - ) -> nexus_db_model::Instance { - let datastore = cptestctx.server.apictx().nexus.datastore().clone(); - let (.., db_instance) = 
LookupPath::new(&opctx, &datastore) - .instance_id(id) - .fetch() - .await - .expect("test instance should be present in datastore"); - - info!(&cptestctx.logctx.log, "refetched instance from db"; - "instance" => ?db_instance); - - db_instance - } - - async fn instance_simulate( - cptestctx: &ControlPlaneTestContext, - nexus: &Arc, - instance_id: &Uuid, - ) { - info!(&cptestctx.logctx.log, "Poking simulated instance"; - "instance_id" => %instance_id); - let sa = nexus.instance_sled_by_id(instance_id).await.unwrap(); - sa.instance_finish_transition(*instance_id).await; - } - #[nexus_test(server = crate::Server)] async fn test_saga_basic_usage_succeeds( cptestctx: &ControlPlaneTestContext, @@ -592,22 +625,32 @@ mod test { let opctx = test_helpers::test_opctx(cptestctx); let instance = create_instance(client).await; let db_instance = - fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; + test_helpers::instance_fetch(cptestctx, instance.identity.id) + .await + .instance() + .clone(); let params = Params { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), - instance: db_instance, - ensure_network: true, + db_instance, }; let dag = create_saga_dag::(params).unwrap(); let saga = nexus.create_runnable_saga(dag).await.unwrap(); nexus.run_saga(saga).await.expect("Start saga should succeed"); - instance_simulate(cptestctx, nexus, &instance.identity.id).await; - let db_instance = - fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; - assert_eq!(db_instance.runtime().state.0, InstanceState::Running); + test_helpers::instance_simulate(cptestctx, &instance.identity.id).await; + let vmm_state = + test_helpers::instance_fetch(cptestctx, instance.identity.id) + .await + .vmm() + .as_ref() + .expect("running instance should have a vmm") + .runtime + .state + .0; + + assert_eq!(vmm_state, InstanceState::Running); } #[nexus_test(server = crate::Server)] @@ -630,18 +673,16 @@ mod test { || { Box::pin({ async { - let db_instance = fetch_db_instance( + let db_instance = test_helpers::instance_fetch( cptestctx, - &opctx, instance.identity.id, ) - .await; + .await.instance().clone(); Params { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), - instance: db_instance, - ensure_network: true, + db_instance, } } }) @@ -649,20 +690,20 @@ mod test { || { Box::pin({ async { - let new_db_instance = fetch_db_instance( + let new_db_instance = test_helpers::instance_fetch( cptestctx, - &opctx, instance.identity.id, ) - .await; + .await.instance().clone(); info!(log, "fetched instance runtime state after saga execution"; "instance_id" => %instance.identity.id, "instance_runtime" => ?new_db_instance.runtime()); + assert!(new_db_instance.runtime().propolis_id.is_none()); assert_eq!( - new_db_instance.runtime().state.0, + new_db_instance.runtime().nexus_state.0, InstanceState::Stopped ); } @@ -682,20 +723,29 @@ mod test { let opctx = test_helpers::test_opctx(cptestctx); let instance = create_instance(client).await; let db_instance = - fetch_db_instance(cptestctx, &opctx, instance.identity.id).await; + test_helpers::instance_fetch(cptestctx, instance.identity.id) + .await + .instance() + .clone(); let params = Params { serialized_authn: authn::saga::Serialized::for_opctx(&opctx), - instance: db_instance, - ensure_network: true, + db_instance, }; let dag = create_saga_dag::(params).unwrap(); test_helpers::actions_succeed_idempotently(nexus, dag).await; - instance_simulate(cptestctx, nexus, &instance.identity.id).await; - let new_db_instance = - fetch_db_instance(cptestctx, 
&opctx, instance.identity.id).await; - - assert_eq!(new_db_instance.runtime().state.0, InstanceState::Running); + test_helpers::instance_simulate(cptestctx, &instance.identity.id).await; + let vmm_state = + test_helpers::instance_fetch(cptestctx, instance.identity.id) + .await + .vmm() + .as_ref() + .expect("running instance should have a vmm") + .runtime + .state + .0; + + assert_eq!(vmm_state, InstanceState::Running); } } diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 8a9fc69f0e..88778e3573 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -23,6 +23,7 @@ pub mod disk_create; pub mod disk_delete; pub mod finalize_disk; pub mod import_blocks_from_url; +mod instance_common; pub mod instance_create; pub mod instance_delete; pub mod instance_migrate; @@ -369,7 +370,7 @@ where )) } - // Anything elses is a permanent error + // Anything else is a permanent error _ => Err(backoff::BackoffError::Permanent( progenitor_client::Error::ErrorResponse( response_value, diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 0b3c5c99d7..5a686b2f3d 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -130,7 +130,7 @@ pub(crate) struct Params { pub silo_id: Uuid, pub project_id: Uuid, pub disk_id: Uuid, - pub use_the_pantry: bool, + pub attached_instance_and_sled: Option<(Uuid, Uuid)>, pub create_params: params::SnapshotCreate, } @@ -251,7 +251,8 @@ impl NexusSaga for SagaSnapshotCreate { // (DB) Tracks virtual resource provisioning. builder.append(space_account_action()); - if !params.use_the_pantry { + let use_the_pantry = params.attached_instance_and_sled.is_none(); + if !use_the_pantry { // (Sleds) If the disk is attached to an instance, send a // snapshot request to sled-agent to create a ZFS snapshot. builder.append(send_snapshot_request_to_sled_agent_action()); @@ -283,7 +284,7 @@ impl NexusSaga for SagaSnapshotCreate { // (DB) Mark snapshot as "ready" builder.append(finalize_snapshot_record_action()); - if params.use_the_pantry { + if use_the_pantry { // (Pantry) Set the state back to Detached // // This has to be the last saga node! Otherwise, concurrent @@ -669,67 +670,47 @@ async fn ssc_send_snapshot_request_to_sled_agent( let log = sagactx.user_data().log(); let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let snapshot_id = sagactx.lookup::("snapshot_id")?; - // Find if this disk is attached to an instance - let (.., disk) = LookupPath::new(&opctx, &osagactx.datastore()) - .disk_id(params.disk_id) - .fetch() - .await - .map_err(ActionError::action_failed)?; - - match disk.runtime().attach_instance_id { - Some(instance_id) => { - info!(log, "disk {} instance is {}", disk.id(), instance_id); - - // Get the instance's sled agent client - let (.., instance) = LookupPath::new(&opctx, &osagactx.datastore()) - .instance_id(instance_id) - .fetch() - .await - .map_err(ActionError::action_failed)?; + // If this node was reached, the saga initiator thought the disk was + // attached to an instance that was running on a specific sled. Contact that + // sled and ask it to initiate a snapshot. Note that this is best-effort: + // the instance may have stopped (or may be have stopped, had the disk + // detached, and resumed running on the same sled) while the saga was + // executing. 
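    // For context, the saga builder earlier in this file only appends this
    // node when the params carry a sled to contact:
    //
    //     let use_the_pantry = params.attached_instance_and_sled.is_none();
    //     if !use_the_pantry {
    //         builder.append(send_snapshot_request_to_sled_agent_action());
    //     }
    //
    // so hitting the `ok_or_else` below indicates a bug in saga construction
    // rather than a condition a user can trigger.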
+ let (instance_id, sled_id) = + params.attached_instance_and_sled.ok_or_else(|| { + ActionError::action_failed(Error::internal_error( + "snapshot saga in send_snapshot_request_to_sled_agent but no \ + instance/sled pair was provided", + )) + })?; - let sled_agent_client = osagactx - .nexus() - .instance_sled(&instance) - .await - .map_err(ActionError::action_failed)?; + info!(log, "asking for disk snapshot from Propolis via sled agent"; + "disk_id" => %params.disk_id, + "instance_id" => %instance_id, + "sled_id" => %sled_id); - info!(log, "instance {} sled agent created ok", instance_id); + let sled_agent_client = osagactx + .nexus() + .sled_client(&sled_id) + .await + .map_err(ActionError::action_failed)?; - // Send a snapshot request to propolis through sled agent - retry_until_known_result(log, || async { - sled_agent_client - .instance_issue_disk_snapshot_request( - &instance.id(), - &disk.id(), - &InstanceIssueDiskSnapshotRequestBody { snapshot_id }, - ) - .await - }) + retry_until_known_result(log, || async { + sled_agent_client + .instance_issue_disk_snapshot_request( + &instance_id, + ¶ms.disk_id, + &InstanceIssueDiskSnapshotRequestBody { snapshot_id }, + ) .await - .map_err(|e| e.to_string()) - .map_err(ActionError::action_failed)?; - Ok(()) - } - - None => { - // This branch shouldn't be seen unless there's a detach that occurs - // after the saga starts. - error!(log, "disk {} not attached to an instance!", disk.id()); + }) + .await + .map_err(|e| e.to_string()) + .map_err(ActionError::action_failed)?; - Err(ActionError::action_failed(Error::ServiceUnavailable { - internal_message: - "disk detached after snapshot_create saga started!" - .to_string(), - })) - } - } + Ok(()) } async fn ssc_send_snapshot_request_to_sled_agent_undo( @@ -1566,7 +1547,6 @@ mod test { use crate::app::saga::create_saga_dag; use crate::app::sagas::test_helpers; - use crate::app::test_interfaces::TestInterfaces; use crate::external_api::shared::IpRange; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::{ @@ -1574,6 +1554,7 @@ mod test { }; use dropshot::test_util::ClientTestContext; use nexus_db_queries::context::OpContext; + use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::DataStore; use nexus_test_utils::resource_helpers::create_disk; use nexus_test_utils::resource_helpers::create_ip_pool; @@ -1810,14 +1791,14 @@ mod test { project_id: Uuid, disk_id: Uuid, disk: NameOrId, - use_the_pantry: bool, + instance_and_sled: Option<(Uuid, Uuid)>, ) -> Params { Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), silo_id, project_id, disk_id, - use_the_pantry, + attached_instance_and_sled: instance_and_sled, create_params: params::SnapshotCreate { identity: IdentityMetadataCreateParams { name: "my-snapshot".parse().expect("Invalid disk name"), @@ -1866,7 +1847,7 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - true, + None, ); let dag = create_saga_dag::(params).unwrap(); let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); @@ -1941,7 +1922,7 @@ mod test { cptestctx: &ControlPlaneTestContext, client: &ClientTestContext, disks_to_attach: Vec, - ) { + ) -> InstanceAndActiveVmm { let instances_url = format!("/v1/instances?project={}", PROJECT_NAME,); let instance: Instance = object_create( client, @@ -1966,11 +1947,49 @@ mod test { ) .await; - // cannot snapshot attached disk for instance in state starting + // Read out the instance's assigned sled, then poke the instance to get + // it from the Starting state to 
the Running state so the test disk can + // be snapshotted. let nexus = &cptestctx.server.apictx().nexus; - let sa = - nexus.instance_sled_by_id(&instance.identity.id).await.unwrap(); + let opctx = test_opctx(&cptestctx); + let (.., authz_instance) = LookupPath::new(&opctx, nexus.datastore()) + .instance_id(instance.identity.id) + .lookup_for(authz::Action::Read) + .await + .unwrap(); + + let instance_state = nexus + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .unwrap(); + + let sled_id = instance_state + .sled_id() + .expect("starting instance should have a sled"); + let sa = nexus.sled_client(&sled_id).await.unwrap(); + sa.instance_finish_transition(instance.identity.id).await; + let instance_state = nexus + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .unwrap(); + + let new_state = instance_state + .vmm() + .as_ref() + .expect("running instance should have a sled") + .runtime + .state + .0; + + assert_eq!( + new_state, + omicron_common::api::external::InstanceState::Running + ); + + instance_state } #[nexus_test(server = crate::Server)] @@ -2053,8 +2072,8 @@ mod test { // since this is just a test, bypass the normal // attachment machinery and just update the disk's // database record directly. - if !use_the_pantry { - setup_test_instance( + let instance_and_sled = if !use_the_pantry { + let state = setup_test_instance( cptestctx, client, vec![params::InstanceDiskAttachment::Attach( @@ -2065,7 +2084,15 @@ mod test { )], ) .await; - } + + let sled_id = state + .sled_id() + .expect("running instance should have a vmm"); + + Some((state.instance().id(), sled_id)) + } else { + None + }; new_test_params( &opctx, @@ -2073,7 +2100,7 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - use_the_pantry, + instance_and_sled, ) } }) @@ -2169,8 +2196,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - // set use_the_pantry to true, disk is unattached at time of saga creation - true, + // The disk isn't attached at this time, so don't supply a sled. + None, ); let dag = create_saga_dag::(params).unwrap(); @@ -2233,8 +2260,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - // set use_the_pantry to true, disk is unattached at time of saga creation - true, + // The disk isn't attached at this time, so don't supply a sled. + None, ); let dag = create_saga_dag::(params).unwrap(); @@ -2272,14 +2299,23 @@ mod test { let silo_id = authz_silo.id(); let project_id = authz_project.id(); + // Synthesize an instance ID to pass to the saga, but use the default + // test sled ID. This will direct a snapshot request to the simulated + // sled agent specifying an instance it knows nothing about, which is + // equivalent to creating an instance, attaching the test disk, creating + // the saga, stopping the instance, detaching the disk, and then letting + // the saga run. + let fake_instance_id = Uuid::new_v4(); + let fake_sled_id = + Uuid::parse_str(nexus_test_utils::SLED_AGENT_UUID).unwrap(); + let params = new_test_params( &opctx, silo_id, project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - // set use_the_pantry to true, disk is attached at time of saga creation - false, + Some((fake_instance_id, fake_sled_id)), ); let dag = create_saga_dag::(params).unwrap(); @@ -2303,19 +2339,10 @@ mod test { .await .expect("failed to detach disk")); - // Actually run the saga + // Actually run the saga. This should fail. 
let output = nexus.run_saga(runnable_saga).await; - // Expect to see 503 - match output { - Err(e) => { - assert!(matches!(e, Error::ServiceUnavailable { .. })); - } - - Ok(_) => { - assert!(false); - } - } + assert!(output.is_err()); // Attach the disk to an instance, then rerun the saga populate_ip_pool( @@ -2331,7 +2358,7 @@ mod test { ) .await; - setup_test_instance( + let instance_state = setup_test_instance( cptestctx, client, vec![params::InstanceDiskAttachment::Attach( @@ -2342,6 +2369,10 @@ mod test { ) .await; + let sled_id = instance_state + .sled_id() + .expect("running instance should have a vmm"); + // Rerun the saga let params = new_test_params( &opctx, @@ -2349,8 +2380,7 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - // set use_the_pantry to false, disk is attached at time of saga creation - false, + Some((instance_state.instance().id(), sled_id)), ); let dag = create_saga_dag::(params).unwrap(); diff --git a/nexus/src/app/sagas/test_helpers.rs b/nexus/src/app/sagas/test_helpers.rs index aa9334b682..eccb013b66 100644 --- a/nexus/src/app/sagas/test_helpers.rs +++ b/nexus/src/app/sagas/test_helpers.rs @@ -15,7 +15,11 @@ use async_bb8_diesel::{ }; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use futures::future::BoxFuture; -use nexus_db_queries::{context::OpContext, db::DataStore}; +use nexus_db_queries::{ + authz, + context::OpContext, + db::{datastore::InstanceAndActiveVmm, lookup::LookupPath, DataStore}, +}; use nexus_types::identity::Resource; use omicron_common::api::external::NameOrId; use sled_agent_client::TestInterfaces as _; @@ -123,7 +127,12 @@ pub(crate) async fn instance_simulate( info!(&cptestctx.logctx.log, "Poking simulated instance"; "instance_id" => %instance_id); let nexus = &cptestctx.server.apictx().nexus; - let sa = nexus.instance_sled_by_id(instance_id).await.unwrap(); + let sa = nexus + .instance_sled_by_id(instance_id) + .await + .unwrap() + .expect("instance must be on a sled to simulate a state change"); + sa.instance_finish_transition(*instance_id).await; } @@ -147,10 +156,38 @@ pub(crate) async fn instance_simulate_by_name( let instance_lookup = nexus.instance_lookup(&opctx, instance_selector).unwrap(); let (.., instance) = instance_lookup.fetch().await.unwrap(); - let sa = nexus.instance_sled_by_id(&instance.id()).await.unwrap(); + let sa = nexus + .instance_sled_by_id(&instance.id()) + .await + .unwrap() + .expect("instance must be on a sled to simulate a state change"); sa.instance_finish_transition(instance.id()).await; } +pub async fn instance_fetch( + cptestctx: &ControlPlaneTestContext, + instance_id: Uuid, +) -> InstanceAndActiveVmm { + let datastore = cptestctx.server.apictx().nexus.datastore().clone(); + let opctx = test_opctx(&cptestctx); + let (.., authz_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(instance_id) + .lookup_for(authz::Action::Read) + .await + .expect("test instance should be present in datastore"); + + let db_state = datastore + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .expect("test instance's info should be fetchable"); + + info!(&cptestctx.logctx.log, "refetched instance info from db"; + "instance_id" => %instance_id, + "instance_and_vmm" => ?db_state); + + db_state +} + /// Tests that the saga described by `dag` succeeds if each of its nodes is /// repeated. 
/// diff --git a/nexus/src/app/snapshot.rs b/nexus/src/app/snapshot.rs index 06ac140606..0c90ac31fb 100644 --- a/nexus/src/app/snapshot.rs +++ b/nexus/src/app/snapshot.rs @@ -93,41 +93,43 @@ impl super::Nexus { // If there isn't a running propolis, Nexus needs to use the Crucible // Pantry to make this snapshot - let use_the_pantry = if let Some(attach_instance_id) = + let instance_and_sled = if let Some(attach_instance_id) = &db_disk.runtime_state.attach_instance_id { - let (.., db_instance) = LookupPath::new(opctx, &self.db_datastore) - .instance_id(*attach_instance_id) - .fetch_for(authz::Action::Read) + let (.., authz_instance) = + LookupPath::new(opctx, &self.db_datastore) + .instance_id(*attach_instance_id) + .lookup_for(authz::Action::Read) + .await?; + + let instance_state = self + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) .await?; - let instance_state: InstanceState = db_instance.runtime().state.0; - - match instance_state { - // If there's a propolis running, use that - InstanceState::Running | - // Rebooting doesn't deactivate the volume - InstanceState::Rebooting - => false, - - // If there's definitely no propolis running, then use the - // pantry - InstanceState::Stopped | InstanceState::Destroyed => true, - - // If there *may* be a propolis running, then fail: we can't - // know if that propolis has activated the Volume or not, or if - // it's in the process of deactivating. - _ => { - return Err( - Error::invalid_request( - &format!("cannot snapshot attached disk for instance in state {}", instance_state) - ) - ); - } + match instance_state.vmm().as_ref() { + None => None, + Some(vmm) => match vmm.runtime.state.0 { + // If the VM might be running, or it's rebooting (which + // doesn't deactivate the volume), send the snapshot request + // to the relevant VMM. Otherwise, there's no way to know if + // the instance has attached the volume or is in the process + // of detaching it, so bail. + InstanceState::Running | InstanceState::Rebooting => { + Some((*attach_instance_id, vmm.sled_id)) + } + _ => { + return Err(Error::invalid_request(&format!( + "cannot snapshot attached disk for instance in \ + state {}", + vmm.runtime.state.0 + ))); + } + }, } } else { // This disk is not attached to an instance, use the pantry. - true + None }; let saga_params = sagas::snapshot_create::Params { @@ -135,7 +137,7 @@ impl super::Nexus { silo_id: authz_silo.id(), project_id: authz_project.id(), disk_id: authz_disk.id(), - use_the_pantry, + attached_instance_and_sled: instance_and_sled, create_params: params.clone(), }; diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index 17ea205cbb..486569333e 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -22,17 +22,20 @@ pub trait TestInterfaces { async fn instance_sled_by_id( &self, id: &Uuid, - ) -> Result, Error>; + ) -> Result>, Error>; - /// Returns the SledAgentClient for a Disk from its id. + /// Returns the SledAgentClient for the sled running an instance to which a + /// disk is attached. async fn disk_sled_by_id( &self, id: &Uuid, - ) -> Result, Error>; + ) -> Result>, Error>; /// Returns the supplied instance's current active sled ID. 
- async fn instance_sled_id(&self, instance_id: &Uuid) - -> Result; + async fn instance_sled_id( + &self, + instance_id: &Uuid, + ) -> Result, Error>; async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result; @@ -48,22 +51,19 @@ impl TestInterfaces for super::Nexus { async fn instance_sled_by_id( &self, id: &Uuid, - ) -> Result, Error> { - let opctx = OpContext::for_tests( - self.log.new(o!()), - Arc::clone(&self.db_datastore), - ); - let (.., db_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(*id) - .fetch() - .await?; - self.instance_sled(&db_instance).await + ) -> Result>, Error> { + let sled_id = self.instance_sled_id(id).await?; + if let Some(sled_id) = sled_id { + Ok(Some(self.sled_client(&sled_id).await?)) + } else { + Ok(None) + } } async fn disk_sled_by_id( &self, id: &Uuid, - ) -> Result, Error> { + ) -> Result>, Error> { let opctx = OpContext::for_tests( self.log.new(o!()), Arc::clone(&self.db_datastore), @@ -72,23 +72,27 @@ impl TestInterfaces for super::Nexus { .disk_id(*id) .fetch() .await?; - let (.., db_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(db_disk.runtime().attach_instance_id.unwrap()) - .fetch() - .await?; - self.instance_sled(&db_instance).await + + self.instance_sled_by_id(&db_disk.runtime().attach_instance_id.unwrap()) + .await } - async fn instance_sled_id(&self, id: &Uuid) -> Result { + async fn instance_sled_id(&self, id: &Uuid) -> Result, Error> { let opctx = OpContext::for_tests( self.log.new(o!()), Arc::clone(&self.db_datastore), ); - let (.., db_instance) = LookupPath::new(&opctx, &self.db_datastore) + + let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) .instance_id(*id) - .fetch() + .lookup_for(nexus_db_queries::authz::Action::Read) .await?; - Ok(db_instance.runtime().sled_id) + + Ok(self + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await? + .sled_id()) } async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result { diff --git a/nexus/src/cidata.rs b/nexus/src/cidata.rs index d35b3f8256..8f776501b6 100644 --- a/nexus/src/cidata.rs +++ b/nexus/src/cidata.rs @@ -21,7 +21,7 @@ impl InstanceCiData for Instance { // cloud-init meta-data is YAML, but YAML is a strict superset of JSON. 
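        // `hostname` is no longer carried in the instance's runtime state (it
        // is a static property of the instance record, mirroring the new
        // `InstanceProperties` type in the sled-agent API), so it is read
        // directly off the model below.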
let meta_data = serde_json::to_vec(&MetaData { instance_id: self.id(), - local_hostname: &self.runtime().hostname, + local_hostname: &self.hostname, public_keys, }) .map_err(|_| Error::internal_error("failed to serialize meta-data"))?; diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index ac5cf76775..1fddfba85b 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -1926,8 +1926,13 @@ async fn instance_view( }; let instance_lookup = nexus.instance_lookup(&opctx, instance_selector)?; - let (.., instance) = instance_lookup.fetch().await?; - Ok(HttpResponseOk(instance.into())) + let (.., authz_instance) = + instance_lookup.lookup_for(authz::Action::Read).await?; + let instance_and_vmm = nexus + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await?; + Ok(HttpResponseOk(instance_and_vmm.into())) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } @@ -2110,7 +2115,7 @@ async fn instance_serial_console( let instance_lookup = nexus.instance_lookup(&opctx, instance_selector)?; let data = nexus - .instance_serial_console_data(&instance_lookup, &query) + .instance_serial_console_data(&opctx, &instance_lookup, &query) .await?; Ok(HttpResponseOk(data)) }; @@ -2148,6 +2153,7 @@ async fn instance_serial_console_stream( Ok(instance_lookup) => { nexus .instance_serial_console_stream( + &opctx, client_stream, &instance_lookup, &query, diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index a99d386349..ebb21feb40 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -34,8 +34,8 @@ use omicron_common::api::external::http_pagination::PaginatedById; use omicron_common::api::external::http_pagination::ScanById; use omicron_common::api::external::http_pagination::ScanParams; use omicron_common::api::internal::nexus::DiskRuntimeState; -use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::nexus::ProducerEndpoint; +use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; @@ -250,7 +250,7 @@ struct InstancePathParam { async fn cpapi_instances_put( rqctx: RequestContext>, path_params: Path, - new_runtime_state: TypedBody, + new_runtime_state: TypedBody, ) -> Result { let apictx = rqctx.context(); let nexus = &apictx.nexus; diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 20f4b90b1b..71a3977192 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -172,7 +172,11 @@ async fn set_instance_state( } async fn instance_simulate(nexus: &Arc, id: &Uuid) { - let sa = nexus.instance_sled_by_id(id).await.unwrap(); + let sa = nexus + .instance_sled_by_id(id) + .await + .unwrap() + .expect("instance must be on a sled to simulate a state change"); sa.instance_finish_transition(*id).await; } diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 83fff2fbab..b8fcc9f2cb 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -87,6 +87,10 @@ fn get_instance_url(instance_name: &str) -> String { format!("/v1/instances/{}?{}", instance_name, get_project_selector()) } +fn 
get_instance_start_url(instance_name: &str) -> String { + format!("/v1/instances/{}/start?{}", instance_name, get_project_selector()) +} + fn get_disks_url() -> String { format!("/v1/disks?{}", get_project_selector()) } @@ -574,12 +578,20 @@ async fn test_instance_start_creates_networking_state( let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); - let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) + let (.., authz_instance) = LookupPath::new(&opctx, &datastore) .instance_id(instance.identity.id) - .fetch() + .lookup_for(nexus_db_queries::authz::Action::Read) .await .unwrap(); + let instance_state = datastore + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .unwrap(); + + let sled_id = + instance_state.sled_id().expect("running instance should have a sled"); + let guest_nics = datastore .derive_guest_network_interface_info(&opctx, &authz_instance) .await @@ -589,7 +601,7 @@ async fn test_instance_start_creates_networking_state( for agent in &sled_agents { // TODO(#3107) Remove this bifurcation when Nexus programs all mappings // itself. - if agent.id != db_instance.runtime().sled_id { + if agent.id != sled_id { assert_sled_v2p_mappings( agent, &nics[0], @@ -645,7 +657,12 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { let instance_next = instance_get(&client, &instance_url).await; assert_eq!(instance_next.runtime.run_state, InstanceState::Running); - let original_sled = nexus.instance_sled_id(&instance_id).await.unwrap(); + let original_sled = nexus + .instance_sled_id(&instance_id) + .await + .unwrap() + .expect("running instance should have a sled"); + let dst_sled_id = if original_sled == default_sled_id { other_sled_id } else { @@ -666,7 +683,12 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { .parsed_body::() .unwrap(); - let current_sled = nexus.instance_sled_id(&instance_id).await.unwrap(); + let current_sled = nexus + .instance_sled_id(&instance_id) + .await + .unwrap() + .expect("running instance should have a sled"); + assert_eq!(current_sled, original_sled); // Explicitly simulate the migration action on the target. 
Simulated @@ -678,7 +700,12 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { let instance = instance_get(&client, &instance_url).await; assert_eq!(instance.runtime.run_state, InstanceState::Running); - let current_sled = nexus.instance_sled_id(&instance_id).await.unwrap(); + let current_sled = nexus + .instance_sled_id(&instance_id) + .await + .unwrap() + .expect("migrated instance should still have a sled"); + assert_eq!(current_sled, dst_sled_id); } @@ -752,7 +779,11 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { .derive_guest_network_interface_info(&opctx, &authz_instance) .await .unwrap(); - let original_sled_id = nexus.instance_sled_id(&instance_id).await.unwrap(); + let original_sled_id = nexus + .instance_sled_id(&instance_id) + .await + .unwrap() + .expect("running instance should have a sled"); let mut sled_agents = vec![cptestctx.sled_agent.sled_agent.clone()]; sled_agents.extend(other_sleds.iter().map(|tup| tup.1.sled_agent.clone())); @@ -806,7 +837,11 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; let instance = instance_get(&client, &instance_url).await; assert_eq!(instance.runtime.run_state, InstanceState::Running); - let current_sled = nexus.instance_sled_id(&instance_id).await.unwrap(); + let current_sled = nexus + .instance_sled_id(&instance_id) + .await + .unwrap() + .expect("migrated instance should have a sled"); assert_eq!(current_sled, dst_sled_id); for sled_agent in &sled_agents { @@ -1050,7 +1085,7 @@ async fn test_instances_delete_fails_when_running_succeeds_when_stopped( .unwrap(); assert_eq!( error.message, - "instance cannot be deleted in state \"running\"" + "cannot delete instance: instance is running or has not yet fully stopped" ); // Stop the instance @@ -2816,16 +2851,22 @@ async fn test_disks_detached_when_instance_destroyed( assert!(matches!(disk.state, DiskState::Attached(_))); } - // Stop and delete instance + // Stash the instance's current sled agent for later disk simulation. This + // needs to be done before the instance is stopped and dissociated from its + // sled. 
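    // (With VMMs tracked separately, `instance_sled_by_id` returns `None` once
    // the instance has no active VMM, so the client can't be looked up after
    // the stop below completes.)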
let instance_url = format!("/v1/instances/nfs?project={}", PROJECT_NAME); - let instance = - instance_post(&client, instance_name, InstanceOp::Stop).await; - + let instance = instance_get(&client, &instance_url).await; let apictx = &cptestctx.server.apictx(); let nexus = &apictx.nexus; + let sa = nexus + .instance_sled_by_id(&instance.identity.id) + .await + .unwrap() + .expect("instance should be on a sled while it's running"); - // Store the sled agent for this instance for later disk simulation - let sa = nexus.instance_sled_by_id(&instance.identity.id).await.unwrap(); + // Stop and delete instance + let instance = + instance_post(&client, instance_name, InstanceOp::Stop).await; instance_simulate(nexus, &instance.identity.id).await; let instance = instance_get(&client, &instance_url).await; @@ -3042,20 +3083,40 @@ async fn test_instances_memory_greater_than_max_size( assert!(error.message.contains("memory must be less than")); } -async fn expect_instance_creation_fail_unavailable( +async fn expect_instance_start_fail_unavailable( client: &ClientTestContext, - url_instances: &str, - instance_params: ¶ms::InstanceCreate, + instance_name: &str, ) { - let builder = - RequestBuilder::new(client, http::Method::POST, &url_instances) - .body(Some(&instance_params)) - .expect_status(Some(http::StatusCode::SERVICE_UNAVAILABLE)); + let builder = RequestBuilder::new( + client, + http::Method::POST, + &get_instance_start_url(instance_name), + ) + .expect_status(Some(http::StatusCode::SERVICE_UNAVAILABLE)); + NexusRequest::new(builder) .authn_as(AuthnMode::PrivilegedUser) .execute() .await - .expect("Expected instance creation to fail with SERVICE_UNAVAILABLE!"); + .expect("Expected instance start to fail with SERVICE_UNAVAILABLE"); +} + +async fn expect_instance_start_ok( + client: &ClientTestContext, + instance_name: &str, +) { + let builder = RequestBuilder::new( + client, + http::Method::POST, + &get_instance_start_url(instance_name), + ) + .expect_status(Some(http::StatusCode::ACCEPTED)); + + NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance start to succeed with 202 Accepted"); } async fn expect_instance_creation_ok( @@ -3074,17 +3135,6 @@ async fn expect_instance_creation_ok( .expect("Expected instance creation to work!"); } -async fn expect_instance_deletion_ok( - client: &ClientTestContext, - url_instances: &str, -) { - NexusRequest::object_delete(client, &url_instances) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); -} - #[nexus_test] async fn test_cannot_provision_instance_beyond_cpu_capacity( cptestctx: &ControlPlaneTestContext, @@ -3093,59 +3143,65 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( create_project(client, PROJECT_NAME).await; populate_ip_pool(&client, "default", None).await; - let too_many_cpus = InstanceCpuCount::try_from(i64::from( - nexus_test_utils::TEST_HARDWARE_THREADS + 1, - )) - .unwrap(); - let enough_cpus = InstanceCpuCount::try_from(i64::from( - nexus_test_utils::TEST_HARDWARE_THREADS, - )) - .unwrap(); + // The third item in each tuple specifies whether instance start should + // succeed or fail if all these configs are visited in order and started in + // sequence. Note that for this reason the order of these elements matters. 
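    // Concretely: if the test sled advertises N = TEST_HARDWARE_THREADS vCPUs,
    // "too-many-cpus" (N + 1) can never be placed, "just-right-cpus" (N) fills
    // the sled's advertised capacity, and "insufficient-space" (also N) cannot
    // be placed while "just-right-cpus" is running.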
+ let configs = vec![ + ("too-many-cpus", nexus_test_utils::TEST_HARDWARE_THREADS + 1, Err(())), + ("just-right-cpus", nexus_test_utils::TEST_HARDWARE_THREADS, Ok(())), + ( + "insufficient-space", + nexus_test_utils::TEST_HARDWARE_THREADS, + Err(()), + ), + ]; - // Try to boot an instance that uses more CPUs than we have - // on our test sled setup. - let name1 = Name::try_from(String::from("test")).unwrap(); - let mut instance_params = params::InstanceCreate { - identity: IdentityMetadataCreateParams { - name: name1.clone(), - description: String::from("probably serving data"), - }, - ncpus: too_many_cpus, - memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("test"), - user_data: vec![], - network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, - external_ips: vec![], - disks: vec![], - start: false, - }; - let url_instances = get_instances_url(); + // Creating all the instances should succeed, even though there will never + // be enough space to run the too-large instance. + let mut instances = Vec::new(); + for config in &configs { + let name = Name::try_from(config.0.to_string()).unwrap(); + let ncpus = InstanceCpuCount::try_from(i64::from(config.1)).unwrap(); + let params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name, + description: String::from("probably serving data"), + }, + ncpus, + memory: ByteCount::from_gibibytes_u32(1), + hostname: config.0.to_string(), + user_data: vec![], + network_interfaces: + params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + start: false, + }; - expect_instance_creation_fail_unavailable( - client, - &url_instances, - &instance_params, - ) - .await; + let url_instances = get_instances_url(); + expect_instance_creation_ok(client, &url_instances, ¶ms).await; - // If we ask for fewer CPUs, the request should work - instance_params.ncpus = enough_cpus; - expect_instance_creation_ok(client, &url_instances, &instance_params).await; + let instance = instance_get(&client, &get_instance_url(config.0)).await; + instances.push(instance); + } - // Requesting another instance won't have enough space - let name2 = Name::try_from(String::from("test2")).unwrap(); - instance_params.identity.name = name2; - expect_instance_creation_fail_unavailable( - client, - &url_instances, - &instance_params, - ) - .await; + // Only the first properly-sized instance should be able to start. + for config in &configs { + match config.2 { + Ok(_) => expect_instance_start_ok(client, config.0).await, + Err(_) => { + expect_instance_start_fail_unavailable(client, config.0).await + } + } + } - // But if we delete the first instace, we'll have space - let url_instance = get_instance_url(&name1.to_string()); - expect_instance_deletion_ok(client, &url_instance).await; - expect_instance_creation_ok(client, &url_instances, &instance_params).await; + // Make the started instance transition to Running, shut it down, and verify + // that the other reasonably-sized instance can now start. 
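    // (The first `instance_simulate` below completes the Starting -> Running
    // transition for "just-right-cpus"; the second, after the stop request,
    // completes Stopping -> Stopped so the sled's capacity can be reclaimed.)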
+ let nexus = &cptestctx.server.apictx().nexus; + instance_simulate(nexus, &instances[1].identity.id).await; + instances[1] = instance_post(client, configs[1].0, InstanceOp::Stop).await; + instance_simulate(nexus, &instances[1].identity.id).await; + expect_instance_start_ok(client, configs[2].0).await; } #[nexus_test] @@ -3198,57 +3254,62 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( create_project(client, PROJECT_NAME).await; populate_ip_pool(&client, "default", None).await; - let too_much_ram = ByteCount::try_from( - nexus_test_utils::TEST_PHYSICAL_RAM - + u64::from(MIN_MEMORY_BYTES_PER_INSTANCE), - ) - .unwrap(); - let enough_ram = - ByteCount::try_from(nexus_test_utils::TEST_PHYSICAL_RAM).unwrap(); + let configs = vec![ + ( + "too-much-memory", + nexus_test_utils::TEST_RESERVOIR_RAM + + u64::from(MIN_MEMORY_BYTES_PER_INSTANCE), + Err(()), + ), + ("just-right-memory", nexus_test_utils::TEST_RESERVOIR_RAM, Ok(())), + ("insufficient-space", nexus_test_utils::TEST_RESERVOIR_RAM, Err(())), + ]; - // Try to boot an instance that uses more RAM than we have - // on our test sled setup. - let name1 = Name::try_from(String::from("test")).unwrap(); - let mut instance_params = params::InstanceCreate { - identity: IdentityMetadataCreateParams { - name: name1.clone(), - description: String::from("probably serving data"), - }, - ncpus: InstanceCpuCount::try_from(2).unwrap(), - memory: too_much_ram, - hostname: String::from("test"), - user_data: vec![], - network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, - external_ips: vec![], - disks: vec![], - start: false, - }; - let url_instances = get_instances_url(); - expect_instance_creation_fail_unavailable( - client, - &url_instances, - &instance_params, - ) - .await; + // Creating all the instances should succeed, even though there will never + // be enough space to run the too-large instance. + let mut instances = Vec::new(); + for config in &configs { + let name = Name::try_from(config.0.to_string()).unwrap(); + let params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name, + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(i64::from(1)).unwrap(), + memory: ByteCount::try_from(config.1).unwrap(), + hostname: config.0.to_string(), + user_data: vec![], + network_interfaces: + params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + start: false, + }; - // If we ask for less RAM, the request should work - instance_params.memory = enough_ram; - expect_instance_creation_ok(client, &url_instances, &instance_params).await; + let url_instances = get_instances_url(); + expect_instance_creation_ok(client, &url_instances, ¶ms).await; - // Requesting another instance won't have enough space - let name2 = Name::try_from(String::from("test2")).unwrap(); - instance_params.identity.name = name2; - expect_instance_creation_fail_unavailable( - client, - &url_instances, - &instance_params, - ) - .await; + let instance = instance_get(&client, &get_instance_url(config.0)).await; + instances.push(instance); + } - // But if we delete the first instace, we'll have space - let url_instance = get_instance_url(&name1.to_string()); - expect_instance_deletion_ok(client, &url_instance).await; - expect_instance_creation_ok(client, &url_instances, &instance_params).await; + // Only the first properly-sized instance should be able to start. 
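    // As in the CPU test above, "just-right-memory" consumes the entire
    // advertised reservoir (note the limit here is now TEST_RESERVOIR_RAM
    // rather than TEST_PHYSICAL_RAM), so "insufficient-space" can only start
    // once it has been stopped again.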
+ for config in &configs { + match config.2 { + Ok(_) => expect_instance_start_ok(client, config.0).await, + Err(_) => { + expect_instance_start_fail_unavailable(client, config.0).await + } + } + } + + // Make the started instance transition to Running, shut it down, and verify + // that the other reasonably-sized instance can now start. + let nexus = &cptestctx.server.apictx().nexus; + instance_simulate(nexus, &instances[1].identity.id).await; + instances[1] = instance_post(client, configs[1].0, InstanceOp::Stop).await; + instance_simulate(nexus, &instances[1].identity.id).await; + expect_instance_start_ok(client, configs[2].0).await; } #[nexus_test] @@ -3288,17 +3349,8 @@ async fn test_instance_serial(cptestctx: &ControlPlaneTestContext) { format!("not found: instance with name \"{}\"", instance_name).as_str() ); - // Create an instance. + // Create an instance and poke it to ensure it's running. let instance = create_instance(client, PROJECT_NAME, instance_name).await; - - // Now, simulate completion of instance boot and check the state reported. - // NOTE: prior to this instance_simulate call, nexus's stored propolis addr - // is one it allocated in a 'real' sled-agent IP range as part of its usual - // instance-creation saga. after we poke the new run state for the instance - // here, sled-agent-sim will send an entire updated InstanceRuntimeState - // back to nexus, including the localhost address on which the mock - // propolis-server is running, which overwrites this -- after which nexus's - // serial-console related API calls will start working. instance_simulate(nexus, &instance.identity.id).await; let instance_next = instance_get(&client, &instance_url).await; identity_eq(&instance.identity, &instance_next.identity); @@ -3308,6 +3360,29 @@ async fn test_instance_serial(cptestctx: &ControlPlaneTestContext) { > instance.runtime.time_run_state_updated ); + // Starting a simulated instance with a mock Propolis server starts the + // mock, but it serves on localhost instead of the address that was chosen + // by the instance start process. Forcibly update the VMM record to point to + // the correct IP. + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + let (.., db_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(instance.identity.id) + .fetch() + .await + .unwrap(); + let propolis_id = db_instance + .runtime() + .propolis_id + .expect("running instance should have vmm"); + let localhost = std::net::IpAddr::V6(std::net::Ipv6Addr::LOCALHOST); + let updated_vmm = datastore + .vmm_overwrite_ip_for_test(&opctx, &propolis_id, localhost.into()) + .await + .unwrap(); + assert_eq!(updated_vmm.propolis_ip.ip(), localhost); + // Query serial output history endpoint // This is the first line of output generated by the mock propolis-server. 
let expected = "This is simulated serial console output for ".as_bytes(); @@ -3615,16 +3690,25 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); - let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore) + let (.., authz_instance) = LookupPath::new(&opctx, &datastore) .instance_id(instance.identity.id) - .fetch() + .lookup_for(nexus_db_queries::authz::Action::Read) + .await + .unwrap(); + + let instance_state = datastore + .instance_fetch_with_vmm(&opctx, &authz_instance) .await .unwrap(); + let sled_id = + instance_state.sled_id().expect("running instance should have a sled"); + let guest_nics = datastore .derive_guest_network_interface_info(&opctx, &authz_instance) .await .unwrap(); + assert_eq!(guest_nics.len(), 1); let mut sled_agents: Vec<&Arc> = @@ -3634,7 +3718,7 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { for sled_agent in &sled_agents { // TODO(#3107) Remove this bifurcation when Nexus programs all mappings // itself. - if sled_agent.id != db_instance.runtime().sled_id { + if sled_agent.id != sled_id { assert_sled_v2p_mappings( sled_agent, &nics[0], @@ -3765,7 +3849,11 @@ async fn assert_sled_v2p_mappings( /// instance, and then tell it to finish simulating whatever async transition is /// going on. pub async fn instance_simulate(nexus: &Arc, id: &Uuid) { - let sa = nexus.instance_sled_by_id(id).await.unwrap(); + let sa = nexus + .instance_sled_by_id(id) + .await + .unwrap() + .expect("instance must be on a sled to simulate a state change"); sa.instance_finish_transition(*id).await; } diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index 27f4b04290..6a633fc5e1 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -889,7 +889,11 @@ async fn test_ip_range_delete_with_allocated_external_ip_fails( .expect("Failed to stop instance"); // Simulate the transition, wait until it is in fact stopped. - let sa = nexus.instance_sled_by_id(&instance.identity.id).await.unwrap(); + let sa = nexus + .instance_sled_by_id(&instance.identity.id) + .await + .unwrap() + .expect("running instance should be on a sled"); sa.instance_finish_transition(instance.identity.id).await; // Delete the instance diff --git a/nexus/tests/integration_tests/pantry.rs b/nexus/tests/integration_tests/pantry.rs index c63f57e7fb..26e27e92ee 100644 --- a/nexus/tests/integration_tests/pantry.rs +++ b/nexus/tests/integration_tests/pantry.rs @@ -84,7 +84,11 @@ async fn set_instance_state( } async fn instance_simulate(nexus: &Arc, id: &Uuid) { - let sa = nexus.instance_sled_by_id(id).await.unwrap(); + let sa = nexus + .instance_sled_by_id(id) + .await + .unwrap() + .expect("instance must be on a sled to simulate a state change"); sa.instance_finish_transition(*id).await; } diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 1d4556e8ed..6d2595b561 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -569,7 +569,13 @@ impl InformationSchema { fn pretty_assert_eq(&self, other: &Self) { // similar_asserts gets us nice diff that only includes the relevant context. 
// the columns diff especially needs this: it can be 20k lines otherwise + similar_asserts::assert_eq!(self.tables, other.tables); similar_asserts::assert_eq!(self.columns, other.columns); + similar_asserts::assert_eq!(self.views, other.views); + similar_asserts::assert_eq!( + self.table_constraints, + other.table_constraints + ); similar_asserts::assert_eq!( self.check_constraints, other.check_constraints @@ -586,15 +592,9 @@ impl InformationSchema { self.referential_constraints, other.referential_constraints ); - similar_asserts::assert_eq!(self.views, other.views); similar_asserts::assert_eq!(self.statistics, other.statistics); similar_asserts::assert_eq!(self.sequences, other.sequences); similar_asserts::assert_eq!(self.pg_indexes, other.pg_indexes); - similar_asserts::assert_eq!(self.tables, other.tables); - similar_asserts::assert_eq!( - self.table_constraints, - other.table_constraints - ); } async fn new(crdb: &CockroachInstance) -> Self { diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 1ec8c1a5eb..67db222155 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -211,7 +211,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceRuntimeState" + "$ref": "#/components/schemas/SledInstanceState" } } }, @@ -3537,102 +3537,44 @@ "start_time" ] }, - "InstanceCpuCount": { - "description": "The number of CPUs in an Instance", - "type": "integer", - "format": "uint16", - "minimum": 0 - }, "InstanceRuntimeState": { - "description": "Runtime state of the Instance, including the actual running state and minimal metadata\n\nThis state is owned by the sled agent running that Instance.", + "description": "The dynamic runtime properties of an instance: its current VMM ID (if any), migration information (if any), and the instance state to report if there is no active VMM.", "type": "object", "properties": { "dst_propolis_id": { "nullable": true, - "description": "the target propolis-server during a migration of this Instance", + "description": "If a migration is active, the ID of the target VMM.", "type": "string", "format": "uuid" }, "gen": { - "description": "generation number for this state", + "description": "Generation number for this state.", "allOf": [ { "$ref": "#/components/schemas/Generation" } ] }, - "hostname": { - "description": "RFC1035-compliant hostname for the Instance.", - "type": "string" - }, - "memory": { - "description": "memory allocated for this Instance", - "allOf": [ - { - "$ref": "#/components/schemas/ByteCount" - } - ] - }, "migration_id": { "nullable": true, - "description": "migration id (if one in process)", + "description": "If a migration is active, the ID of that migration.", "type": "string", "format": "uuid" }, - "ncpus": { - "description": "number of CPUs allocated for this Instance", - "allOf": [ - { - "$ref": "#/components/schemas/InstanceCpuCount" - } - ] - }, - "propolis_addr": { - "nullable": true, - "description": "address of propolis-server running this Instance", - "type": "string" - }, - "propolis_gen": { - "description": "The generation number for the Propolis and sled identifiers for this instance.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - }, "propolis_id": { - "description": "which propolis-server is running this Instance", - "type": "string", - "format": "uuid" - }, - "run_state": { - "description": "runtime state of the Instance", - "allOf": [ - { - "$ref": "#/components/schemas/InstanceState" - } - ] - }, - "sled_id": { - 
"description": "which sled is running this Instance", + "nullable": true, + "description": "The instance's currently active VMM ID.", "type": "string", "format": "uuid" }, "time_updated": { - "description": "timestamp for this information", + "description": "Timestamp for this information.", "type": "string", "format": "date-time" } }, "required": [ "gen", - "hostname", - "memory", - "ncpus", - "propolis_gen", - "propolis_id", - "run_state", - "sled_id", "time_updated" ] }, @@ -5002,6 +4944,38 @@ "usable_physical_ram" ] }, + "SledInstanceState": { + "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", + "type": "object", + "properties": { + "instance_state": { + "description": "The sled's conception of the state of the instance.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceRuntimeState" + } + ] + }, + "propolis_id": { + "description": "The ID of the VMM whose state is being reported.", + "type": "string", + "format": "uuid" + }, + "vmm_state": { + "description": "The most recent state of the sled's VMM process.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmRuntimeState" + } + ] + } + }, + "required": [ + "instance_state", + "propolis_id", + "vmm_state" + ] + }, "SledRole": { "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", "oneOf": [ @@ -5185,6 +5159,38 @@ "minLength": 1, "maxLength": 63 }, + "VmmRuntimeState": { + "description": "The dynamic runtime properties of an individual VMM process.", + "type": "object", + "properties": { + "gen": { + "description": "The generation number for this VMM's state.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "state": { + "description": "The last state reported by this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceState" + } + ] + }, + "time_updated": { + "description": "Timestamp for the VMM's state.", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "gen", + "state", + "time_updated" + ] + }, "ZpoolPutRequest": { "description": "Sent by a sled agent on startup to Nexus to request further instruction", "type": "object", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 91f027d28c..56437ab283 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -101,7 +101,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceRuntimeState" + "$ref": "#/components/schemas/SledInstanceState" } } } @@ -231,7 +231,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceRuntimeState" + "$ref": "#/components/schemas/SledInstanceState" } } } @@ -1644,20 +1644,49 @@ "minimum": 0 }, "InstanceEnsureBody": { - "description": "The body of a request to ensure that an instance is known to a sled agent.", + "description": "The body of a request to ensure that a instance and VMM are known to a sled agent.", "type": "object", "properties": { - "initial": { + "hardware": { "description": "A description of the instance's virtual hardware and the initial runtime state this sled agent should store for this incarnation of the instance.", "allOf": [ { "$ref": "#/components/schemas/InstanceHardware" } ] + }, + "instance_runtime": { + "description": "The instance runtime state for the instance being registered.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceRuntimeState" + } + ] + }, + "propolis_addr": { + 
"description": "The address at which this VMM should serve a Propolis server API.", + "type": "string" + }, + "propolis_id": { + "description": "The ID of the VMM being registered. This may not be the active VMM ID in the instance runtime state (e.g. if the new VMM is going to be a migration target).", + "type": "string", + "format": "uuid" + }, + "vmm_runtime": { + "description": "The initial VMM runtime state for the VMM being registered.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmRuntimeState" + } + ] } }, "required": [ - "initial" + "hardware", + "instance_runtime", + "propolis_addr", + "propolis_id", + "vmm_runtime" ] }, "InstanceHardware": { @@ -1694,8 +1723,8 @@ "$ref": "#/components/schemas/NetworkInterface" } }, - "runtime": { - "$ref": "#/components/schemas/InstanceRuntimeState" + "properties": { + "$ref": "#/components/schemas/InstanceProperties" }, "source_nat": { "$ref": "#/components/schemas/SourceNatConfig" @@ -1706,7 +1735,7 @@ "external_ips", "firewall_rules", "nics", - "runtime", + "properties", "source_nat" ] }, @@ -1771,6 +1800,27 @@ "src_propolis_id" ] }, + "InstanceProperties": { + "description": "The \"static\" properties of an instance: information about the instance that doesn't change while the instance is running.", + "type": "object", + "properties": { + "hostname": { + "description": "RFC1035-compliant hostname for the instance.", + "type": "string" + }, + "memory": { + "$ref": "#/components/schemas/ByteCount" + }, + "ncpus": { + "$ref": "#/components/schemas/InstanceCpuCount" + } + }, + "required": [ + "hostname", + "memory", + "ncpus" + ] + }, "InstancePutMigrationIdsBody": { "description": "The body of a request to set or clear the migration identifiers from a sled agent's instance state records.", "type": "object", @@ -1785,7 +1835,7 @@ ] }, "old_runtime": { - "description": "The last runtime state known to this requestor. This request will succeed if either (a) the Propolis generation in the sled agent's runtime state matches the generation in this record, or (b) the sled agent's runtime state matches what would result from applying this request to the caller's runtime state. This latter condition provides idempotency.", + "description": "The last instance runtime state known to this requestor. This request will succeed if either (a) the state generation in the sled agent's runtime state matches the generation in this record, or (b) the sled agent's runtime state matches what would result from applying this request to the caller's runtime state. This latter condition provides idempotency.", "allOf": [ { "$ref": "#/components/schemas/InstanceRuntimeState" @@ -1823,102 +1873,50 @@ "description": "The current runtime state of the instance after handling the request to change its state. 
If the instance's state did not change, this field is `None`.", "allOf": [ { - "$ref": "#/components/schemas/InstanceRuntimeState" + "$ref": "#/components/schemas/SledInstanceState" } ] } } }, "InstanceRuntimeState": { - "description": "Runtime state of the Instance, including the actual running state and minimal metadata\n\nThis state is owned by the sled agent running that Instance.", + "description": "The dynamic runtime properties of an instance: its current VMM ID (if any), migration information (if any), and the instance state to report if there is no active VMM.", "type": "object", "properties": { "dst_propolis_id": { "nullable": true, - "description": "the target propolis-server during a migration of this Instance", + "description": "If a migration is active, the ID of the target VMM.", "type": "string", "format": "uuid" }, "gen": { - "description": "generation number for this state", + "description": "Generation number for this state.", "allOf": [ { "$ref": "#/components/schemas/Generation" } ] }, - "hostname": { - "description": "RFC1035-compliant hostname for the Instance.", - "type": "string" - }, - "memory": { - "description": "memory allocated for this Instance", - "allOf": [ - { - "$ref": "#/components/schemas/ByteCount" - } - ] - }, "migration_id": { "nullable": true, - "description": "migration id (if one in process)", + "description": "If a migration is active, the ID of that migration.", "type": "string", "format": "uuid" }, - "ncpus": { - "description": "number of CPUs allocated for this Instance", - "allOf": [ - { - "$ref": "#/components/schemas/InstanceCpuCount" - } - ] - }, - "propolis_addr": { - "nullable": true, - "description": "address of propolis-server running this Instance", - "type": "string" - }, - "propolis_gen": { - "description": "The generation number for the Propolis and sled identifiers for this instance.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - }, "propolis_id": { - "description": "which propolis-server is running this Instance", - "type": "string", - "format": "uuid" - }, - "run_state": { - "description": "runtime state of the Instance", - "allOf": [ - { - "$ref": "#/components/schemas/InstanceState" - } - ] - }, - "sled_id": { - "description": "which sled is running this Instance", + "nullable": true, + "description": "The instance's currently active VMM ID.", "type": "string", "format": "uuid" }, "time_updated": { - "description": "timestamp for this information", + "description": "Timestamp for this information.", "type": "string", "format": "date-time" } }, "required": [ "gen", - "hostname", - "memory", - "ncpus", - "propolis_gen", - "propolis_id", - "run_state", - "sled_id", "time_updated" ] }, @@ -2075,7 +2073,7 @@ "description": "The current state of the instance after handling the request to unregister it. 
If the instance's state did not change, this field is `None`.", "allOf": [ { - "$ref": "#/components/schemas/InstanceRuntimeState" + "$ref": "#/components/schemas/SledInstanceState" } ] } @@ -2701,6 +2699,38 @@ "vni" ] }, + "SledInstanceState": { + "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", + "type": "object", + "properties": { + "instance_state": { + "description": "The sled's conception of the state of the instance.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceRuntimeState" + } + ] + }, + "propolis_id": { + "description": "The ID of the VMM whose state is being reported.", + "type": "string", + "format": "uuid" + }, + "vmm_state": { + "description": "The most recent state of the sled's VMM process.", + "allOf": [ + { + "$ref": "#/components/schemas/VmmRuntimeState" + } + ] + } + }, + "required": [ + "instance_state", + "propolis_id", + "vmm_state" + ] + }, "SledRole": { "oneOf": [ { @@ -2834,6 +2864,38 @@ "version" ] }, + "VmmRuntimeState": { + "description": "The dynamic runtime properties of an individual VMM process.", + "type": "object", + "properties": { + "gen": { + "description": "The generation number for this VMM's state.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "state": { + "description": "The last state reported by this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/InstanceState" + } + ] + }, + "time_updated": { + "description": "Timestamp for the VMM's state.", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "gen", + "state", + "time_updated" + ] + }, "Vni": { "description": "A Geneve Virtual Network Identifier", "type": "integer", diff --git a/schema/crdb/6.0.0/README.adoc b/schema/crdb/6.0.0/README.adoc new file mode 100644 index 0000000000..e59c04fe83 --- /dev/null +++ b/schema/crdb/6.0.0/README.adoc @@ -0,0 +1,14 @@ +This upgrade turns VMM processes into first-class objects in the Omicron data +model. Instead of storing an instance's runtime state entirely in the Instance +table, Nexus stores per-VMM state and uses an Instance's active_propolis_id to +determine which VMM (if any) holds the instance's current runtime state. This +makes it much easier for Nexus to reason about the lifecycles of Propolis jobs +and their resource requirements. + +In this scheme: + +* Sled assignments and Propolis server IPs are tracked per-VMM. +* An instance may not have an active VMM at all. In that case its own `state` + column supplies the instance's logical state. +* An instance's two generation numbers (one for the reported instance state and + one for its Propolis IDs) are once again collapsed into a single number. diff --git a/schema/crdb/6.0.0/up01.sql b/schema/crdb/6.0.0/up01.sql new file mode 100644 index 0000000000..b532fc8019 --- /dev/null +++ b/schema/crdb/6.0.0/up01.sql @@ -0,0 +1,6 @@ +/* + * Drop the instance-by-sled index since there will no longer be a sled ID in + * the instance table. + */ + +DROP INDEX IF EXISTS lookup_instance_by_sled; diff --git a/schema/crdb/6.0.0/up02.sql b/schema/crdb/6.0.0/up02.sql new file mode 100644 index 0000000000..51f796f512 --- /dev/null +++ b/schema/crdb/6.0.0/up02.sql @@ -0,0 +1,13 @@ +/* + * The sled_instance view cannot be modified in place because it depends on the + * VMM table. 
It would be nice to define the VMM table and then alter the + * sled_instance table, but there's no way to express this correctly in the + * clean-slate DB initialization SQL (dbinit.sql) because it requires inserting + * a table into the middle of an existing sequence of table definitions. (See + * the README for more on why this causes problems.) Instead, delete the + * `sled_instance` view, then add the VMM table, then add the view back and + * leave it to `dbinit.sql` to re-create the resulting object ordering when + * creating a database from a clean slate. + */ + +DROP VIEW IF EXISTS omicron.public.sled_instance; diff --git a/schema/crdb/6.0.0/up03.sql b/schema/crdb/6.0.0/up03.sql new file mode 100644 index 0000000000..698a5f6f2d --- /dev/null +++ b/schema/crdb/6.0.0/up03.sql @@ -0,0 +1,11 @@ +CREATE TABLE IF NOT EXISTS omicron.public.vmm ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + instance_id UUID NOT NULL, + state omicron.public.instance_state NOT NULL, + time_state_updated TIMESTAMPTZ NOT NULL, + state_generation INT NOT NULL, + sled_id UUID NOT NULL, + propolis_ip INET NOT NULL +); diff --git a/schema/crdb/6.0.0/up04.sql b/schema/crdb/6.0.0/up04.sql new file mode 100644 index 0000000000..b1a96ece52 --- /dev/null +++ b/schema/crdb/6.0.0/up04.sql @@ -0,0 +1,23 @@ +CREATE OR REPLACE VIEW omicron.public.sled_instance +AS SELECT + instance.id, + instance.name, + silo.name as silo_name, + project.name as project_name, + vmm.sled_id as active_sled_id, + instance.time_created, + instance.time_modified, + instance.migration_id, + instance.ncpus, + instance.memory, + vmm.state +FROM + omicron.public.instance AS instance + JOIN omicron.public.project AS project ON + instance.project_id = project.id + JOIN omicron.public.silo AS silo ON + project.silo_id = silo.id + JOIN omicron.public.vmm AS vmm ON + instance.active_propolis_id = vmm.id +WHERE + instance.time_deleted IS NULL AND vmm.time_deleted IS NULL; diff --git a/schema/crdb/6.0.0/up05.sql b/schema/crdb/6.0.0/up05.sql new file mode 100644 index 0000000000..034d2f75e8 --- /dev/null +++ b/schema/crdb/6.0.0/up05.sql @@ -0,0 +1,8 @@ +/* + * Now that the sled_instance view is up-to-date, begin to drop columns from the + * instance table that are no longer needed. This needs to be done after + * altering the sled_instance view because it's illegal to drop columns that a + * view depends on. 
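(A minimal sketch of the dependency error this ordering avoids, using a hypothetical table `widget` and a hypothetical view `widget_view` rather than objects from this schema: CockroachDB, like PostgreSQL, refuses to drop a column that a view still references.)

```
-- Hypothetical objects, for illustration only; not part of this migration.
CREATE TABLE widget (id UUID PRIMARY KEY, legacy_col INT);
CREATE VIEW widget_view AS SELECT id, legacy_col FROM widget;

-- This fails with a dependency error while widget_view still references
-- legacy_col:
--   ALTER TABLE widget DROP COLUMN legacy_col;

-- Drop (or redefine) the dependent view first, then drop the column.
DROP VIEW widget_view;
ALTER TABLE widget DROP COLUMN legacy_col;
CREATE VIEW widget_view AS SELECT id FROM widget;
```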
+ */
+
+ALTER TABLE omicron.public.instance DROP COLUMN IF EXISTS active_sled_id;
diff --git a/schema/crdb/6.0.0/up06.sql b/schema/crdb/6.0.0/up06.sql
new file mode 100644
index 0000000000..42f73d82b8
--- /dev/null
+++ b/schema/crdb/6.0.0/up06.sql
@@ -0,0 +1 @@
+ALTER TABLE omicron.public.instance DROP COLUMN IF EXISTS active_propolis_ip;
diff --git a/schema/crdb/6.0.0/up07.sql b/schema/crdb/6.0.0/up07.sql
new file mode 100644
index 0000000000..d8bc3cae13
--- /dev/null
+++ b/schema/crdb/6.0.0/up07.sql
@@ -0,0 +1 @@
+ALTER TABLE omicron.public.instance DROP COLUMN IF EXISTS propolis_generation;
diff --git a/schema/crdb/6.0.0/up08.sql b/schema/crdb/6.0.0/up08.sql
new file mode 100644
index 0000000000..776b794a44
--- /dev/null
+++ b/schema/crdb/6.0.0/up08.sql
@@ -0,0 +1 @@
+ALTER TABLE omicron.public.instance ALTER COLUMN active_propolis_id DROP NOT NULL;
diff --git a/schema/crdb/6.0.0/up09.sql b/schema/crdb/6.0.0/up09.sql
new file mode 100644
index 0000000000..1d435cec6c
--- /dev/null
+++ b/schema/crdb/6.0.0/up09.sql
@@ -0,0 +1,10 @@
+/*
+ * Because this is an offline update, the system comes back up with no active
+ * VMMs. Ensure all active Propolis IDs are cleared. This guileless approach
+ * gets planned as a full table scan, so explicitly (but temporarily) allow
+ * those.
+ */
+
+set disallow_full_table_scans = off;
+UPDATE omicron.public.instance SET active_propolis_id = NULL;
+set disallow_full_table_scans = on;
diff --git a/schema/crdb/README.adoc b/schema/crdb/README.adoc
index ef96571d00..c15b51e374 100644
--- a/schema/crdb/README.adoc
+++ b/schema/crdb/README.adoc
@@ -62,7 +62,7 @@ Process:
   after your update is applied. Don't forget to update the version field of
   `db_metadata` at the bottom of the file!
 ** If necessary, do the same thing for `schema/crdb/dbwipe.sql`.
-* Update Nexus's idea of the latest schema, by updating it's `SCHEMA_VERSION` to
+* Update Nexus's idea of the latest schema, by updating its `SCHEMA_VERSION` to
   `NEW_VERSION` within `nexus/db-model/src/schema.rs`.
 
 SQL Validation, via Automated Tests:
@@ -70,3 +70,65 @@ SQL Validation, via Automated Tests:
 * The `SCHEMA_VERSION` matches the version used in `dbinit.sql`
 * The combination of all `up.sql` files results in the same schema as `dbinit.sql`
 * All `up.sql` files can be applied twice without error
+
+==== Handling common schema changes
+
+CockroachDB's schema includes a description of all of the database's CHECK
+constraints. If a CHECK constraint is anonymous (i.e. it is written simply as
+`CHECK <expression>` and not `CONSTRAINT <name> CHECK expression`), CRDB
+assigns it a name based on the table and column to which the constraint applies.
+The challenge is that CRDB identifies tables and columns using opaque
+identifiers whose values depend on the order in which tables and views were
+defined in the current database. This means that adding, removing, or renaming
+objects needs to be done carefully to preserve the relative ordering of objects
+in new databases created by `dbinit.sql` and upgraded databases created by
+applying `up.sql` transformations.
+
+===== Adding new columns with constraints
+
+Strongly consider naming new constraints (`CONSTRAINT <constraint name>`) to
+avoid the problems with anonymous constraints described above.
+
+===== Adding tables and views
+
+New tables and views must be added to the end of `dbinit.sql` so that the order
+of preceding `CREATE` statements is left unchanged. If your changes fail the
+`CHECK` constraints test and you get a constraint name diff like this...
+ +``` +NamedSqlValue { + column: "constraint_name", + value: Some( + String( +< "4101115737_149_10_not_null", +> "4101115737_148_10_not_null", +``` + +...then you've probably inadvertently added a table or view in the wrong place. + +==== Adding new source tables to an existing view + +An upgrade can add a new table and then use a `CREATE OR REPLACE VIEW` statement +to make an existing view depend on that table. To do this in `dbinit.sql` while +maintaining table and view ordering, use `CREATE VIEW` to create a "placeholder" +view in the correct position, then add the table to the bottom of `dbinit.sql` +and use `CREATE OR REPLACE VIEW` to "fill out" the placeholder definition to +refer to the new table. (You may need to do the `CREATE OR REPLACE VIEW` in a +separate transaction from the original `CREATE VIEW`.) + +Note that `CREATE OR REPLACE VIEW` requires that the new view maintain all of +the columns of the old view with the same type and same order (though the query +used to populate them can change. See +https://www.postgresql.org/docs/15/sql-createview.html. + +==== Renaming columns + +Idempotently renaming existing columns is unfortunately not possible in our +current database configuration. (Postgres doesn't support the use of an `IF +EXISTS` qualifier on an `ALTER TABLE RENAME COLUMN` statement, and the version +of CockroachDB we use at this writing doesn't support the use of user-defined +functions as a workaround.) + +An (imperfect) workaround is to use the `#[diesel(column_name = foo)]` attribute +in Rust code to preserve the existing name of a column in the database while +giving its corresponding struct field a different, more meaningful name. diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index a62cbae5ea..2b06e4cbd6 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -834,44 +834,20 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance ( /* user data for instance initialization systems (e.g. cloud-init) */ user_data BYTES NOT NULL, - /* - * TODO Would it make sense for the runtime state to live in a separate - * table? - */ - /* Runtime state */ + /* The state of the instance when it has no active VMM. */ state omicron.public.instance_state NOT NULL, time_state_updated TIMESTAMPTZ NOT NULL, state_generation INT NOT NULL, - /* - * Sled where the VM is currently running, if any. Note that when we - * support live migration, there may be multiple sleds associated with - * this Instance, but only one will be truly active. Still, consumers of - * this information should consider whether they also want to know the other - * sleds involved in the migration. - */ - active_sled_id UUID, - /* Identifies the underlying propolis-server backing the instance. */ - active_propolis_id UUID NOT NULL, - active_propolis_ip INET, + /* FK into `vmm` for the Propolis server that's backing this instance. */ + active_propolis_id UUID, - /* Identifies the target propolis-server during a migration of the instance. */ + /* FK into `vmm` for the migration target Propolis server, if one exists. */ target_propolis_id UUID, - /* - * Identifies an ongoing migration for this instance. - */ + /* Identifies any ongoing migration for this instance. */ migration_id UUID, - /* - * A generation number protecting information about the "location" of a - * running instance: its active server ID, Propolis ID and IP, and migration - * information. 
This is used for mutual exclusion (to allow only one - * migration to proceed at a time) and to coordinate state changes when a - * migration finishes. - */ - propolis_generation INT NOT NULL, - /* Instance configuration */ ncpus INT NOT NULL, memory INT NOT NULL, @@ -886,42 +862,23 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_instance_by_project ON omicron.public.i ) WHERE time_deleted IS NULL; --- Allow looking up instances by server. This is particularly --- useful for resource accounting within a sled. -CREATE UNIQUE INDEX IF NOT EXISTS lookup_instance_by_sled ON omicron.public.instance ( - active_sled_id, - id -) WHERE - time_deleted IS NULL; - /* * A special view of an instance provided to operators for insights into what's running * on a sled. + * + * This view requires the VMM table, which doesn't exist yet, so create a + * "placeholder" view here and replace it with the full view once the table is + * defined. See the README for more context. */ -CREATE VIEW IF NOT EXISTS omicron.public.sled_instance +CREATE VIEW IF NOT EXISTS omicron.public.sled_instance AS SELECT - instance.id, - instance.name, - silo.name as silo_name, - project.name as project_name, - instance.active_sled_id, - instance.time_created, - instance.time_modified, - instance.migration_id, - instance.ncpus, - instance.memory, - instance.state + instance.id FROM omicron.public.instance AS instance - JOIN omicron.public.project AS project ON - instance.project_id = project.id - JOIN omicron.public.silo AS silo ON - project.silo_id = silo.id WHERE instance.time_deleted IS NULL; - /* * Guest-Visible, Virtual Disks */ @@ -2543,8 +2500,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_address_config ( PRIMARY KEY (port_settings_id, address, interface_name) ); +/* + * The `sled_instance` view's definition needs to be modified in a separate + * transaction from the transaction that created it. + */ -/*******************************************************************/ +COMMIT; +BEGIN; /* * Metadata for the schema itself. This version number isn't great, as there's @@ -2580,4 +2542,52 @@ INSERT INTO omicron.public.db_metadata ( ( TRUE, NOW(), NOW(), '6.0.0', NULL) ON CONFLICT DO NOTHING; + + +-- Per-VMM state. +CREATE TABLE IF NOT EXISTS omicron.public.vmm ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + instance_id UUID NOT NULL, + state omicron.public.instance_state NOT NULL, + time_state_updated TIMESTAMPTZ NOT NULL, + state_generation INT NOT NULL, + sled_id UUID NOT NULL, + propolis_ip INET NOT NULL +); + +/* + * A special view of an instance provided to operators for insights into what's + * running on a sled. + * + * This view replaces the placeholder `sled_instance` view defined above. Any + * columns in the placeholder must appear in the replacement in the same order + * and with the same types they had in the placeholder. 
+ */ + +CREATE OR REPLACE VIEW omicron.public.sled_instance +AS SELECT + instance.id, + instance.name, + silo.name as silo_name, + project.name as project_name, + vmm.sled_id as active_sled_id, + instance.time_created, + instance.time_modified, + instance.migration_id, + instance.ncpus, + instance.memory, + vmm.state +FROM + omicron.public.instance AS instance + JOIN omicron.public.project AS project ON + instance.project_id = project.id + JOIN omicron.public.silo AS silo ON + project.silo_id = silo.id + JOIN omicron.public.vmm AS vmm ON + instance.active_propolis_id = vmm.id +WHERE + instance.time_deleted IS NULL AND vmm.time_deleted IS NULL; + COMMIT; diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs index 0f7b91e56b..9e285840e0 100644 --- a/sled-agent/src/common/instance.rs +++ b/sled-agent/src/common/instance.rs @@ -5,12 +5,65 @@ //! Describes the states of VM instances. use crate::params::InstanceMigrationSourceParams; -use chrono::Utc; +use chrono::{DateTime, Utc}; use omicron_common::api::external::InstanceState as ApiInstanceState; -use omicron_common::api::internal::nexus::InstanceRuntimeState; +use omicron_common::api::internal::nexus::{ + InstanceRuntimeState, SledInstanceState, VmmRuntimeState, +}; use propolis_client::api::{ - InstanceState as PropolisInstanceState, InstanceStateMonitorResponse, + InstanceState as PropolisApiState, InstanceStateMonitorResponse, }; +use uuid::Uuid; + +/// The instance and VMM state that sled agent maintains on a per-VMM basis. +#[derive(Clone, Debug)] +pub struct InstanceStates { + instance: InstanceRuntimeState, + vmm: VmmRuntimeState, + propolis_id: Uuid, +} + +/// Newtype to allow conversion from Propolis API states (returned by the +/// Propolis state monitor) to Nexus VMM states. +#[derive(Clone, Copy, Debug)] +pub(crate) struct PropolisInstanceState(PropolisApiState); + +impl From for PropolisInstanceState { + fn from(value: PropolisApiState) -> Self { + Self(value) + } +} + +impl From for ApiInstanceState { + fn from(value: PropolisInstanceState) -> Self { + use propolis_client::api::InstanceState as State; + match value.0 { + // Nexus uses the VMM state as the externally-visible instance state + // when an instance has an active VMM. A Propolis that is "creating" + // its virtual machine objects is "starting" from the external API's + // perspective. + State::Creating | State::Starting => ApiInstanceState::Starting, + State::Running => ApiInstanceState::Running, + State::Stopping => ApiInstanceState::Stopping, + // A Propolis that is stopped but not yet destroyed should still + // appear to be Stopping from an external API perspective, since + // they cannot be restarted yet. Instances become logically Stopped + // once Propolis reports that the VM is Destroyed (see below). + State::Stopped => ApiInstanceState::Stopping, + State::Rebooting => ApiInstanceState::Rebooting, + State::Migrating => ApiInstanceState::Migrating, + State::Repairing => ApiInstanceState::Repairing, + State::Failed => ApiInstanceState::Failed, + // Nexus needs to learn when a VM has entered the "destroyed" state + // so that it can release its resource reservation. When this + // happens, this module also clears the active VMM ID from the + // instance record, which will accordingly set the Nexus-owned + // instance state to Stopped, preventing this state from being used + // as an externally-visible instance state. 
+ State::Destroyed => ApiInstanceState::Destroyed, + } + } +} /// Describes the status of the migration identified in an instance's runtime /// state as it relates to any migration status information reported by the @@ -21,30 +74,12 @@ pub enum ObservedMigrationStatus { /// progress. NoMigration, - /// Propolis thinks a migration is in progress, but its migration ID does - /// not agree with the instance's current runtime state: either the current - /// runtime state has no ID, or Propolis has an older ID than sled agent - /// does because a newer migration has begun (see below). - /// - /// This is expected in the following scenarios: - /// - /// - Propolis was initialized via migration in, after which Nexus cleared - /// the instance's migration IDs. - /// - Propolis was initialized via migration in, and the instance is about - /// to migrate again. Propolis will have the old ID (from the migration - /// in) while the instance runtime state has the new ID (from the pending - /// migration out). - MismatchedId, - - /// Either: - /// - /// - The instance's runtime state contains a migration ID, but Propolis did - /// not report any migration was in progress, or - /// - Propolis reported that the active migration is not done yet. - /// - /// The first case occurs when the current instance is queued to be a - /// migration source, but its Propolis changed state before any migration - /// request reached that Propolis. + /// The instance has a migration ID, but Propolis either has no migration ID + /// or a different ID from this one (possible if the Propolis was + /// initialized via migration in). + Pending, + + /// Propolis reported that migration isn't done yet. InProgress, /// Propolis reported that the migration completed successfully. @@ -56,17 +91,16 @@ pub enum ObservedMigrationStatus { /// The information observed by the instance's Propolis state monitor. #[derive(Clone, Copy, Debug)] -pub struct ObservedPropolisState { +pub(crate) struct ObservedPropolisState { /// The state reported by Propolis's instance state monitor API. - /// - /// Note that this API allows transitions to be missed (if multiple - /// transitions occur between calls to the monitor, only the most recent - /// state is reported). - pub instance_state: PropolisInstanceState, + pub vmm_state: PropolisInstanceState, /// Information about whether the state observer queried migration status at /// all and, if so, what response it got from Propolis. pub migration_status: ObservedMigrationStatus, + + /// The approximate time at which this observation was made. + pub time: DateTime, } impl ObservedPropolisState { @@ -74,11 +108,11 @@ impl ObservedPropolisState { /// runtime state and an instance state monitor response received from /// Propolis. pub fn new( - runtime_state: &InstanceRuntimeState, + instance_runtime: &InstanceRuntimeState, propolis_state: &InstanceStateMonitorResponse, ) -> Self { let migration_status = - match (runtime_state.migration_id, &propolis_state.migration) { + match (instance_runtime.migration_id, &propolis_state.migration) { // If the runtime state and Propolis state agree that there's // a migration in progress, and they agree on its ID, the // Propolis migration state determines the migration status. @@ -97,22 +131,33 @@ impl ObservedPropolisState { } } - // If the migration IDs don't match, or Propolis thinks a - // migration is in progress but the instance's runtime state - // does not, report the mismatch. 
- (_, Some(_)) => ObservedMigrationStatus::MismatchedId, + // If both sides have a migration ID, but the IDs don't match, + // assume the instance's migration ID is newer. This can happen + // if Propolis was initialized via migration in and has not yet + // been told to migrate out. + (Some(_), Some(_)) => ObservedMigrationStatus::Pending, + + // If only Propolis has a migration ID, assume it was from a + // prior migration in and report that no migration is in + // progress. This could be improved with propolis#508. + (None, Some(_)) => ObservedMigrationStatus::NoMigration, // A migration source's migration IDs get set before its // Propolis actually gets asked to migrate, so it's possible for // the runtime state to contain an ID while the Propolis has // none, in which case the migration is pending. - (Some(_), None) => ObservedMigrationStatus::InProgress, + (Some(_), None) => ObservedMigrationStatus::Pending, // If neither side has a migration ID, then there's clearly no // migration. (None, None) => ObservedMigrationStatus::NoMigration, }; - Self { instance_state: propolis_state.state, migration_status } + + Self { + vmm_state: PropolisInstanceState(propolis_state.state), + migration_status, + time: Utc::now(), + } } } @@ -120,218 +165,261 @@ impl ObservedPropolisState { /// a subset of the instance states Nexus knows about: the Creating and /// Destroyed states are reserved for Nexus to use for instances that are being /// created for the very first time or have been explicitly deleted. -pub enum PublishedInstanceState { - Starting, - Running, +pub enum PublishedVmmState { Stopping, - Stopped, Rebooting, - Migrating, - Repairing, - Failed, } -impl From for PublishedInstanceState { - fn from(value: PropolisInstanceState) -> Self { +impl From for ApiInstanceState { + fn from(value: PublishedVmmState) -> Self { match value { - // From an external perspective, the instance has already been - // created. Creating the propolis instance is an internal detail and - // happens every time we start the instance, so we map it to - // "Starting" here. - PropolisInstanceState::Creating - | PropolisInstanceState::Starting => { - PublishedInstanceState::Starting - } - PropolisInstanceState::Running => PublishedInstanceState::Running, - PropolisInstanceState::Stopping => PublishedInstanceState::Stopping, - PropolisInstanceState::Stopped => PublishedInstanceState::Stopped, - PropolisInstanceState::Rebooting => { - PublishedInstanceState::Rebooting - } - PropolisInstanceState::Migrating => { - PublishedInstanceState::Migrating - } - PropolisInstanceState::Repairing => { - PublishedInstanceState::Repairing - } - PropolisInstanceState::Failed => PublishedInstanceState::Failed, - // NOTE: This is a bit of an odd one - we intentionally do *not* - // translate the "destroyed" propolis state to the destroyed instance - // API state. - // - // When a propolis instance reports that it has been destroyed, - // this does not necessarily mean the customer-visible instance - // should be torn down. Instead, it implies that the Propolis service - // should be stopped, but the VM could be allocated to a different - // machine. 
- PropolisInstanceState::Destroyed => PublishedInstanceState::Stopped, + PublishedVmmState::Stopping => ApiInstanceState::Stopping, + PublishedVmmState::Rebooting => ApiInstanceState::Rebooting, } } } -impl From for ApiInstanceState { - fn from(value: PublishedInstanceState) -> Self { - match value { - PublishedInstanceState::Starting => ApiInstanceState::Starting, - PublishedInstanceState::Running => ApiInstanceState::Running, - PublishedInstanceState::Stopping => ApiInstanceState::Stopping, - PublishedInstanceState::Stopped => ApiInstanceState::Stopped, - PublishedInstanceState::Rebooting => ApiInstanceState::Rebooting, - PublishedInstanceState::Migrating => ApiInstanceState::Migrating, - PublishedInstanceState::Repairing => ApiInstanceState::Repairing, - PublishedInstanceState::Failed => ApiInstanceState::Failed, - } - } +/// The possible roles a VMM can have vis-a-vis an instance. +#[derive(Clone, Copy, Debug, PartialEq)] +enum PropolisRole { + /// The VMM is its instance's current active VMM. + Active, + + /// The VMM is its instance's migration target VMM. + MigrationTarget, + + /// The instance does not refer to this VMM (but it may have done so in the + /// past). + Retired, } /// Action to be taken on behalf of state transition. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub enum Action { - /// Update the VM state to cause it to run. - Run, - /// Update the VM state to cause it to stop. - Stop, - /// Invoke a reboot of the VM. - Reboot, /// Terminate the VM and associated service. Destroy, } -/// A wrapper around an instance's current state, represented as a Nexus -/// `InstanceRuntimeState`. The externally-visible instance state in this -/// structure is mostly changed when the instance's Propolis state changes. -#[derive(Clone, Debug)] -pub struct InstanceStates { - current: InstanceRuntimeState, -} - impl InstanceStates { - pub fn new(current: InstanceRuntimeState) -> Self { - InstanceStates { current } + pub fn new( + instance: InstanceRuntimeState, + vmm: VmmRuntimeState, + propolis_id: Uuid, + ) -> Self { + InstanceStates { instance, vmm, propolis_id } + } + + pub fn instance(&self) -> &InstanceRuntimeState { + &self.instance + } + + pub fn vmm(&self) -> &VmmRuntimeState { + &self.vmm } - /// Returns the current instance state. - pub fn current(&self) -> &InstanceRuntimeState { - &self.current + pub fn propolis_id(&self) -> Uuid { + self.propolis_id } - /// Returns the current instance state. - pub fn current_mut(&mut self) -> &mut InstanceRuntimeState { - &mut self.current + /// Creates a `SledInstanceState` structure containing the entirety of this + /// structure's runtime state. This requires cloning; for simple read access + /// use the `instance` or `vmm` accessors instead. + pub fn sled_instance_state(&self) -> SledInstanceState { + SledInstanceState { + instance_state: self.instance.clone(), + vmm_state: self.vmm.clone(), + propolis_id: self.propolis_id, + } } /// Update the known state of an instance based on an observed state from /// Propolis. - pub fn apply_propolis_observation( + pub(crate) fn apply_propolis_observation( &mut self, observed: &ObservedPropolisState, ) -> Option { - // The state after this transition will be published to Nexus, so some - // care is required around migration to ensure that Nexus's instance - // state remains consistent even in the face of racing updates from a - // migration source and a migration target. The possibilities are as - // follows: - // - // 1. The current migration succeeded. - // 1a. 
Migration source: ideally this case would pass control to the - // target explicitly, but there currently isn't enough information - // to do that (the source doesn't know the target's sled ID or - // Propolis IP), so just let the target deal with updating - // everything. - // - // This is the one case in which this routine explicitly *should - // not* transition the current state (to avoid having a "stopped" - // state reach Nexus before the target takes control of the state - // machine). - // 1b. Migration target: Signal that migration is done by bumping the - // Propolis generation number and clearing the migration ID and - // destination Propolis ID from the instance record. - // 2. The current migration failed. - // 2a. Migration source: The source is running now. Clear the - // migration IDs, bump the Propolis generation number, and publish - // the updated state, ending the migration. - // 2b. Migration target: The target has failed and control of the - // instance remains with the source. Don't update the Propolis - // generation number. Updating state is OK here because migration - // targets can't update Nexus instance states without changing the - // Propolis generation. - // 3. No migration is ongoing, or the migration ID in the instance - // record doesn't line up with the putatively ongoing migration. Just - // update state normally in this case; whichever sled has the current - // Propolis generation will have its update applied. - // - // There is an additional exceptional case here: when an instance stops, - // its migration IDs should be cleared so that it can migrate when it is - // started again. If the VM is in a terminal state, and this is *not* - // case 1a above (i.e. the Propolis is stopping because the instance - // migrated out), clear any leftover migration IDs. - // - // TODO(#2315): Terminal-state cleanup should also clear an instance's - // sled assignment and Propolis ID, but that requires Nexus work to - // repopulate these when the instance starts again. - let action = if matches!( - observed.instance_state, - PropolisInstanceState::Destroyed | PropolisInstanceState::Failed - ) { - Some(Action::Destroy) - } else { - None - }; + let vmm_gone = matches!( + observed.vmm_state.0, + PropolisApiState::Destroyed | PropolisApiState::Failed + ); + + // Apply this observation to the VMM record. It is safe to apply the + // Destroyed state directly here because this routine ensures that if + // this VMM is active, it will be retired and an appropriate + // non-Destroyed state applied to the instance itself. + self.vmm.state = observed.vmm_state.into(); + self.vmm.gen = self.vmm.gen.next(); + self.vmm.time_updated = observed.time; - let next_state = PublishedInstanceState::from(observed.instance_state); + // Update the instance record to reflect the result of any completed + // migration. match observed.migration_status { - // Case 3: Update normally if there is no migration in progress or - // the current migration is unrecognized or in flight. - ObservedMigrationStatus::NoMigration - | ObservedMigrationStatus::MismatchedId - | ObservedMigrationStatus::InProgress => { - self.transition(next_state); - } + ObservedMigrationStatus::Succeeded => match self.propolis_role() { + // This is a successful migration out. Point the instance to the + // target VMM, but don't clear migration IDs; let the target do + // that so that the instance will continue to appear to be + // migrating until it is safe to migrate again. 
+ PropolisRole::Active => { + self.switch_propolis_id_to_target(observed.time); + + assert_eq!(self.propolis_role(), PropolisRole::Retired); + } - // Case 1: Migration succeeded. Only update the instance record if - // this is a migration target. - // - // Calling `is_migration_target` is safe here because the instance - // must have had a migration ID in its record to have inferred that - // an ongoing migration succeeded. - ObservedMigrationStatus::Succeeded => { - if self.is_migration_target() { - self.transition(next_state); - self.clear_migration_ids(); - } else { - // Case 1a: Short-circuit without touching the instance - // record. - return action; + // This is a successful migration in. Point the instance to the + // target VMM and clear migration IDs so that another migration + // in can begin. Propolis will continue reporting that this + // migration was successful, but because its ID has been + // discarded the observed migration status will change from + // Succeeded to NoMigration. + // + // Note that these calls increment the instance's generation + // number twice. This is by design and allows the target's + // migration-ID-clearing update to overtake the source's update. + PropolisRole::MigrationTarget => { + self.switch_propolis_id_to_target(observed.time); + self.clear_migration_ids(observed.time); + + assert_eq!(self.propolis_role(), PropolisRole::Active); } - } - // Case 2: Migration failed. Only update the instance record if this - // is a migration source. (Updating the target record is allowed, - // but still has to short-circuit so that the call to - // `clear_migration_ids` below is not reached.) - ObservedMigrationStatus::Failed => { - if self.is_migration_target() { - return action; - } else { - self.transition(next_state); - self.clear_migration_ids(); + // This is a migration source that previously reported success + // and removed itself from the active Propolis position. Don't + // touch the instance. + PropolisRole::Retired => {} + }, + ObservedMigrationStatus::Failed => match self.propolis_role() { + // This is a failed migration out. CLear migration IDs so that + // Nexus can try again. + PropolisRole::Active => { + self.clear_migration_ids(observed.time); } + + // This is a failed migration in. Leave the migration IDs alone + // so that the migration won't appear to have concluded until + // the source is ready to start a new one. + PropolisRole::MigrationTarget => {} + + // This VMM was part of a failed migration and was subsequently + // removed from the instance record entirely. There's nothing to + // update. + PropolisRole::Retired => {} + }, + ObservedMigrationStatus::NoMigration + | ObservedMigrationStatus::InProgress + | ObservedMigrationStatus::Pending => {} + } + + // If this Propolis has exited, tear down its zone. If it was in the + // active position, immediately retire any migration that might have + // been pending and clear the active Propolis ID so that the instance + // can start somewhere else. + // + // N.B. It is important to refetch the current Propolis role here, + // because it might have changed in the course of dealing with a + // completed migration. (In particular, if this VMM is gone because + // it was the source of a successful migration out, control has + // been transferred to the target, and what was once an active VMM + // is now retired.) 
+ if vmm_gone { + if self.propolis_role() == PropolisRole::Active { + self.clear_migration_ids(observed.time); + self.retire_active_propolis(observed.time); + } + Some(Action::Destroy) + } else { + None + } + } + + /// Yields the role that this structure's VMM has given the structure's + /// current instance state. + fn propolis_role(&self) -> PropolisRole { + if let Some(active_id) = self.instance.propolis_id { + if active_id == self.propolis_id { + return PropolisRole::Active; } } - if matches!(action, Some(Action::Destroy)) { - self.clear_migration_ids(); + if let Some(dst_id) = self.instance.dst_propolis_id { + if dst_id == self.propolis_id { + return PropolisRole::MigrationTarget; + } } - action + PropolisRole::Retired } - // Transitions to a new InstanceState value, updating the timestamp and - // generation number. - pub(crate) fn transition(&mut self, next: PublishedInstanceState) { - self.current.run_state = next.into(); - self.current.gen = self.current.gen.next(); - self.current.time_updated = Utc::now(); + /// Sets the no-VMM fallback state of the current instance to reflect the + /// state of its terminated VMM and clears the instance's current Propolis + /// ID. Note that this routine does not touch any migration IDs. + /// + /// This should only be called by the state block for an active VMM and only + /// when that VMM is in a terminal state (Destroyed or Failed). + fn retire_active_propolis(&mut self, now: DateTime) { + assert!(self.propolis_role() == PropolisRole::Active); + + self.instance.propolis_id = None; + self.instance.gen = self.instance.gen.next(); + self.instance.time_updated = now; + } + + /// Moves the instance's destination Propolis ID into the current active + /// position and updates the generation number, but does not clear the + /// destination ID or the active migration ID. This promotes a migration + /// target VMM into the active position without actually allowing a new + /// migration to begin. + /// + /// This routine should only be called when + /// `instance.dst_propolis_id.is_some()`. + fn switch_propolis_id_to_target(&mut self, now: DateTime) { + assert!(self.instance.dst_propolis_id.is_some()); + + self.instance.propolis_id = self.instance.dst_propolis_id; + self.instance.gen = self.instance.gen.next(); + self.instance.time_updated = now; + } + + /// Forcibly transitions this instance's VMM into the specified `next` + /// state and updates its generation number. + pub(crate) fn transition_vmm( + &mut self, + next: PublishedVmmState, + now: DateTime, + ) { + self.vmm.state = next.into(); + self.vmm.gen = self.vmm.gen.next(); + self.vmm.time_updated = now; + } + + /// Updates the state of this instance in response to a rude termination of + /// its Propolis zone, marking the VMM as destroyed and applying any + /// consequent state updates. + /// + /// # Synchronization + /// + /// A caller who is rudely terminating a Propolis zone must hold locks + /// sufficient to ensure that no other Propolis observations arrive in the + /// transaction that terminates the zone and then calls this function. + /// + /// TODO(#4004): This routine works by synthesizing a Propolis state change + /// that says "this Propolis is destroyed and its active migration failed." + /// If this conflicts with the actual Propolis state--e.g., if the + /// underlying Propolis was destroyed but migration *succeeded*--the + /// instance's state in Nexus may become inconsistent. 
This routine should + /// therefore only be invoked by callers who know that an instance is not + /// migrating. + pub(crate) fn terminate_rudely(&mut self) { + let fake_observed = ObservedPropolisState { + vmm_state: PropolisInstanceState(PropolisApiState::Destroyed), + migration_status: if self.instance.migration_id.is_some() { + ObservedMigrationStatus::Failed + } else { + ObservedMigrationStatus::NoMigration + }, + time: Utc::now(), + }; + + self.apply_propolis_observation(&fake_observed); } /// Sets or clears this instance's migration IDs and advances its Propolis @@ -339,24 +427,27 @@ impl InstanceStates { pub(crate) fn set_migration_ids( &mut self, ids: &Option, + now: DateTime, ) { if let Some(ids) = ids { - self.current.migration_id = Some(ids.migration_id); - self.current.dst_propolis_id = Some(ids.dst_propolis_id); + self.instance.migration_id = Some(ids.migration_id); + self.instance.dst_propolis_id = Some(ids.dst_propolis_id); } else { - self.current.migration_id = None; - self.current.dst_propolis_id = None; + self.instance.migration_id = None; + self.instance.dst_propolis_id = None; } - self.current.propolis_gen = self.current.propolis_gen.next(); + self.instance.gen = self.instance.gen.next(); + self.instance.time_updated = now; } /// Unconditionally clears the instance's migration IDs and advances its /// Propolis generation. Not public; used internally to conclude migrations. - fn clear_migration_ids(&mut self) { - self.current.migration_id = None; - self.current.dst_propolis_id = None; - self.current.propolis_gen = self.current.propolis_gen.next(); + fn clear_migration_ids(&mut self, now: DateTime) { + self.instance.migration_id = None; + self.instance.dst_propolis_id = None; + self.instance.gen = self.instance.gen.next(); + self.instance.time_updated = now; } /// Returns true if the migration IDs in this instance are already set as they @@ -384,15 +475,15 @@ impl InstanceStates { // A simple less-than check allows the migration to sled 3 to proceed // even though the most-recently-expressed intent to migrate put the // instance on sled 1. - if old_runtime.propolis_gen.next() != self.current.propolis_gen { + if old_runtime.gen.next() != self.instance.gen { return false; } - match (self.current.migration_id, migration_ids) { + match (self.instance.migration_id, migration_ids) { // If the migration ID is already set, and this is a request to set // IDs, the records match if the relevant IDs match. (Some(current_migration_id), Some(ids)) => { - let current_dst_id = self.current.dst_propolis_id.expect( + let current_dst_id = self.instance.dst_propolis_id.expect( "migration ID and destination ID must be set together", ); @@ -402,23 +493,12 @@ impl InstanceStates { // If the migration ID is already cleared, and this is a request to // clear IDs, the records match. (None, None) => { - assert!(self.current.dst_propolis_id.is_none()); + assert!(self.instance.dst_propolis_id.is_none()); true } _ => false, } } - - /// Indicates whether this instance incarnation is a migration source or - /// target by comparing the instance's current active Propolis ID with its - /// migration destination ID. - /// - /// # Panics - /// - /// Panics if the instance has no destination Propolis ID set. 
- fn is_migration_target(&self) -> bool { - self.current.propolis_id == self.current.dst_propolis_id.unwrap() - } } #[cfg(test)] @@ -428,43 +508,45 @@ mod test { use crate::params::InstanceMigrationSourceParams; use chrono::Utc; - use omicron_common::api::external::{ - ByteCount, Generation, InstanceCpuCount, InstanceState as State, - }; + use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::InstanceRuntimeState; use propolis_client::api::InstanceState as Observed; use uuid::Uuid; fn make_instance() -> InstanceStates { - InstanceStates::new(InstanceRuntimeState { - run_state: State::Creating, - sled_id: Uuid::new_v4(), - propolis_id: Uuid::new_v4(), + let propolis_id = Uuid::new_v4(); + let now = Utc::now(); + let instance = InstanceRuntimeState { + propolis_id: Some(propolis_id), dst_propolis_id: None, - propolis_addr: None, migration_id: None, - propolis_gen: Generation::new(), - ncpus: InstanceCpuCount(2), - memory: ByteCount::from_mebibytes_u32(512), - hostname: "myvm".to_string(), gen: Generation::new(), - time_updated: Utc::now(), - }) + time_updated: now, + }; + + let vmm = VmmRuntimeState { + state: ApiInstanceState::Starting, + gen: Generation::new(), + time_updated: now, + }; + + InstanceStates::new(instance, vmm, propolis_id) } fn make_migration_source_instance() -> InstanceStates { let mut state = make_instance(); - state.current.run_state = State::Migrating; - state.current.migration_id = Some(Uuid::new_v4()); - state.current.dst_propolis_id = Some(Uuid::new_v4()); + state.vmm.state = ApiInstanceState::Migrating; + state.instance.migration_id = Some(Uuid::new_v4()); + state.instance.dst_propolis_id = Some(Uuid::new_v4()); state } fn make_migration_target_instance() -> InstanceStates { let mut state = make_instance(); - state.current.run_state = State::Migrating; - state.current.migration_id = Some(Uuid::new_v4()); - state.current.dst_propolis_id = Some(state.current.propolis_id); + state.vmm.state = ApiInstanceState::Migrating; + state.instance.migration_id = Some(Uuid::new_v4()); + state.propolis_id = Uuid::new_v4(); + state.instance.dst_propolis_id = Some(state.propolis_id); state } @@ -472,124 +554,239 @@ mod test { propolis_state: PropolisInstanceState, ) -> ObservedPropolisState { ObservedPropolisState { - instance_state: propolis_state, + vmm_state: propolis_state, migration_status: ObservedMigrationStatus::NoMigration, + time: Utc::now(), + } + } + + /// Checks to see if the instance state structures `prev` and `next` have a + /// difference that should produce a change in generation and asserts that + /// such a change occurred. + fn assert_state_change_has_gen_change( + prev: &InstanceStates, + next: &InstanceStates, + ) { + // The predicate under test below is "if an interesting field changed, + // then the generation number changed." Testing the contrapositive is a + // little nicer because the assertion that trips identifies exactly + // which field changed without updating the generation number. + // + // The else branch tests the converse to make sure the generation number + // does not update unexpectedly. While this won't cause an important + // state update to be dropped, it can interfere with updates from other + // sleds that expect their own attempts to advance the generation number + // to cause new state to be recorded. 
+ if prev.instance.gen == next.instance.gen { + assert_eq!(prev.instance.propolis_id, next.instance.propolis_id); + assert_eq!( + prev.instance.dst_propolis_id, + next.instance.dst_propolis_id + ); + assert_eq!(prev.instance.migration_id, next.instance.migration_id); + } else { + assert!( + (prev.instance.propolis_id != next.instance.propolis_id) + || (prev.instance.dst_propolis_id + != next.instance.dst_propolis_id) + || (prev.instance.migration_id + != next.instance.migration_id), + "prev: {:?}, next: {:?}", + prev, + next + ); + } + + // Propolis is free to publish no-op VMM state updates (e.g. when an + // in-progress migration's state changes but the migration is not yet + // complete), so don't test the converse here. + if prev.vmm.gen == next.vmm.gen { + assert_eq!(prev.vmm.state, next.vmm.state); } } #[test] fn propolis_terminal_states_request_destroy_action() { for state in [Observed::Destroyed, Observed::Failed] { - let mut instance = make_instance(); - let original_instance = instance.clone(); - let requested_action = instance - .apply_propolis_observation(&make_observed_state(state)); + let mut instance_state = make_instance(); + let original_instance_state = instance_state.clone(); + let requested_action = instance_state + .apply_propolis_observation(&make_observed_state(state.into())); assert!(matches!(requested_action, Some(Action::Destroy))); - assert!(instance.current.gen > original_instance.current.gen); + assert!( + instance_state.instance.gen + > original_instance_state.instance.gen + ); } } #[test] fn destruction_after_migration_out_does_not_transition() { - let mut instance = make_migration_source_instance(); + let mut state = make_migration_source_instance(); + assert!(state.instance.dst_propolis_id.is_some()); + assert_ne!(state.instance.propolis_id, state.instance.dst_propolis_id); + + // After a migration succeeds, the source VM appears to stop but reports + // that the migration has succeeded. let mut observed = ObservedPropolisState { - instance_state: Observed::Stopping, + vmm_state: PropolisInstanceState(Observed::Stopping), migration_status: ObservedMigrationStatus::Succeeded, + time: Utc::now(), }; - let original = instance.clone(); - assert!(instance.apply_propolis_observation(&observed).is_none()); - assert_eq!(instance.current.gen, original.current.gen); - - observed.instance_state = Observed::Stopped; - assert!(instance.apply_propolis_observation(&observed).is_none()); - assert_eq!(instance.current.gen, original.current.gen); - - observed.instance_state = Observed::Destroyed; + // This transition should transfer control to the target VMM without + // actually marking the migration as completed. This advances the + // instance's state generation. + let prev = state.clone(); + assert!(state.apply_propolis_observation(&observed).is_none()); + assert_state_change_has_gen_change(&prev, &state); + assert!(state.instance.gen > prev.instance.gen); + assert_eq!( + state.instance.dst_propolis_id, + prev.instance.dst_propolis_id + ); + assert_eq!(state.instance.propolis_id, state.instance.dst_propolis_id); + assert!(state.instance.migration_id.is_some()); + + // Once a successful migration is observed, the VMM's state should + // continue to update, but the instance's state shouldn't change + // anymore. 
+ let prev = state.clone(); + observed.vmm_state = PropolisInstanceState(Observed::Stopped); + assert!(state.apply_propolis_observation(&observed).is_none()); + assert_state_change_has_gen_change(&prev, &state); + assert_eq!(state.instance.gen, prev.instance.gen); + + // The Stopped state is translated internally to Stopping to prevent + // external viewers from perceiving that the instance is stopped before + // the VMM is fully retired. + assert_eq!(state.vmm.state, ApiInstanceState::Stopping); + assert!(state.vmm.gen > prev.vmm.gen); + + let prev = state.clone(); + observed.vmm_state = PropolisInstanceState(Observed::Destroyed); assert!(matches!( - instance.apply_propolis_observation(&observed), + state.apply_propolis_observation(&observed), Some(Action::Destroy) )); - assert_eq!(instance.current.gen, original.current.gen); + assert_state_change_has_gen_change(&prev, &state); + assert_eq!(state.instance.gen, prev.instance.gen); + assert_eq!(state.vmm.state, ApiInstanceState::Destroyed); + assert!(state.vmm.gen > prev.vmm.gen); } #[test] fn failure_after_migration_in_does_not_transition() { - let mut instance = make_migration_target_instance(); + let mut state = make_migration_target_instance(); + + // Failure to migrate into an instance should mark the VMM as destroyed + // but should not change the instance's migration IDs. let observed = ObservedPropolisState { - instance_state: Observed::Failed, + vmm_state: PropolisInstanceState(Observed::Failed), migration_status: ObservedMigrationStatus::Failed, + time: Utc::now(), }; - let original = instance.clone(); + let prev = state.clone(); assert!(matches!( - instance.apply_propolis_observation(&observed), + state.apply_propolis_observation(&observed), Some(Action::Destroy) )); - assert_eq!(instance.current.gen, original.current.gen); + assert_state_change_has_gen_change(&prev, &state); + assert_eq!(state.instance.gen, prev.instance.gen); + assert_eq!(state.vmm.state, ApiInstanceState::Failed); + assert!(state.vmm.gen > prev.vmm.gen); + } + + // Verifies that the rude-termination state change doesn't update the + // instance record if the VMM under consideration is a migration target. + // + // The live migration saga relies on this property for correctness (it needs + // to know that unwinding its "create destination VMM" step will not produce + // an updated instance record). + #[test] + fn rude_terminate_of_migration_target_does_not_transition_instance() { + let mut state = make_migration_target_instance(); + assert_eq!(state.propolis_role(), PropolisRole::MigrationTarget); + + let prev = state.clone(); + state.terminate_rudely(); + + assert_state_change_has_gen_change(&prev, &state); + assert_eq!(state.instance.gen, prev.instance.gen); } #[test] fn migration_out_after_migration_in() { - let mut instance = make_migration_target_instance(); + let mut state = make_migration_target_instance(); let mut observed = ObservedPropolisState { - instance_state: Observed::Running, + vmm_state: PropolisInstanceState(Observed::Running), migration_status: ObservedMigrationStatus::Succeeded, + time: Utc::now(), }; // The transition into the Running state on the migration target should // take over for the source, updating the Propolis generation. 
- let prev = instance.clone(); - assert!(instance.apply_propolis_observation(&observed).is_none()); - assert!(instance.current.migration_id.is_none()); - assert!(instance.current.dst_propolis_id.is_none()); - assert!(instance.current.gen > prev.current.gen); - assert!(instance.current.propolis_gen > prev.current.propolis_gen); + let prev = state.clone(); + assert!(state.apply_propolis_observation(&observed).is_none()); + assert_state_change_has_gen_change(&prev, &state); + assert!(state.instance.migration_id.is_none()); + assert!(state.instance.dst_propolis_id.is_none()); + assert!(state.instance.gen > prev.instance.gen); + assert_eq!(state.vmm.state, ApiInstanceState::Running); + assert!(state.vmm.gen > prev.vmm.gen); // Pretend Nexus set some new migration IDs. - let prev = instance.clone(); - instance.set_migration_ids(&Some(InstanceMigrationSourceParams { - migration_id: Uuid::new_v4(), - dst_propolis_id: Uuid::new_v4(), - })); - assert!(instance.current.propolis_gen > prev.current.propolis_gen); - - // Mark that the new migration out is in progress. - let prev = instance.clone(); - observed.instance_state = Observed::Migrating; + let prev = state.clone(); + state.set_migration_ids( + &Some(InstanceMigrationSourceParams { + migration_id: Uuid::new_v4(), + dst_propolis_id: Uuid::new_v4(), + }), + Utc::now(), + ); + assert_state_change_has_gen_change(&prev, &state); + assert!(state.instance.gen > prev.instance.gen); + assert_eq!(state.vmm.gen, prev.vmm.gen); + + // Mark that the new migration out is in progress. This doesn't change + // anything in the instance runtime state, but does update the VMM state + // generation. + let prev = state.clone(); + observed.vmm_state = PropolisInstanceState(Observed::Migrating); observed.migration_status = ObservedMigrationStatus::InProgress; - assert!(instance.apply_propolis_observation(&observed).is_none()); + assert!(state.apply_propolis_observation(&observed).is_none()); + assert_state_change_has_gen_change(&prev, &state); assert_eq!( - instance.current.migration_id.unwrap(), - prev.current.migration_id.unwrap() + state.instance.migration_id.unwrap(), + prev.instance.migration_id.unwrap() ); assert_eq!( - instance.current.dst_propolis_id.unwrap(), - prev.current.dst_propolis_id.unwrap() + state.instance.dst_propolis_id.unwrap(), + prev.instance.dst_propolis_id.unwrap() ); - assert_eq!(instance.current.run_state, State::Migrating); - assert!(instance.current.gen > prev.current.gen); - assert_eq!(instance.current.propolis_gen, prev.current.propolis_gen); + assert_eq!(state.vmm.state, ApiInstanceState::Migrating); + assert!(state.vmm.gen > prev.vmm.gen); + assert_eq!(state.instance.gen, prev.instance.gen); // Propolis will publish that the migration succeeds before changing any - // state. Because this is now a successful migration source, the - // instance record is not updated. - observed.instance_state = Observed::Migrating; + // state. This should transfer control to the target but should not + // touch the migration ID (that is the new target's job). 
+ let prev = state.clone(); + observed.vmm_state = PropolisInstanceState(Observed::Migrating); observed.migration_status = ObservedMigrationStatus::Succeeded; - let prev = instance.clone(); - assert!(instance.apply_propolis_observation(&observed).is_none()); - assert_eq!(instance.current.run_state, State::Migrating); - assert_eq!( - instance.current.migration_id.unwrap(), - prev.current.migration_id.unwrap() - ); + assert!(state.apply_propolis_observation(&observed).is_none()); + assert_state_change_has_gen_change(&prev, &state); + assert_eq!(state.vmm.state, ApiInstanceState::Migrating); + assert!(state.vmm.gen > prev.vmm.gen); + assert_eq!(state.instance.migration_id, prev.instance.migration_id); assert_eq!( - instance.current.dst_propolis_id.unwrap(), - prev.current.dst_propolis_id.unwrap() + state.instance.dst_propolis_id, + prev.instance.dst_propolis_id, ); - assert_eq!(instance.current.gen, prev.current.gen); - assert_eq!(instance.current.propolis_gen, prev.current.propolis_gen); + assert_eq!(state.instance.propolis_id, state.instance.dst_propolis_id); + assert!(state.instance.gen > prev.instance.gen); // The rest of the destruction sequence is covered by other tests. } @@ -607,51 +804,49 @@ mod test { dst_propolis_id: Uuid::new_v4(), }; - new_instance.set_migration_ids(&Some(migration_ids)); + new_instance.set_migration_ids(&Some(migration_ids), Utc::now()); assert!(new_instance.migration_ids_already_set( - old_instance.current(), + old_instance.instance(), &Some(migration_ids) )); // The IDs aren't already set if the new record has an ID that's // advanced from the old record by more than one generation. let mut newer_instance = new_instance.clone(); - newer_instance.current.propolis_gen = - newer_instance.current.propolis_gen.next(); + newer_instance.instance.gen = newer_instance.instance.gen.next(); assert!(!newer_instance.migration_ids_already_set( - old_instance.current(), + old_instance.instance(), &Some(migration_ids) )); // They also aren't set if the old generation has somehow equaled or // surpassed the current generation. - old_instance.current.propolis_gen = - old_instance.current.propolis_gen.next(); + old_instance.instance.gen = old_instance.instance.gen.next(); assert!(!new_instance.migration_ids_already_set( - old_instance.current(), + old_instance.instance(), &Some(migration_ids) )); // If the generation numbers are right, but either requested ID is not // present in the current instance, the requested IDs aren't set. 
old_instance = orig_instance; - new_instance.current.migration_id = Some(Uuid::new_v4()); + new_instance.instance.migration_id = Some(Uuid::new_v4()); assert!(!new_instance.migration_ids_already_set( - old_instance.current(), + old_instance.instance(), &Some(migration_ids) )); - new_instance.current.migration_id = Some(migration_ids.migration_id); - new_instance.current.dst_propolis_id = Some(Uuid::new_v4()); + new_instance.instance.migration_id = Some(migration_ids.migration_id); + new_instance.instance.dst_propolis_id = Some(Uuid::new_v4()); assert!(!new_instance.migration_ids_already_set( - old_instance.current(), + old_instance.instance(), &Some(migration_ids) )); - new_instance.current.migration_id = None; - new_instance.current.dst_propolis_id = None; + new_instance.instance.migration_id = None; + new_instance.instance.dst_propolis_id = None; assert!(!new_instance.migration_ids_already_set( - old_instance.current(), + old_instance.instance(), &Some(migration_ids) )); } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 440ccb73ee..2ab8273e39 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -25,7 +25,7 @@ use illumos_utils::opte::params::{ }; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; -use omicron_common::api::internal::nexus::InstanceRuntimeState; +use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -393,12 +393,20 @@ async fn instance_register( rqctx: RequestContext, path_params: Path, body: TypedBody, -) -> Result, HttpError> { +) -> Result, HttpError> { let sa = rqctx.context(); let instance_id = path_params.into_inner().instance_id; let body_args = body.into_inner(); Ok(HttpResponseOk( - sa.instance_ensure_registered(instance_id, body_args.initial).await?, + sa.instance_ensure_registered( + instance_id, + body_args.propolis_id, + body_args.hardware, + body_args.instance_runtime, + body_args.vmm_runtime, + body_args.propolis_addr, + ) + .await?, )) } @@ -440,7 +448,7 @@ async fn instance_put_migration_ids( rqctx: RequestContext, path_params: Path, body: TypedBody, -) -> Result, HttpError> { +) -> Result, HttpError> { let sa = rqctx.context(); let instance_id = path_params.into_inner().instance_id; let body_args = body.into_inner(); diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index baf92af28a..ce1ef662dc 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -6,9 +6,9 @@ use crate::common::instance::{ Action as InstanceAction, InstanceStates, ObservedPropolisState, - PublishedInstanceState, + PublishedVmmState, }; -use crate::instance_manager::InstanceTicket; +use crate::instance_manager::{InstanceManagerServices, InstanceTicket}; use crate::nexus::NexusClientWithResolver; use crate::params::ZoneBundleCause; use crate::params::ZoneBundleMetadata; @@ -22,6 +22,7 @@ use crate::zone_bundle::BundleError; use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; use backoff::BackoffError; +use chrono::Utc; use futures::lock::{Mutex, MutexGuard}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; @@ -32,7 +33,9 @@ use illumos_utils::zone::Zones; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use omicron_common::address::NEXUS_INTERNAL_PORT; use omicron_common::address::PROPOLIS_PORT; -use 
omicron_common::api::internal::nexus::InstanceRuntimeState; +use omicron_common::api::internal::nexus::{ + InstanceRuntimeState, SledInstanceState, VmmRuntimeState, +}; use omicron_common::api::internal::shared::{ NetworkInterface, SourceNatConfig, }; @@ -45,7 +48,6 @@ use slog::Logger; use std::net::IpAddr; use std::net::{SocketAddr, SocketAddrV6}; use std::sync::Arc; -use tokio::task::JoinHandle; use uuid::Uuid; #[derive(thiserror::Error, Debug)] @@ -171,34 +173,10 @@ enum Reaction { struct RunningState { // Connection to Propolis. client: Arc, - // Handle to task monitoring for Propolis state changes. - monitor_task: Option>, // Handle to the zone. running_zone: RunningZone, } -impl Drop for RunningState { - fn drop(&mut self) { - if let Some(task) = self.monitor_task.take() { - // NOTE: We'd prefer to actually await the task, since it - // will be completed at this point, but async drop doesn't exist. - // - // At a minimum, this implementation ensures the background task - // is not executing after RunningState terminates. - // - // "InstanceManager" contains... - // ... "Instance", which contains... - // ... "InstanceInner", which contains... - // ... "RunningState", which owns the "monitor_task". - // - // The "monitor_task" removes the instance from the - // "InstanceManager", triggering it's eventual drop. - // When this happens, the "monitor_task" exits anyway. - task.abort() - } - } -} - // Named type for values returned during propolis zone creation struct PropolisSetup { client: Arc, @@ -263,15 +241,15 @@ impl InstanceInner { &self.propolis_id } - async fn publish_state_to_nexus(&self) -> Result<(), Error> { + async fn publish_state_to_nexus(&self) { // Retry until Nexus acknowledges that it has applied this state update. // Note that Nexus may receive this call but then fail while reacting // to it. If that failure is transient, Nexus expects this routine to // retry the state update. - backoff::retry_notify( + let result = backoff::retry_notify( backoff::retry_policy_internal_service(), || async { - let state = self.state.current().clone(); + let state = self.state.sled_instance_state(); info!(self.log, "Publishing instance state update to Nexus"; "instance_id" => %self.id(), "state" => ?state, @@ -330,9 +308,15 @@ impl InstanceInner { "retry_after" => ?delay); }, ) - .await?; + .await; - Ok(()) + if let Err(e) = result { + error!( + self.log, + "Failed to publish state to Nexus, will not retry: {:?}", e; + "instance_id" => %self.id() + ); + } } /// Processes a Propolis state change observed by the Propolis monitoring @@ -365,28 +349,26 @@ impl InstanceInner { let action = self.state.apply_propolis_observation(state); info!( self.log, - "New state: {:?}, action: {:?}", - self.state.current().run_state, - action + "updated state after observing Propolis state change"; + "propolis_id" => %self.state.propolis_id(), + "new_instance_state" => ?self.state.instance(), + "new_vmm_state" => ?self.state.vmm() ); - // Publish the updated instance state to Nexus. The callee retries - // transient errors. If an error is permanent, log a message but - // continue monitoring so that the monitor will continue to take - // actions in response to future Propolis state changes. 
- if let Err(e) = self.publish_state_to_nexus().await { - let state = self.state.current(); - error!(self.log, - "Failed to publish state to Nexus, will not retry: {:?}", e; - "instance_id" => %self.id(), - "state" => ?state); - } + // If the zone is now safe to terminate, tear it down and discard the + // instance ticket before returning and publishing the new instance + // state to Nexus. This ensures that the instance is actually gone from + // the sled when Nexus receives the state update saying it's actually + // destroyed. + match action { + Some(InstanceAction::Destroy) => { + info!(self.log, "terminating VMM that has exited"; + "instance_id" => %self.id()); - // Take the next action, if any. - if let Some(action) = action { - self.take_action(action).await - } else { - Ok(Reaction::Continue) + self.terminate().await?; + Ok(Reaction::Terminate) + } + None => Ok(Reaction::Continue), } } @@ -434,7 +416,7 @@ impl InstanceInner { let migrate = match migrate { Some(params) => { let migration_id = - self.state.current().migration_id.ok_or_else(|| { + self.state.instance().migration_id.ok_or_else(|| { Error::Migration(anyhow!("Missing Migration UUID")) })?; Some(propolis_client::api::InstanceMigrateInitiateRequest { @@ -485,49 +467,26 @@ impl InstanceInner { self.propolis_ensure(&client, &running_zone, migrate).await?; // Monitor propolis for state changes in the background. + // + // This task exits after its associated Propolis has been terminated + // (either because the task observed a message from Propolis saying that + // it exited or because the Propolis server was terminated by other + // means). let monitor_client = client.clone(); - let monitor_task = Some(tokio::task::spawn(async move { + let _monitor_task = tokio::task::spawn(async move { let r = instance.monitor_state_task(monitor_client).await; let log = &instance.inner.lock().await.log; match r { Err(e) => warn!(log, "State monitoring task failed: {}", e), Ok(()) => info!(log, "State monitoring task complete"), } - })); + }); - self.running_state = - Some(RunningState { client, monitor_task, running_zone }); + self.running_state = Some(RunningState { client, running_zone }); Ok(()) } - async fn take_action( - &self, - action: InstanceAction, - ) -> Result { - info!(self.log, "Taking action: {:#?}", action); - let requested_state = match action { - InstanceAction::Run => { - propolis_client::api::InstanceStateRequested::Run - } - InstanceAction::Stop => { - propolis_client::api::InstanceStateRequested::Stop - } - InstanceAction::Reboot => { - propolis_client::api::InstanceStateRequested::Reboot - } - InstanceAction::Destroy => { - // Unlike the other actions, which update the Propolis state, - // the "destroy" action indicates that the service should be - // terminated. - info!(self.log, "take_action: Taking the Destroy action"); - return Ok(Reaction::Terminate); - } - }; - self.propolis_state_put(requested_state).await?; - Ok(Reaction::Continue) - } - /// Immediately terminates this instance's Propolis zone and cleans up any /// runtime objects associated with the instance. /// @@ -547,6 +506,10 @@ impl InstanceInner { self.log, "Instance::terminate() called with no running state" ); + + // Ensure the instance is removed from the instance manager's table + // so that a new instance can take its place. 
+ self.instance_ticket.terminate(); return Ok(()); }; @@ -599,59 +562,84 @@ pub struct Instance { inner: Arc>, } +#[derive(Debug)] +pub(crate) struct InstanceInitialState { + pub hardware: InstanceHardware, + pub instance_runtime: InstanceRuntimeState, + pub vmm_runtime: VmmRuntimeState, + pub propolis_addr: SocketAddr, +} + impl Instance { /// Creates a new (not yet running) instance object. /// - /// Arguments: + /// # Arguments + /// /// * `log`: Logger for dumping debug information. /// * `id`: UUID of the instance to be created. - /// * `initial`: State of the instance at initialization time. - /// * `vnic_allocator`: A unique (to the sled) ID generator to - /// refer to a VNIC. (This exists because of a restriction on VNIC name - /// lengths, otherwise the UUID would be used instead). - /// * `port_manager`: Handle to the object responsible for managing OPTE - /// ports. - /// * `nexus_client`: Connection to Nexus, used for sending notifications. - // TODO: This arg list is getting a little long; can we clean this up? - #[allow(clippy::too_many_arguments)] - pub fn new( + /// * `propolis_id`: UUID for the VMM to be created. + /// * `ticket`: A ticket that ensures this instance is a member of its + /// instance manager's tracking table. + /// * `state`: The initial state of this instance. + /// * `services`: A set of instance manager-provided services. + pub(crate) fn new( log: Logger, id: Uuid, + propolis_id: Uuid, ticket: InstanceTicket, - initial: InstanceHardware, - vnic_allocator: VnicAllocator, - port_manager: PortManager, - nexus_client: NexusClientWithResolver, - storage: StorageResources, - zone_bundler: ZoneBundler, + state: InstanceInitialState, + services: InstanceManagerServices, ) -> Result { - info!(log, "Instance::new w/initial HW: {:?}", initial); + info!(log, "initializing new Instance"; + "instance_id" => %id, + "propolis_id" => %propolis_id, + "state" => ?state); + + let InstanceInitialState { + hardware, + instance_runtime, + vmm_runtime, + propolis_addr, + } = state; + + let InstanceManagerServices { + nexus_client, + vnic_allocator, + port_manager, + storage, + zone_bundler, + } = services; + let instance = InstanceInner { log: log.new(o!("instance_id" => id.to_string())), // NOTE: Mostly lies. properties: propolis_client::api::InstanceProperties { id, - name: initial.runtime.hostname.clone(), + name: hardware.properties.hostname.clone(), description: "Test description".to_string(), image_id: Uuid::nil(), bootrom_id: Uuid::nil(), // TODO: Align the byte type w/propolis. - memory: initial.runtime.memory.to_whole_mebibytes(), + memory: hardware.properties.memory.to_whole_mebibytes(), // TODO: we should probably make propolis aligned with // InstanceCpuCount here, to avoid any casting... 
- vcpus: initial.runtime.ncpus.0 as u8, + vcpus: hardware.properties.ncpus.0 as u8, }, - propolis_id: initial.runtime.propolis_id, - propolis_ip: initial.runtime.propolis_addr.unwrap().ip(), + propolis_id, + propolis_ip: propolis_addr.ip(), vnic_allocator, port_manager, - requested_nics: initial.nics, - source_nat: initial.source_nat, - external_ips: initial.external_ips, - firewall_rules: initial.firewall_rules, - requested_disks: initial.disks, - cloud_init_bytes: initial.cloud_init_bytes, - state: InstanceStates::new(initial.runtime), + requested_nics: hardware.nics, + source_nat: hardware.source_nat, + external_ips: hardware.external_ips, + firewall_rules: hardware.firewall_rules, + requested_disks: hardware.disks, + cloud_init_bytes: hardware.cloud_init_bytes, + state: InstanceStates::new( + instance_runtime, + vmm_runtime, + propolis_id, + ), running_state: None, nexus_client, storage, @@ -686,9 +674,9 @@ impl Instance { } } - pub async fn current_state(&self) -> InstanceRuntimeState { + pub async fn current_state(&self) -> SledInstanceState { let inner = self.inner.lock().await; - inner.state.current().clone() + inner.state.sled_instance_state() } /// Ensures that a Propolis process exists for this instance, then sends it @@ -712,25 +700,6 @@ impl Instance { .await?; } else { let setup_result: Result<(), Error> = 'setup: { - // If there's no Propolis yet, and this instance is not being - // initialized via migration, immediately send a state update to - // Nexus to reflect that the instance is starting (so that the - // external API will display this state while the zone is being - // started). - // - // Migration targets don't do this because the instance is still - // logically running (on the source) while the target Propolis - // is being launched. - if migration_params.is_none() { - info!(&inner.log, "Ensuring new instance"); - inner.state.transition(PublishedInstanceState::Starting); - if let Err(e) = inner.publish_state_to_nexus().await { - break 'setup Err(e); - } - } else { - info!(&inner.log, "Ensuring new instance (migration)"); - } - // Set up the Propolis zone and the objects associated with it. let setup = match self.setup_propolis_locked(inner).await { Ok(setup) => setup, @@ -757,9 +726,12 @@ impl Instance { // start a migration target simply leaves the VM running untouched // on the source. if migration_params.is_none() && setup_result.is_err() { - error!(&inner.log, "instance setup failed: {:?}", setup_result); - inner.state.transition(PublishedInstanceState::Failed); - inner.publish_state_to_nexus().await?; + error!(&inner.log, "vmm setup failed: {:?}", setup_result); + + // This case is morally equivalent to starting Propolis and then + // rudely terminating it before asking it to do anything. Update + // the VMM and instance states accordingly. + inner.state.terminate_rudely(); } setup_result?; } @@ -780,7 +752,7 @@ impl Instance { pub async fn put_state( &self, state: crate::params::InstanceStateRequested, - ) -> Result { + ) -> Result { use propolis_client::api::InstanceStateRequested as PropolisRequest; let mut inner = self.inner.lock().await; let (propolis_state, next_published) = match state { @@ -800,11 +772,12 @@ impl Instance { // "Destroyed" state and return it to the caller. 
if inner.running_state.is_none() { inner.terminate().await?; - (None, Some(PublishedInstanceState::Stopped)) + inner.state.terminate_rudely(); + (None, None) } else { ( Some(PropolisRequest::Stop), - Some(PublishedInstanceState::Stopping), + Some(PublishedVmmState::Stopping), ) } } @@ -814,7 +787,7 @@ impl Instance { } ( Some(PropolisRequest::Reboot), - Some(PublishedInstanceState::Rebooting), + Some(PublishedVmmState::Rebooting), ) } }; @@ -823,43 +796,43 @@ impl Instance { inner.propolis_state_put(p).await?; } if let Some(s) = next_published { - inner.state.transition(s); + inner.state.transition_vmm(s, Utc::now()); } - Ok(inner.state.current().clone()) + Ok(inner.state.sled_instance_state()) } pub async fn put_migration_ids( &self, old_runtime: &InstanceRuntimeState, migration_ids: &Option, - ) -> Result { + ) -> Result { let mut inner = self.inner.lock().await; // Check that the instance's current generation matches the one the // caller expects to transition from. This helps Nexus ensure that if // multiple migration sagas launch at Propolis generation N, then only // one of them will successfully set the instance's migration IDs. - if inner.state.current().propolis_gen != old_runtime.propolis_gen { + if inner.state.instance().gen != old_runtime.gen { // Allow this transition for idempotency if the instance is // already in the requested goal state. if inner.state.migration_ids_already_set(old_runtime, migration_ids) { - return Ok(inner.state.current().clone()); + return Ok(inner.state.sled_instance_state()); } return Err(Error::Transition( omicron_common::api::external::Error::Conflict { internal_message: format!( - "wrong Propolis ID generation: expected {}, got {}", - inner.state.current().propolis_gen, - old_runtime.propolis_gen + "wrong instance state generation: expected {}, got {}", + inner.state.instance().gen, + old_runtime.gen ), }, )); } - inner.state.set_migration_ids(migration_ids); - Ok(inner.state.current().clone()) + inner.state.set_migration_ids(migration_ids, Utc::now()); + Ok(inner.state.sled_instance_state()) } async fn setup_propolis_locked( @@ -979,7 +952,6 @@ impl Instance { info!(inner.log, "Propolis SMF service is online"); let server_addr = SocketAddr::new(inner.propolis_ip, PROPOLIS_PORT); - inner.state.current_mut().propolis_addr = Some(server_addr); // We use a custom client builder here because the default progenitor // one has a timeout of 15s but we want to be able to wait indefinitely. @@ -1000,11 +972,15 @@ impl Instance { /// Rudely terminates this instance's Propolis (if it has one) and /// immediately transitions the instance to the Destroyed state. - pub async fn terminate(&self) -> Result { + pub async fn terminate(&self) -> Result { let mut inner = self.inner.lock().await; inner.terminate().await?; - inner.state.transition(PublishedInstanceState::Stopped); - Ok(inner.state.current().clone()) + + // Rude termination is safe here because this routine took the lock + // before terminating the zone, which will cause any pending + // observations from the instance state monitor to be + inner.state.terminate_rudely(); + Ok(inner.state.sled_instance_state()) } // Monitors propolis until explicitly told to disconnect. @@ -1031,17 +1007,16 @@ impl Instance { // stabilize that state across this entire operation. let mut inner = self.inner.lock().await; let observed = ObservedPropolisState::new( - inner.state.current(), + inner.state.instance(), &response, ); - inner.observe_state(&observed).await? 
+ let reaction = inner.observe_state(&observed).await?; + inner.publish_state_to_nexus().await; + reaction }; - match reaction { - Reaction::Continue => {} - Reaction::Terminate => { - return self.terminate().await.map(|_| ()); - } + if let Reaction::Terminate = reaction { + return Ok(()); } // Update the generation number we're asking for, to ensure the diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index bdd29e4d1f..2860f0624b 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -21,8 +21,11 @@ use illumos_utils::opte::PortManager; use illumos_utils::vmm_reservoir; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::nexus::InstanceRuntimeState; +use omicron_common::api::internal::nexus::SledInstanceState; +use omicron_common::api::internal::nexus::VmmRuntimeState; use slog::Logger; use std::collections::BTreeMap; +use std::net::SocketAddr; use std::sync::{Arc, Mutex}; use uuid::Uuid; @@ -66,6 +69,14 @@ struct InstanceManagerInternal { zone_bundler: ZoneBundler, } +pub(crate) struct InstanceManagerServices { + pub nexus_client: NexusClientWithResolver, + pub vnic_allocator: VnicAllocator, + pub port_manager: PortManager, + pub storage: StorageResources, + pub zone_bundler: ZoneBundler, +} + /// All instances currently running on the sled. pub struct InstanceManager { inner: Arc, @@ -168,14 +179,21 @@ impl InstanceManager { pub async fn ensure_registered( &self, instance_id: Uuid, - initial_hardware: InstanceHardware, - ) -> Result { - let requested_propolis_id = initial_hardware.runtime.propolis_id; + propolis_id: Uuid, + hardware: InstanceHardware, + instance_runtime: InstanceRuntimeState, + vmm_runtime: VmmRuntimeState, + propolis_addr: SocketAddr, + ) -> Result { info!( &self.inner.log, "ensuring instance is registered"; "instance_id" => %instance_id, - "propolis_id" => %requested_propolis_id + "propolis_id" => %propolis_id, + "hardware" => ?hardware, + "instance_runtime" => ?instance_runtime, + "vmm_runtime" => ?vmm_runtime, + "propolis_addr" => ?propolis_addr, ); let instance = { @@ -183,7 +201,7 @@ impl InstanceManager { if let Some((existing_propolis_id, existing_instance)) = instances.get(&instance_id) { - if requested_propolis_id != *existing_propolis_id { + if propolis_id != *existing_propolis_id { info!(&self.inner.log, "instance already registered with another Propolis ID"; "instance_id" => %instance_id, @@ -207,20 +225,33 @@ impl InstanceManager { let instance_log = self.inner.log.new(o!()); let ticket = InstanceTicket::new(instance_id, self.inner.clone()); + + let services = InstanceManagerServices { + nexus_client: self.inner.nexus_client.clone(), + vnic_allocator: self.inner.vnic_allocator.clone(), + port_manager: self.inner.port_manager.clone(), + storage: self.inner.storage.clone(), + zone_bundler: self.inner.zone_bundler.clone(), + }; + + let state = crate::instance::InstanceInitialState { + hardware, + instance_runtime, + vmm_runtime, + propolis_addr, + }; + let instance = Instance::new( instance_log, instance_id, + propolis_id, ticket, - initial_hardware, - self.inner.vnic_allocator.clone(), - self.inner.port_manager.clone(), - self.inner.nexus_client.clone(), - self.inner.storage.clone(), - self.inner.zone_bundler.clone(), + state, + services, )?; let instance_clone = instance.clone(); - let _old = instances - .insert(instance_id, (requested_propolis_id, instance)); + let _old = + instances.insert(instance_id, (propolis_id, instance)); assert!(_old.is_none()); 
instance_clone } @@ -299,7 +330,7 @@ impl InstanceManager { instance_id: Uuid, old_runtime: &InstanceRuntimeState, migration_ids: &Option, - ) -> Result { + ) -> Result { let (_, instance) = self .inner .instances diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index d0fa2fbe4d..84ec1ef0dc 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -9,7 +9,8 @@ pub use crate::zone_bundle::ZoneBundleMetadata; pub use illumos_utils::opte::params::VpcFirewallRule; pub use illumos_utils::opte::params::VpcFirewallRulesEnsureBody; use omicron_common::api::internal::nexus::{ - DiskRuntimeState, InstanceRuntimeState, + DiskRuntimeState, InstanceProperties, InstanceRuntimeState, + SledInstanceState, VmmRuntimeState, }; use omicron_common::api::internal::shared::{ NetworkInterface, SourceNatConfig, @@ -60,7 +61,7 @@ pub struct DiskEnsureBody { /// Describes the instance hardware. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] pub struct InstanceHardware { - pub runtime: InstanceRuntimeState, + pub properties: InstanceProperties, pub nics: Vec, pub source_nat: SourceNatConfig, /// Zero or more external IP addresses (either floating or ephemeral), @@ -72,12 +73,27 @@ pub struct InstanceHardware { pub cloud_init_bytes: Option, } -/// The body of a request to ensure that an instance is known to a sled agent. +/// The body of a request to ensure that a instance and VMM are known to a sled +/// agent. #[derive(Serialize, Deserialize, JsonSchema)] pub struct InstanceEnsureBody { /// A description of the instance's virtual hardware and the initial runtime /// state this sled agent should store for this incarnation of the instance. - pub initial: InstanceHardware, + pub hardware: InstanceHardware, + + /// The instance runtime state for the instance being registered. + pub instance_runtime: InstanceRuntimeState, + + /// The initial VMM runtime state for the VMM being registered. + pub vmm_runtime: VmmRuntimeState, + + /// The ID of the VMM being registered. This may not be the active VMM ID in + /// the instance runtime state (e.g. if the new VMM is going to be a + /// migration target). + pub propolis_id: Uuid, + + /// The address at which this VMM should serve a Propolis server API. + pub propolis_addr: SocketAddr, } /// The body of a request to move a previously-ensured instance into a specific @@ -95,7 +111,7 @@ pub struct InstancePutStateResponse { /// The current runtime state of the instance after handling the request to /// change its state. If the instance's state did not change, this field is /// `None`. - pub updated_runtime: Option, + pub updated_runtime: Option, } /// The response sent from a request to unregister an instance. @@ -104,7 +120,7 @@ pub struct InstanceUnregisterResponse { /// The current state of the instance after handling the request to /// unregister it. If the instance's state did not change, this field is /// `None`. - pub updated_runtime: Option, + pub updated_runtime: Option, } /// Parameters used when directing Propolis to initialize itself via live @@ -175,8 +191,8 @@ pub struct InstanceMigrationSourceParams { /// sled agent's instance state records. #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct InstancePutMigrationIdsBody { - /// The last runtime state known to this requestor. This request will - /// succeed if either (a) the Propolis generation in the sled agent's + /// The last instance runtime state known to this requestor. 
This request + /// will succeed if either (a) the state generation in the sled agent's /// runtime state matches the generation in this record, or (b) the sled /// agent's runtime state matches what would result from applying this /// request to the caller's runtime state. This latter condition provides diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index ab6940b165..bd6ed4aa90 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -248,6 +248,9 @@ impl SimCollection { if object.object.desired().is_none() && object.object.ready_to_destroy() { + info!(&self.log, "object is ready to destroy"; + "object_id" => %id); + (after, Some(object)) } else { objects.insert(id, object); @@ -405,37 +408,42 @@ mod test { use chrono::Utc; use dropshot::test_util::LogContext; use futures::channel::mpsc::Receiver; - use omicron_common::api::external::ByteCount; use omicron_common::api::external::DiskState; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; - use omicron_common::api::external::InstanceCpuCount; use omicron_common::api::external::InstanceState; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::InstanceRuntimeState; + use omicron_common::api::internal::nexus::SledInstanceState; + use omicron_common::api::internal::nexus::VmmRuntimeState; use omicron_test_utils::dev::test_setup_log; + use uuid::Uuid; fn make_instance( logctx: &LogContext, ) -> (SimObject, Receiver<()>) { - let initial_runtime = { - InstanceRuntimeState { - run_state: InstanceState::Creating, - sled_id: uuid::Uuid::new_v4(), - propolis_id: uuid::Uuid::new_v4(), - dst_propolis_id: None, - propolis_addr: None, - migration_id: None, - propolis_gen: Generation::new(), - ncpus: InstanceCpuCount(2), - memory: ByteCount::from_mebibytes_u32(512), - hostname: "myvm".to_string(), - gen: Generation::new(), - time_updated: Utc::now(), - } + let propolis_id = Uuid::new_v4(); + let instance_vmm = InstanceRuntimeState { + propolis_id: Some(propolis_id), + dst_propolis_id: None, + migration_id: None, + gen: Generation::new(), + time_updated: Utc::now(), }; - SimObject::new_simulated_auto(&initial_runtime, logctx.log.new(o!())) + let vmm_state = VmmRuntimeState { + state: InstanceState::Starting, + gen: Generation::new(), + time_updated: Utc::now(), + }; + + let state = SledInstanceState { + instance_state: instance_vmm, + vmm_state, + propolis_id, + }; + + SimObject::new_simulated_auto(&state, logctx.log.new(o!())) } fn make_disk( @@ -459,32 +467,38 @@ mod test { let (mut instance, mut rx) = make_instance(&logctx); let r1 = instance.object.current(); - info!(logctx.log, "new instance"; "run_state" => ?r1.run_state); - assert_eq!(r1.run_state, InstanceState::Creating); - assert_eq!(r1.gen, Generation::new()); + info!(logctx.log, "new instance"; "state" => ?r1); + assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.gen, Generation::new()); // There's no asynchronous transition going on yet so a // transition_finish() shouldn't change anything. 
assert!(instance.object.desired().is_none()); instance.transition_finish(); + let rnext = instance.object.current(); assert!(instance.object.desired().is_none()); - assert_eq!(&r1.time_updated, &instance.object.current().time_updated); - assert_eq!(&r1.run_state, &instance.object.current().run_state); - assert_eq!(r1.gen, instance.object.current().gen); + assert_eq!(r1.vmm_state.time_updated, rnext.vmm_state.time_updated); + assert_eq!(rnext.vmm_state.state, rnext.vmm_state.state); + assert_eq!(r1.vmm_state.gen, rnext.vmm_state.gen); assert!(rx.try_next().is_err()); - // Stopping an instance that was never started synchronously marks it - // stopped. + // Stopping an instance that was never started synchronously destroys + // its VMM. let rprev = r1; - assert!(rprev.run_state.is_stopped()); let dropped = instance.transition(InstanceStateRequested::Stopped).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_none()); let rnext = instance.object.current(); - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated >= rprev.time_updated); - assert_eq!(rnext.run_state, InstanceState::Stopped); + assert!(rnext.instance_state.gen > rprev.instance_state.gen); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!( + rnext.instance_state.time_updated + >= rprev.instance_state.time_updated + ); + assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); + assert!(rnext.instance_state.propolis_id.is_none()); + assert_eq!(rnext.vmm_state.state, InstanceState::Destroyed); assert!(rx.try_next().is_err()); logctx.cleanup_successful(); @@ -499,106 +513,115 @@ mod test { let (mut instance, mut rx) = make_instance(&logctx); let r1 = instance.object.current(); - info!(logctx.log, "new instance"; "run_state" => ?r1.run_state); - assert_eq!(r1.run_state, InstanceState::Creating); - assert_eq!(r1.gen, Generation::new()); + info!(logctx.log, "new instance"; "state" => ?r1); + assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.gen, Generation::new()); // There's no asynchronous transition going on yet so a // transition_finish() shouldn't change anything. assert!(instance.object.desired().is_none()); instance.transition_finish(); assert!(instance.object.desired().is_none()); - assert_eq!(&r1.time_updated, &instance.object.current().time_updated); - assert_eq!(&r1.run_state, &instance.object.current().run_state); - assert_eq!(r1.gen, instance.object.current().gen); + let rnext = instance.object.current(); + assert_eq!(r1.vmm_state.time_updated, rnext.vmm_state.time_updated); + assert_eq!(r1.vmm_state.state, rnext.vmm_state.state); + assert_eq!(r1.vmm_state.gen, rnext.vmm_state.gen); assert!(rx.try_next().is_err()); - // Now, if we transition to "Running", we must go through the async - // process. + // Set up a transition to Running. This has no immediate effect on the + // simulated instance's state, but it does queue up a transition. let mut rprev = r1; assert!(rx.try_next().is_err()); let dropped = instance.transition(InstanceStateRequested::Running).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_some()); - assert!(rx.try_next().is_ok()); + assert!(rx.try_next().is_err()); + + // The VMM should still be Starting and its generation should not have + // changed (the transition to Running is queued but hasn't executed). 
let rnext = instance.object.current(); - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated >= rprev.time_updated); - assert_eq!(rnext.run_state, InstanceState::Starting); - assert!(!rnext.run_state.is_stopped()); + assert_eq!(rnext.vmm_state.gen, rprev.vmm_state.gen); + assert_eq!(rnext.vmm_state.time_updated, rprev.vmm_state.time_updated); + assert_eq!(rnext.vmm_state.state, InstanceState::Starting); rprev = rnext; + // Now poke the instance. It should transition to Running. instance.transition_finish(); let rnext = instance.object.current(); - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated >= rprev.time_updated); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_none()); assert!(rx.try_next().is_err()); - assert_eq!(rprev.run_state, InstanceState::Starting); - assert_eq!(rnext.run_state, InstanceState::Running); + assert_eq!(rprev.vmm_state.state, InstanceState::Starting); + assert_eq!(rnext.vmm_state.state, InstanceState::Running); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); rprev = rnext; + + // There shouldn't be anything left on the queue now. instance.transition_finish(); let rnext = instance.object.current(); - assert_eq!(rprev.gen, rnext.gen); + assert_eq!(rprev.vmm_state.gen, rnext.vmm_state.gen); // If we transition again to "Running", the process should complete // immediately. - assert!(!rprev.run_state.is_stopped()); let dropped = instance.transition(InstanceStateRequested::Running).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_none()); assert!(rx.try_next().is_err()); let rnext = instance.object.current(); - assert_eq!(rnext.gen, rprev.gen); - assert_eq!(rnext.time_updated, rprev.time_updated); - assert_eq!(rnext.run_state, rprev.run_state); + assert_eq!(rnext.vmm_state.gen, rprev.vmm_state.gen); + assert_eq!(rnext.vmm_state.time_updated, rprev.vmm_state.time_updated); + assert_eq!(rnext.vmm_state.state, rprev.vmm_state.state); rprev = rnext; // If we go back to any stopped state, we go through the async process // again. - assert!(!rprev.run_state.is_stopped()); assert!(rx.try_next().is_err()); let dropped = instance.transition(InstanceStateRequested::Stopped).unwrap(); assert!(dropped.is_none()); assert!(instance.object.desired().is_some()); let rnext = instance.object.current(); - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated >= rprev.time_updated); - assert_eq!(rnext.run_state, InstanceState::Stopping); - assert!(!rnext.run_state.is_stopped()); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); + assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); rprev = rnext; // Propolis publishes its own transition to Stopping before it publishes // Stopped. instance.transition_finish(); let rnext = instance.object.current(); - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated >= rprev.time_updated); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_some()); - assert_eq!(rprev.run_state, InstanceState::Stopping); - assert_eq!(rnext.run_state, InstanceState::Stopping); + assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); + assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); rprev = rnext; - // Stopping goes to Stopped... 
+ // The Stopping-to-Stopped transition is masked from external viewers of + // the instance so that the instance doesn't appear to be Stopped before + // it is ready to be started again. instance.transition_finish(); let rnext = instance.object.current(); - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated >= rprev.time_updated); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_some()); - assert_eq!(rprev.run_state, InstanceState::Stopping); - assert_eq!(rnext.run_state, InstanceState::Stopped); + assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); + assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); rprev = rnext; - // ...and Stopped (internally) goes to Destroyed, though the sled agent - // hides this state from clients. + // ...and Stopped (internally) goes to Destroyed. This transition is + // hidden from external viewers of the instance by retiring the active + // Propolis ID. instance.transition_finish(); let rnext = instance.object.current(); - assert!(rnext.gen > rprev.gen); - assert_eq!(rprev.run_state, InstanceState::Stopped); - assert_eq!(rnext.run_state, InstanceState::Stopped); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); + assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); + assert_eq!(rnext.vmm_state.state, InstanceState::Destroyed); + assert!(rnext.instance_state.gen > rprev.instance_state.gen); logctx.cleanup_successful(); } @@ -611,9 +634,9 @@ mod test { let (mut instance, _rx) = make_instance(&logctx); let r1 = instance.object.current(); - info!(logctx.log, "new instance"; "run_state" => ?r1.run_state); - assert_eq!(r1.run_state, InstanceState::Creating); - assert_eq!(r1.gen, Generation::new()); + info!(logctx.log, "new instance"; "state" => ?r1); + assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.gen, Generation::new()); assert!(instance .transition(InstanceStateRequested::Running) .unwrap() @@ -626,7 +649,7 @@ mod test { std::thread::sleep(std::time::Duration::from_millis(100)); } - assert!(rnext.gen > rprev.gen); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); // Now reboot the instance. This is dispatched to Propolis, which will // move to the Rebooting state and then back to Running. 
@@ -635,9 +658,9 @@ mod test { .unwrap() .is_none()); let (rprev, rnext) = (rnext, instance.object.current()); - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated > rprev.time_updated); - assert_eq!(rnext.run_state, InstanceState::Rebooting); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); + assert_eq!(rnext.vmm_state.state, InstanceState::Rebooting); instance.transition_finish(); let (rprev, rnext) = (rnext, instance.object.current()); @@ -646,9 +669,9 @@ mod test { std::thread::sleep(std::time::Duration::from_millis(100)); } - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated > rprev.time_updated); - assert_eq!(rnext.run_state, InstanceState::Rebooting); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); + assert_eq!(rnext.vmm_state.state, InstanceState::Rebooting); assert!(instance.object.desired().is_some()); instance.transition_finish(); let (rprev, rnext) = (rnext, instance.object.current()); @@ -658,9 +681,9 @@ mod test { std::thread::sleep(std::time::Duration::from_millis(100)); } - assert!(rnext.gen > rprev.gen); - assert!(rnext.time_updated > rprev.time_updated); - assert_eq!(rnext.run_state, InstanceState::Running); + assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); + assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); + assert_eq!(rnext.vmm_state.state, InstanceState::Running); logctx.cleanup_successful(); } diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index bda34dec3f..08f6c7d10b 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -20,7 +20,7 @@ use dropshot::TypedBody; use illumos_utils::opte::params::DeleteVirtualNetworkInterfaceHost; use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost; use omicron_common::api::internal::nexus::DiskRuntimeState; -use omicron_common::api::internal::nexus::InstanceRuntimeState; +use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -71,12 +71,19 @@ async fn instance_register( rqctx: RequestContext>, path_params: Path, body: TypedBody, -) -> Result, HttpError> { +) -> Result, HttpError> { let sa = rqctx.context(); let instance_id = path_params.into_inner().instance_id; let body_args = body.into_inner(); Ok(HttpResponseOk( - sa.instance_register(instance_id, body_args.initial).await?, + sa.instance_register( + instance_id, + body_args.propolis_id, + body_args.hardware, + body_args.instance_runtime, + body_args.vmm_runtime, + ) + .await?, )) } @@ -118,7 +125,7 @@ async fn instance_put_migration_ids( rqctx: RequestContext>, path_params: Path, body: TypedBody, -) -> Result, HttpError> { +) -> Result, HttpError> { let sa = rqctx.context(); let instance_id = path_params.into_inner().instance_id; let body_args = body.into_inner(); diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 7283148563..397a1980a5 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -6,16 +6,19 @@ use super::simulatable::Simulatable; -use crate::common::instance::{ObservedPropolisState, PublishedInstanceState}; +use crate::common::instance::{ObservedPropolisState, PublishedVmmState}; use crate::nexus::NexusClient; use crate::params::{InstanceMigrationSourceParams, InstanceStateRequested}; 
use async_trait::async_trait; +use chrono::Utc; use nexus_client; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::external::InstanceState as ApiInstanceState; use omicron_common::api::external::ResourceType; -use omicron_common::api::internal::nexus::InstanceRuntimeState; +use omicron_common::api::internal::nexus::{ + InstanceRuntimeState, SledInstanceState, +}; use propolis_client::api::InstanceMigrateStatusResponse as PropolisMigrateStatus; use propolis_client::api::InstanceState as PropolisInstanceState; use propolis_client::api::InstanceStateMonitorResponse; @@ -96,54 +99,60 @@ impl SimInstanceInner { target: &InstanceStateRequested, ) -> Result, Error> { match target { + // When Nexus intends to migrate into a VMM, it should create that + // VMM in the Migrating state and shouldn't request anything else + // from it before asking to migrate in. InstanceStateRequested::MigrationTarget(_) => { - match self.next_resting_state() { - ApiInstanceState::Creating => { - self.queue_propolis_state( - PropolisInstanceState::Migrating, - ); - - let migration_id = - self.state.current().migration_id.expect( - "should have migration ID set before getting \ - request to migrate in", - ); - self.queue_migration_status(PropolisMigrateStatus { - migration_id, - state: propolis_client::api::MigrationState::Sync, - }); - self.queue_migration_status(PropolisMigrateStatus { - migration_id, - state: propolis_client::api::MigrationState::Finish, - }); - self.queue_propolis_state( - PropolisInstanceState::Running, - ); - } - _ => { - return Err(Error::invalid_request(&format!( - "can't request migration in with pending resting \ - state {}", - self.next_resting_state() - ))) - } + if !self.queue.is_empty() { + return Err(Error::invalid_request(&format!( + "can't request migration in with a non-empty state + transition queue (current state: {:?})", + self + ))); + } + if self.state.vmm().state != ApiInstanceState::Migrating { + return Err(Error::invalid_request(&format!( + "can't request migration in for a vmm that wasn't \ + created in the migrating state (current state: {:?})", + self + ))); } + + // Propolis transitions to the Migrating state once before + // actually starting migration. + self.queue_propolis_state(PropolisInstanceState::Migrating); + let migration_id = + self.state.instance().migration_id.unwrap_or_else(|| { + panic!( + "should have migration ID set before getting request to + migrate in (current state: {:?})", + self + ) + }); + self.queue_migration_status(PropolisMigrateStatus { + migration_id, + state: propolis_client::api::MigrationState::Sync, + }); + self.queue_migration_status(PropolisMigrateStatus { + migration_id, + state: propolis_client::api::MigrationState::Finish, + }); + self.queue_propolis_state(PropolisInstanceState::Running); } InstanceStateRequested::Running => { match self.next_resting_state() { - ApiInstanceState::Creating => { - // The non-simulated sled agent explicitly and - // synchronously publishes the "Starting" state when - // cold-booting a new VM (so that the VM appears to be - // starting while its Propolis process is being - // launched). - self.state.transition(PublishedInstanceState::Starting); + // It's only valid to request the Running state after + // successfully registering a VMM, and a registered VMM + // should never be in the Creating state. 
+ ApiInstanceState::Creating => unreachable!( + "VMMs should never try to reach the Creating state" + ), + ApiInstanceState::Starting => { self.queue_propolis_state( PropolisInstanceState::Running, ); } - ApiInstanceState::Starting - | ApiInstanceState::Running + ApiInstanceState::Running | ApiInstanceState::Rebooting | ApiInstanceState::Migrating => {} @@ -157,19 +166,26 @@ impl SimInstanceInner { | ApiInstanceState::Destroyed => { return Err(Error::invalid_request(&format!( "can't request state Running with pending resting \ - state {}", - self.next_resting_state() + state {} (current state: {:?})", + self.next_resting_state(), + self ))) } } } InstanceStateRequested::Stopped => { match self.next_resting_state() { - ApiInstanceState::Creating => { - self.state.transition(PublishedInstanceState::Stopped); + ApiInstanceState::Creating => unreachable!( + "VMMs should never try to reach the Creating state" + ), + ApiInstanceState::Starting => { + self.state.terminate_rudely(); } ApiInstanceState::Running => { - self.state.transition(PublishedInstanceState::Stopping); + self.state.transition_vmm( + PublishedVmmState::Stopping, + Utc::now(), + ); self.queue_propolis_state( PropolisInstanceState::Stopping, ); @@ -188,8 +204,9 @@ impl SimInstanceInner { _ => { return Err(Error::invalid_request(&format!( "can't request state Stopped with pending resting \ - state {}", - self.next_resting_state() + state {} (current state: {:?})", + self.next_resting_state(), + self ))) } } @@ -198,12 +215,13 @@ impl SimInstanceInner { ApiInstanceState::Running => { // Further requests to reboot are ignored if the instance // is currently rebooting or about to reboot. - if self.state.current().run_state - != ApiInstanceState::Rebooting + if self.state.vmm().state != ApiInstanceState::Rebooting && !self.reboot_pending() { - self.state - .transition(PublishedInstanceState::Rebooting); + self.state.transition_vmm( + PublishedVmmState::Rebooting, + Utc::now(), + ); self.queue_propolis_state( PropolisInstanceState::Rebooting, ); @@ -214,8 +232,10 @@ impl SimInstanceInner { } _ => { return Err(Error::invalid_request(&format!( - "can't request Reboot with pending resting state {}", - self.next_resting_state() + "can't request Reboot with pending resting state {} \ + (current state: {:?})", + self.next_resting_state(), + self ))) } }, @@ -240,7 +260,7 @@ impl SimInstanceInner { } self.state.apply_propolis_observation(&ObservedPropolisState::new( - &self.current(), + &self.state.instance(), &self.last_response, )) } else { @@ -248,11 +268,6 @@ impl SimInstanceInner { } } - /// Yields the current simulated instance runtime state. - fn current(&self) -> InstanceRuntimeState { - self.state.current().clone() - } - /// If the state change queue contains at least once instance state change, /// returns the requested instance state associated with the last instance /// state on the queue. Returns None otherwise. @@ -291,15 +306,26 @@ impl SimInstanceInner { /// queue is drained. 
fn next_resting_state(&self) -> ApiInstanceState { if self.queue.is_empty() { - self.state.current().run_state + self.state.vmm().state } else { if let Some(last_state) = self.last_queued_instance_state() { - crate::common::instance::PublishedInstanceState::from( - last_state, - ) - .into() + use ApiInstanceState as ApiState; + use PropolisInstanceState as PropolisState; + match last_state { + PropolisState::Creating | PropolisState::Starting => { + ApiState::Starting + } + PropolisState::Running => ApiState::Running, + PropolisState::Stopping => ApiState::Stopping, + PropolisState::Stopped => ApiState::Stopped, + PropolisState::Rebooting => ApiState::Rebooting, + PropolisState::Migrating => ApiState::Migrating, + PropolisState::Repairing => ApiState::Repairing, + PropolisState::Failed => ApiState::Failed, + PropolisState::Destroyed => ApiState::Destroyed, + } } else { - self.state.current().run_state + self.state.vmm().state } } } @@ -317,10 +343,11 @@ impl SimInstanceInner { /// Simulates rude termination by moving the instance to the Destroyed state /// immediately and clearing the queue of pending state transitions. - fn terminate(&mut self) -> InstanceRuntimeState { - self.state.transition(PublishedInstanceState::Stopped); + fn terminate(&mut self) -> SledInstanceState { + self.state.terminate_rudely(); self.queue.clear(); - self.state.current().clone() + self.destroyed = true; + self.state.sled_instance_state() } /// Stores a set of migration IDs in the instance's runtime state. @@ -328,23 +355,23 @@ impl SimInstanceInner { &mut self, old_runtime: &InstanceRuntimeState, ids: &Option, - ) -> Result { + ) -> Result { if self.state.migration_ids_already_set(old_runtime, ids) { - return Ok(self.state.current().clone()); + return Ok(self.state.sled_instance_state()); } - if self.state.current().propolis_gen != old_runtime.propolis_gen { + if self.state.instance().gen != old_runtime.gen { return Err(Error::InvalidRequest { message: format!( "wrong Propolis ID generation: expected {}, got {}", - self.state.current().propolis_gen, - old_runtime.propolis_gen + self.state.instance().gen, + old_runtime.gen ), }); } - self.state.set_migration_ids(ids); - Ok(self.state.current().clone()) + self.state.set_migration_ids(ids, Utc::now()); + Ok(self.state.sled_instance_state()) } } @@ -369,7 +396,7 @@ pub struct SimInstance { } impl SimInstance { - pub fn terminate(&self) -> InstanceRuntimeState { + pub fn terminate(&self) -> SledInstanceState { self.inner.lock().unwrap().terminate() } @@ -377,7 +404,7 @@ impl SimInstance { &self, old_runtime: &InstanceRuntimeState, ids: &Option, - ) -> Result { + ) -> Result { let mut inner = self.inner.lock().unwrap(); inner.put_migration_ids(old_runtime, ids) } @@ -385,18 +412,30 @@ impl SimInstance { #[async_trait] impl Simulatable for SimInstance { - type CurrentState = InstanceRuntimeState; + type CurrentState = SledInstanceState; type RequestedState = InstanceStateRequested; type ProducerArgs = (); type Action = InstanceAction; - fn new(current: InstanceRuntimeState) -> Self { + fn new(current: SledInstanceState) -> Self { + assert!(matches!( + current.vmm_state.state, + ApiInstanceState::Starting | ApiInstanceState::Migrating), + "new VMMs should always be registered in the Starting or Migrating \ + state (supplied state: {:?})", + current.vmm_state.state + ); + SimInstance { inner: Arc::new(Mutex::new(SimInstanceInner { - state: InstanceStates::new(current), + state: InstanceStates::new( + current.instance_state, + current.vmm_state, + current.propolis_id, + 
), last_response: InstanceStateMonitorResponse { gen: 1, - state: PropolisInstanceState::Creating, + state: PropolisInstanceState::Starting, migration: None, }, queue: VecDeque::new(), @@ -425,11 +464,11 @@ impl Simulatable for SimInstance { } fn generation(&self) -> Generation { - self.inner.lock().unwrap().current().gen + self.inner.lock().unwrap().state.vmm().gen } fn current(&self) -> Self::CurrentState { - self.inner.lock().unwrap().current() + self.inner.lock().unwrap().state.sled_instance_state() } fn desired(&self) -> Option { @@ -448,7 +487,7 @@ impl Simulatable for SimInstance { nexus_client .cpapi_instances_put( id, - &nexus_client::types::InstanceRuntimeState::from(current), + &nexus_client::types::SledInstanceState::from(current), ) .await .map(|_| ()) diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 42fff355a5..e4dac2f4b9 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -21,8 +21,12 @@ use crate::sim::simulatable::Simulatable; use crate::updates::UpdateManager; use futures::lock::Mutex; use omicron_common::api::external::{DiskState, Error, ResourceType}; -use omicron_common::api::internal::nexus::DiskRuntimeState; -use omicron_common::api::internal::nexus::InstanceRuntimeState; +use omicron_common::api::internal::nexus::{ + DiskRuntimeState, SledInstanceState, +}; +use omicron_common::api::internal::nexus::{ + InstanceRuntimeState, VmmRuntimeState, +}; use slog::Logger; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::sync::Arc; @@ -219,18 +223,21 @@ impl SledAgent { pub async fn instance_register( self: &Arc, instance_id: Uuid, - mut initial_hardware: InstanceHardware, - ) -> Result { + propolis_id: Uuid, + hardware: InstanceHardware, + instance_runtime: InstanceRuntimeState, + vmm_runtime: VmmRuntimeState, + ) -> Result { // respond with a fake 500 level failure if asked to ensure an instance // with more than 16 CPUs. - let ncpus: i64 = (&initial_hardware.runtime.ncpus).into(); + let ncpus: i64 = (&hardware.properties.ncpus).into(); if ncpus > 16 { return Err(Error::internal_error( &"could not allocate an instance: ran out of CPUs!", )); }; - for disk in &initial_hardware.disks { + for disk in &hardware.disks { let initial_state = DiskRuntimeState { disk_state: DiskState::Attached(instance_id), gen: omicron_common::api::external::Generation::new(), @@ -255,27 +262,24 @@ impl SledAgent { .await?; } - // if we're making our first instance and a mock propolis-server - // is running, interact with it, and patch the instance's - // reported propolis-server IP for reports back to nexus. + // If the user of this simulated agent previously requested a mock + // Propolis server, start that server. + // + // N.B. The server serves on localhost and not on the per-sled IPv6 + // address that Nexus chose when starting the instance. Tests that + // use the mock are expected to correct the contents of CRDB to + // point to the correct address. 
let mock_lock = self.mock_propolis.lock().await; if let Some((_srv, client)) = mock_lock.as_ref() { - if let Some(addr) = initial_hardware.runtime.propolis_addr.as_mut() - { - addr.set_ip(Ipv6Addr::LOCALHOST.into()); - } if !self.instances.contains_key(&instance_id).await { let properties = propolis_client::types::InstanceProperties { - id: initial_hardware.runtime.propolis_id, - name: initial_hardware.runtime.hostname.clone(), + id: propolis_id, + name: hardware.properties.hostname.clone(), description: "sled-agent-sim created instance".to_string(), image_id: Uuid::default(), bootrom_id: Uuid::default(), - memory: initial_hardware - .runtime - .memory - .to_whole_mebibytes(), - vcpus: initial_hardware.runtime.ncpus.0 as u8, + memory: hardware.properties.memory.to_whole_mebibytes(), + vcpus: hardware.properties.ncpus.0 as u8, }; let body = propolis_client::types::InstanceEnsureRequest { properties, @@ -298,10 +302,18 @@ impl SledAgent { let instance_run_time_state = self .instances - .sim_ensure(&instance_id, initial_hardware.runtime, None) + .sim_ensure( + &instance_id, + SledInstanceState { + instance_state: instance_runtime, + vmm_state: vmm_runtime, + propolis_id, + }, + None, + ) .await?; - for disk_request in &initial_hardware.disks { + for disk_request in &hardware.disks { let vcr = &disk_request.volume_construction_request; self.map_disk_ids_to_region_ids(&vcr).await?; } @@ -328,9 +340,20 @@ impl SledAgent { }; self.detach_disks_from_instance(instance_id).await?; - Ok(InstanceUnregisterResponse { + let response = InstanceUnregisterResponse { updated_runtime: Some(instance.terminate()), - }) + }; + + // Poke the now-destroyed instance to force it to be removed from the + // collection. + // + // TODO: In the real sled agent, this happens inline without publishing + // any other state changes, whereas this call causes any pending state + // changes to be published. This can be fixed by adding a simulated + // object collection function to forcibly remove an object from a + // collection. + self.instances.sim_poke(instance_id, PokeMode::Drain).await; + Ok(response) } /// Asks the supplied instance to transition to the requested state. 
@@ -418,7 +441,7 @@ impl SledAgent { instance_id: Uuid, old_runtime: &InstanceRuntimeState, migration_ids: &Option, - ) -> Result { + ) -> Result { let instance = self.instances.sim_get_cloned_object(&instance_id).await?; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5574edca55..b6f910220e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -35,6 +35,9 @@ use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, SLED_PREFIX, }; use omicron_common::api::external::Vni; +use omicron_common::api::internal::nexus::{ + SledInstanceState, VmmRuntimeState, +}; use omicron_common::api::internal::shared::RackNetworkConfig; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -48,7 +51,7 @@ use sled_hardware::underlay; use sled_hardware::HardwareManager; use slog::Logger; use std::collections::BTreeMap; -use std::net::{Ipv6Addr, SocketAddrV6}; +use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; use uuid::Uuid; @@ -770,11 +773,22 @@ impl SledAgent { pub async fn instance_ensure_registered( &self, instance_id: Uuid, - initial: InstanceHardware, - ) -> Result { + propolis_id: Uuid, + hardware: InstanceHardware, + instance_runtime: InstanceRuntimeState, + vmm_runtime: VmmRuntimeState, + propolis_addr: SocketAddr, + ) -> Result { self.inner .instances - .ensure_registered(instance_id, initial) + .ensure_registered( + instance_id, + propolis_id, + hardware, + instance_runtime, + vmm_runtime, + propolis_addr, + ) .await .map_err(|e| Error::Instance(e)) } @@ -818,7 +832,7 @@ impl SledAgent { instance_id: Uuid, old_runtime: &InstanceRuntimeState, migration_ids: &Option, - ) -> Result { + ) -> Result { self.inner .instances .put_migration_ids(instance_id, old_runtime, migration_ids) From 7e88bdffbae7a8796a4ebc6a5bfe80d1fedd4bb7 Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 12 Oct 2023 11:06:18 -0700 Subject: [PATCH 11/13] Move instance/VMM table schema upgrade to version 7.0.0 (#4270) The instance/VMM table schema change was slated to be 6.0.0 in the original version of its pull request. That version was then added by a separate PR, but this didn't cause a merge conflict because the instance/VMM upgrade used an extra "0" in its schema upgrade files (for fear that there might be more than nine of them and that the leading 0 would be necessary to ensure they had the correct lexographical ordering). The schema changes don't conflict with each other, so everything (probably) works fine, but having two logically separate updates in one version is at the very least aesthetically displeasing. Move the instance schema upgrade to version 7.0.0. Rename the files to remove the leading 0 in their numbers, since that turned out not to be needed. Tested via cargo tests (there are no other functional or schema changes beyond renaming and updating version constants). 
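
(A quick, hypothetical illustration of the ordering concern above, not taken from this change: plain lexicographic sorting of unpadded "upN.sql" names only matches numeric order while there are at most nine files, which is why the leading zero was considered in the first place.)

fn main() {
    let mut names = vec!["up1.sql", "up2.sql", "up9.sql", "up10.sql"];
    names.sort(); // lexicographic (byte-wise) ordering of the names
    // Prints ["up1.sql", "up10.sql", "up2.sql", "up9.sql"]: "up10" sorts
    // before "up2", so zero-padding would only start to matter past nine files.
    println!("{names:?}");
}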
--- dev-tools/omdb/tests/env.out | 6 +++--- dev-tools/omdb/tests/successes.out | 12 ++++++------ nexus/db-model/src/schema.rs | 2 +- schema/crdb/{6.0.0 => 7.0.0}/README.adoc | 0 schema/crdb/{6.0.0/up01.sql => 7.0.0/up1.sql} | 0 schema/crdb/{6.0.0/up02.sql => 7.0.0/up2.sql} | 0 schema/crdb/{6.0.0/up03.sql => 7.0.0/up3.sql} | 0 schema/crdb/{6.0.0/up04.sql => 7.0.0/up4.sql} | 0 schema/crdb/{6.0.0/up05.sql => 7.0.0/up5.sql} | 0 schema/crdb/{6.0.0/up06.sql => 7.0.0/up6.sql} | 0 schema/crdb/{6.0.0/up07.sql => 7.0.0/up7.sql} | 0 schema/crdb/{6.0.0/up08.sql => 7.0.0/up8.sql} | 0 schema/crdb/{6.0.0/up09.sql => 7.0.0/up9.sql} | 0 schema/crdb/dbinit.sql | 2 +- 14 files changed, 11 insertions(+), 11 deletions(-) rename schema/crdb/{6.0.0 => 7.0.0}/README.adoc (100%) rename schema/crdb/{6.0.0/up01.sql => 7.0.0/up1.sql} (100%) rename schema/crdb/{6.0.0/up02.sql => 7.0.0/up2.sql} (100%) rename schema/crdb/{6.0.0/up03.sql => 7.0.0/up3.sql} (100%) rename schema/crdb/{6.0.0/up04.sql => 7.0.0/up4.sql} (100%) rename schema/crdb/{6.0.0/up05.sql => 7.0.0/up5.sql} (100%) rename schema/crdb/{6.0.0/up06.sql => 7.0.0/up6.sql} (100%) rename schema/crdb/{6.0.0/up07.sql => 7.0.0/up7.sql} (100%) rename schema/crdb/{6.0.0/up08.sql => 7.0.0/up8.sql} (100%) rename schema/crdb/{6.0.0/up09.sql => 7.0.0/up9.sql} (100%) diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 07a6d3fae5..8e345b78d1 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -7,7 +7,7 @@ sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "--db-url", "junk", "sleds"] termination: Exited(2) @@ -172,7 +172,7 @@ stderr: note: database URL not specified. Will search DNS. note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "db", "sleds"] termination: Exited(0) @@ -185,5 +185,5 @@ stderr: note: database URL not specified. Will search DNS. 
note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 038f365e8e..6fd84c5eb3 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -8,7 +8,7 @@ external oxide-dev.test 2 create silo: "tes --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "diff", "external", "2"] termination: Exited(0) @@ -24,7 +24,7 @@ changes: names added: 1, names removed: 0 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "names", "external", "2"] termination: Exited(0) @@ -36,7 +36,7 @@ External zone: oxide-dev.test --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-instances"] termination: Exited(0) @@ -52,7 +52,7 @@ Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_ --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] termination: Exited(0) @@ -71,7 +71,7 @@ sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "sleds"] termination: Exited(0) @@ -82,7 +82,7 @@ sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (6.0.0) +note: database schema version matches expected (7.0.0) ============================================= EXECUTING COMMAND: omdb ["mgs", "inventory"] termination: Exited(0) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 2d6970452d..61a05754c6 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1142,7 +1142,7 @@ table! { /// /// This should be updated whenever the schema is changed. 
For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(6, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(7, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/schema/crdb/6.0.0/README.adoc b/schema/crdb/7.0.0/README.adoc similarity index 100% rename from schema/crdb/6.0.0/README.adoc rename to schema/crdb/7.0.0/README.adoc diff --git a/schema/crdb/6.0.0/up01.sql b/schema/crdb/7.0.0/up1.sql similarity index 100% rename from schema/crdb/6.0.0/up01.sql rename to schema/crdb/7.0.0/up1.sql diff --git a/schema/crdb/6.0.0/up02.sql b/schema/crdb/7.0.0/up2.sql similarity index 100% rename from schema/crdb/6.0.0/up02.sql rename to schema/crdb/7.0.0/up2.sql diff --git a/schema/crdb/6.0.0/up03.sql b/schema/crdb/7.0.0/up3.sql similarity index 100% rename from schema/crdb/6.0.0/up03.sql rename to schema/crdb/7.0.0/up3.sql diff --git a/schema/crdb/6.0.0/up04.sql b/schema/crdb/7.0.0/up4.sql similarity index 100% rename from schema/crdb/6.0.0/up04.sql rename to schema/crdb/7.0.0/up4.sql diff --git a/schema/crdb/6.0.0/up05.sql b/schema/crdb/7.0.0/up5.sql similarity index 100% rename from schema/crdb/6.0.0/up05.sql rename to schema/crdb/7.0.0/up5.sql diff --git a/schema/crdb/6.0.0/up06.sql b/schema/crdb/7.0.0/up6.sql similarity index 100% rename from schema/crdb/6.0.0/up06.sql rename to schema/crdb/7.0.0/up6.sql diff --git a/schema/crdb/6.0.0/up07.sql b/schema/crdb/7.0.0/up7.sql similarity index 100% rename from schema/crdb/6.0.0/up07.sql rename to schema/crdb/7.0.0/up7.sql diff --git a/schema/crdb/6.0.0/up08.sql b/schema/crdb/7.0.0/up8.sql similarity index 100% rename from schema/crdb/6.0.0/up08.sql rename to schema/crdb/7.0.0/up8.sql diff --git a/schema/crdb/6.0.0/up09.sql b/schema/crdb/7.0.0/up9.sql similarity index 100% rename from schema/crdb/6.0.0/up09.sql rename to schema/crdb/7.0.0/up9.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 2b06e4cbd6..9f5f78326c 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2539,7 +2539,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '6.0.0', NULL) + ( TRUE, NOW(), NOW(), '7.0.0', NULL) ON CONFLICT DO NOTHING; From 7d5538267e45737d951df440879d96cea533592f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 12 Oct 2023 14:54:57 -0700 Subject: [PATCH 12/13] [oximeter] Use stable hash, stable format, for deriving timeseries_key (#4251) - Uses [bcs](https://crates.io/crates/bcs) as a stable binary format for inputs to the `timeseries_key` function - Uses [highway](https://crates.io/crates/highway) as a stable hash algorithm (it's keyed, fast, portable, and well-distributed). - Additionally, adds an EXPECTORATE test validating the stability of timeseries_key values. Fixes https://github.com/oxidecomputer/omicron/issues/4008 , and also addresses the issue raised in https://github.com/oxidecomputer/omicron/issues/4221 regarding stable input NOTE: This PR itself *also* breaks the stability of the `timeseries_key` (hopefully for the last time), and will rely on https://github.com/oxidecomputer/omicron/pull/4246 to wipe the metrics DB for the next release. 
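
As a rough sketch of the derivation (simplified; the actual `timeseries_key_for` in the diff below hashes the timeseries name, the sorted target and metric field maps, and the datum type), each input is first serialized with bcs to get a canonical byte string, and those bytes are then fed to HighwayHasher, so the resulting u64 does not depend on the platform, the toolchain, or std's unstable default hasher:

use highway::HighwayHasher;
use std::hash::{Hash, Hasher};

// Simplified stand-in types: plain strings and integers instead of oximeter's
// Field and DatumType values.
fn stable_key(timeseries_name: &str, fields: &[(&str, i64)]) -> u64 {
    let mut hasher = HighwayHasher::default();
    // bcs gives a canonical, toolchain-independent encoding, so the bytes fed
    // to the hasher are identical across platforms and releases.
    bcs::to_bytes(&timeseries_name).unwrap().hash(&mut hasher);
    for field in fields {
        bcs::to_bytes(field).unwrap().hash(&mut hasher);
    }
    hasher.finish()
}

The EXPECTORATE tests added below then pin the produced values, so an accidental change to this derivation shows up as a change-detector failure instead of silently changing keys in provisioned databases.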
--- Cargo.lock | 21 +++ Cargo.toml | 1 + oximeter/db/Cargo.toml | 4 + oximeter/db/src/lib.rs | 130 ++++++++++++++++-- .../db/test-output/field-timeseries-keys.txt | 12 ++ .../db/test-output/sample-timeseries-key.txt | 1 + oximeter/oximeter/Cargo.toml | 1 + oximeter/oximeter/src/types.rs | 20 ++- 8 files changed, 180 insertions(+), 10 deletions(-) create mode 100644 oximeter/db/test-output/field-timeseries-keys.txt create mode 100644 oximeter/db/test-output/sample-timeseries-key.txt diff --git a/Cargo.lock b/Cargo.lock index d5a90f7f85..ce17dbe311 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -493,6 +493,16 @@ dependencies = [ "sha2", ] +[[package]] +name = "bcs" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd3ffe8b19a604421a5d461d4a70346223e535903fbc3067138bddbebddcf77" +dependencies = [ + "serde", + "thiserror", +] + [[package]] name = "bhyve_api" version = "0.0.0" @@ -3098,6 +3108,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" +[[package]] +name = "highway" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ba82c000837f4e74df01a5520f0dc48735d4aed955a99eae4428bab7cf3acd" + [[package]] name = "hkdf" version = "0.12.3" @@ -5732,6 +5748,7 @@ dependencies = [ "rstest", "schemars", "serde", + "strum", "thiserror", "trybuild", "uuid", @@ -5810,10 +5827,13 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", + "bcs", "bytes", "chrono", "clap 4.4.3", "dropshot", + "expectorate", + "highway", "itertools 0.11.0", "omicron-test-utils", "omicron-workspace-hack", @@ -5827,6 +5847,7 @@ dependencies = [ "slog-async", "slog-dtrace", "slog-term", + "strum", "thiserror", "tokio", "usdt", diff --git a/Cargo.toml b/Cargo.toml index 7521bb4d45..5c10a94706 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -199,6 +199,7 @@ headers = "0.3.9" heck = "0.4" hex = "0.4.3" hex-literal = "0.4.1" +highway = "1.1.0" hkdf = "0.12.3" http = "0.2.9" httptest = "0.15.4" diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index ad6d584b1b..d37c57ccce 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -8,10 +8,12 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true async-trait.workspace = true +bcs.workspace = true bytes = { workspace = true, features = [ "serde" ] } chrono.workspace = true clap.workspace = true dropshot.workspace = true +highway.workspace = true oximeter.workspace = true regex.workspace = true reqwest = { workspace = true, features = [ "json" ] } @@ -28,9 +30,11 @@ uuid.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] +expectorate.workspace = true itertools.workspace = true omicron-test-utils.workspace = true slog-dtrace.workspace = true +strum.workspace = true [[bin]] name = "oxdb" diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 9720d6914d..c878b8ff2a 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -328,19 +328,46 @@ pub struct TimeseriesPageSelector { pub(crate) type TimeseriesKey = u64; pub(crate) fn timeseries_key(sample: &Sample) -> TimeseriesKey { - timeseries_key_for(sample.target_fields(), sample.metric_fields()) + timeseries_key_for( + &sample.timeseries_name, + sample.sorted_target_fields(), + sample.sorted_metric_fields(), + sample.measurement.datum_type(), + ) } -pub(crate) fn timeseries_key_for<'a>( - target_fields: impl Iterator, - metric_fields: impl Iterator, +// It's 
critical that values used for derivation of the timeseries_key are stable. +// We use "bcs" to ensure stability of the derivation across hardware and rust toolchain revisions. +fn canonicalize(what: &str, value: &T) -> Vec { + bcs::to_bytes(value) + .unwrap_or_else(|_| panic!("Failed to serialize {what}")) +} + +fn timeseries_key_for( + timeseries_name: &str, + target_fields: &BTreeMap, + metric_fields: &BTreeMap, + datum_type: DatumType, ) -> TimeseriesKey { - use std::collections::hash_map::DefaultHasher; + // We use HighwayHasher primarily for stability - it should provide a stable + // hash for the values used to derive the timeseries_key. + use highway::HighwayHasher; use std::hash::{Hash, Hasher}; - let mut hasher = DefaultHasher::new(); - for field in target_fields.chain(metric_fields) { - field.hash(&mut hasher); + + // NOTE: The order of these ".hash" calls matters, changing them will change + // the derivation of the "timeseries_key". We have change-detector tests for + // modifications like this, but be cautious, making such a change will + // impact all currently-provisioned databases. + let mut hasher = HighwayHasher::default(); + canonicalize("timeseries name", timeseries_name).hash(&mut hasher); + for field in target_fields.values() { + canonicalize("target field", &field).hash(&mut hasher); } + for field in metric_fields.values() { + canonicalize("metric field", &field).hash(&mut hasher); + } + canonicalize("datum type", &datum_type).hash(&mut hasher); + hasher.finish() } @@ -370,8 +397,9 @@ const TIMESERIES_NAME_REGEX: &str = #[cfg(test)] mod tests { - use super::TimeseriesName; + use super::*; use std::convert::TryFrom; + use uuid::Uuid; #[test] fn test_timeseries_name() { @@ -393,4 +421,88 @@ mod tests { assert!(TimeseriesName::try_from("a:").is_err()); assert!(TimeseriesName::try_from("123").is_err()); } + + // Validates that the timeseries_key stability for a sample is stable. + #[test] + fn test_timeseries_key_sample_stability() { + #[derive(oximeter::Target)] + pub struct TestTarget { + pub name: String, + pub num: i64, + } + + #[derive(oximeter::Metric)] + pub struct TestMetric { + pub id: Uuid, + pub datum: i64, + } + + let target = TestTarget { name: String::from("Hello"), num: 1337 }; + let metric = TestMetric { id: Uuid::nil(), datum: 0x1de }; + let sample = Sample::new(&target, &metric).unwrap(); + let key = super::timeseries_key(&sample); + + expectorate::assert_contents( + "test-output/sample-timeseries-key.txt", + &format!("{key}"), + ); + } + + // Validates that the timeseries_key stability for specific fields is + // stable. + #[test] + fn test_timeseries_key_field_stability() { + use oximeter::{Field, FieldValue}; + use strum::EnumCount; + + let values = [ + ("string", FieldValue::String(String::default())), + ("i8", FieldValue::I8(-0x0A)), + ("u8", FieldValue::U8(0x0A)), + ("i16", FieldValue::I16(-0x0ABC)), + ("u16", FieldValue::U16(0x0ABC)), + ("i32", FieldValue::I32(-0x0ABC_0000)), + ("u32", FieldValue::U32(0x0ABC_0000)), + ("i64", FieldValue::I64(-0x0ABC_0000_0000_0000)), + ("u64", FieldValue::U64(0x0ABC_0000_0000_0000)), + ( + "ipaddr", + FieldValue::IpAddr(std::net::IpAddr::V4( + std::net::Ipv4Addr::LOCALHOST, + )), + ), + ("uuid", FieldValue::Uuid(uuid::Uuid::nil())), + ("bool", FieldValue::Bool(true)), + ]; + + // Exhaustively testing enums is a bit tricky. Although it's easy to + // check "all variants of an enum are matched", it harder to test "all + // variants of an enum have been supplied". 
+ // + // We use this as a proxy, confirming that each variant is represented + // here for the purposes of tracking stability. + assert_eq!(values.len(), FieldValue::COUNT); + + let mut output = vec![]; + for (name, value) in values { + let target_fields = BTreeMap::from([( + "field".to_string(), + Field { name: name.to_string(), value }, + )]); + let metric_fields = BTreeMap::new(); + let key = timeseries_key_for( + "timeseries name", + &target_fields, + &metric_fields, + // ... Not actually, but we are only trying to compare fields here. + DatumType::Bool, + ); + output.push(format!("{name} -> {key}")); + } + + expectorate::assert_contents( + "test-output/field-timeseries-keys.txt", + &output.join("\n"), + ); + } } diff --git a/oximeter/db/test-output/field-timeseries-keys.txt b/oximeter/db/test-output/field-timeseries-keys.txt new file mode 100644 index 0000000000..d82c143600 --- /dev/null +++ b/oximeter/db/test-output/field-timeseries-keys.txt @@ -0,0 +1,12 @@ +string -> 5554437373902071418 +i8 -> 8051527130089763326 +u8 -> 1403385090410880239 +i16 -> 4425083437960417917 +u16 -> 13883626507745758865 +i32 -> 14729289749324644435 +u32 -> 12103188004421096629 +i64 -> 961258395613152243 +u64 -> 15804125619400967189 +ipaddr -> 14737150884237616680 +uuid -> 16911606541498230091 +bool -> 10983724023695040909 \ No newline at end of file diff --git a/oximeter/db/test-output/sample-timeseries-key.txt b/oximeter/db/test-output/sample-timeseries-key.txt new file mode 100644 index 0000000000..aeb515c78e --- /dev/null +++ b/oximeter/db/test-output/sample-timeseries-key.txt @@ -0,0 +1 @@ +365003276793586811 \ No newline at end of file diff --git a/oximeter/oximeter/Cargo.toml b/oximeter/oximeter/Cargo.toml index 7d01b8f8be..8a69494d5a 100644 --- a/oximeter/oximeter/Cargo.toml +++ b/oximeter/oximeter/Cargo.toml @@ -13,6 +13,7 @@ omicron-common.workspace = true oximeter-macro-impl.workspace = true schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } serde.workspace = true +strum.workspace = true thiserror.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index aa61a426e3..d3f1b9e746 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -90,7 +90,15 @@ impl_field_type_from! { bool, FieldType::Bool } /// The `FieldValue` contains the value of a target or metric field. #[derive( - Clone, Debug, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize, + Clone, + Debug, + Hash, + PartialEq, + Eq, + JsonSchema, + Serialize, + Deserialize, + strum::EnumCount, )] #[serde(tag = "type", content = "value", rename_all = "snake_case")] pub enum FieldValue { @@ -761,6 +769,11 @@ impl Sample { self.target.fields.values() } + /// Return the sorted fields of this sample's target. + pub fn sorted_target_fields(&self) -> &BTreeMap { + &self.target.fields + } + /// Return the name of this sample's metric. pub fn metric_name(&self) -> &str { &self.metric.name @@ -771,6 +784,11 @@ impl Sample { self.metric.fields.values() } + /// Return the sorted fields of this sample's metric + pub fn sorted_metric_fields(&self) -> &BTreeMap { + &self.metric.fields + } + // Check validity of field names for the target and metric. Currently this // just verifies there are no duplicate names between them. 
fn verify_field_names( From 2af5ec9fd63495a3d0aa90f7d4b1cd1b9eae17ff Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 12 Oct 2023 14:55:25 -0700 Subject: [PATCH 13/13] [oximeter] Add minimal versioning support (#4246) Provides a mechanism to explicitly version Oximeter schema used within Clickhouse, and to wipe that data if any schema-breaking changes are made. This is retroactively intended to solve the problems discussed within https://github.com/oxidecomputer/omicron/issues/4221 , where the `timeseries_key` generation was altered. --- oximeter/collector/src/lib.rs | 7 +- oximeter/db/src/client.rs | 265 +++++++++++++++++++++++- oximeter/db/src/db-replicated-init.sql | 8 + oximeter/db/src/db-single-node-init.sql | 8 + oximeter/db/src/model.rs | 8 + 5 files changed, 281 insertions(+), 15 deletions(-) diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 6674d65ecd..b7a14cec45 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -35,6 +35,7 @@ use omicron_common::backoff; use omicron_common::FileKv; use oximeter::types::ProducerResults; use oximeter::types::ProducerResultsItem; +use oximeter_db::model::OXIMETER_VERSION; use oximeter_db::Client; use oximeter_db::DbWrite; use serde::Deserialize; @@ -455,11 +456,7 @@ impl OximeterAgent { }; let client = Client::new(db_address, &log); let replicated = client.is_oximeter_cluster().await?; - if !replicated { - client.init_single_node_db().await?; - } else { - client.init_replicated_db().await?; - } + client.initialize_db_with_version(replicated, OXIMETER_VERSION).await?; // Spawn the task for aggregating and inserting all metrics tokio::spawn(async move { diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index 8629e4b8ef..ffa5d97d52 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -25,7 +25,9 @@ use dropshot::WhichPage; use oximeter::types::Sample; use slog::debug; use slog::error; +use slog::info; use slog::trace; +use slog::warn; use slog::Logger; use std::collections::btree_map::Entry; use std::collections::BTreeMap; @@ -269,7 +271,103 @@ impl Client { .map_err(|e| Error::Database(e.to_string())) } - // Verifies if instance is part of oximeter_cluster + /// Validates that the schema used by the DB matches the version used by + /// the executable using it. + /// + /// This function will wipe metrics data if the version stored within + /// the DB is less than the schema version of Oximeter. + /// If the version in the DB is newer than what is known to Oximeter, an + /// error is returned. + /// + /// NOTE: This function is not safe for concurrent usage! + pub async fn initialize_db_with_version( + &self, + replicated: bool, + expected_version: u64, + ) -> Result<(), Error> { + info!(self.log, "reading db version"); + + // Read the version from the DB + let version = self.read_latest_version().await?; + info!(self.log, "read oximeter database version"; "version" => version); + + // Decide how to conform the on-disk version with this version of + // Oximeter. + if version < expected_version { + info!(self.log, "wiping and re-initializing oximeter schema"); + // If the on-storage version is less than the constant embedded into + // this binary, the DB is out-of-date. Drop it, and re-populate it + // later. 
+ if !replicated { + self.wipe_single_node_db().await?; + self.init_single_node_db().await?; + } else { + self.wipe_replicated_db().await?; + self.init_replicated_db().await?; + } + } else if version > expected_version { + // If the on-storage version is greater than the constant embedded + // into this binary, we may have downgraded. + return Err(Error::Database( + format!( + "Expected version {expected_version}, saw {version}. Downgrading is not supported.", + ) + )); + } else { + // If the version matches, we don't need to update the DB + return Ok(()); + } + + info!(self.log, "inserting current version"; "version" => expected_version); + self.insert_version(expected_version).await?; + Ok(()) + } + + async fn read_latest_version(&self) -> Result { + let sql = format!( + "SELECT MAX(value) FROM {db_name}.version;", + db_name = crate::DATABASE_NAME, + ); + + let version = match self.execute_with_body(sql).await { + Ok(body) if body.is_empty() => { + warn!( + self.log, + "no version in database (treated as 'version 0')" + ); + 0 + } + Ok(body) => body.trim().parse::().map_err(|err| { + Error::Database(format!("Cannot read version: {err}")) + })?, + Err(Error::Database(err)) + // Case 1: The database has not been created. + if err.contains("Database oximeter doesn't exist") || + // Case 2: The database has been created, but it's old (exists + // prior to the version table). + err.contains("Table oximeter.version doesn't exist") => + { + warn!(self.log, "oximeter database does not exist, or is out-of-date"); + 0 + } + Err(err) => { + warn!(self.log, "failed to read version"; "error" => err.to_string()); + return Err(err); + } + }; + Ok(version) + } + + async fn insert_version(&self, version: u64) -> Result<(), Error> { + let sql = format!( + "INSERT INTO {db_name}.version (*) VALUES ({version}, now());", + db_name = crate::DATABASE_NAME, + ); + self.execute_with_body(sql).await?; + Ok(()) + } + + /// Verifies if instance is part of oximeter_cluster pub async fn is_oximeter_cluster(&self) -> Result { let sql = String::from("SHOW CLUSTERS FORMAT JSONEachRow;"); let res = self.execute_with_body(sql).await?; @@ -710,7 +808,8 @@ mod tests { // on the ubuntu CI job with "Failed to detect ClickHouse subprocess within timeout" #[ignore] async fn test_build_replicated() { - let log = slog::Logger::root(slog::Discard, o!()); + let logctx = test_setup_log("test_build_replicated"); + let log = &logctx.log; // Start all Keeper coordinator nodes let cur_dir = std::env::current_dir().unwrap(); @@ -819,11 +918,14 @@ mod tests { k3.cleanup().await.expect("Failed to cleanup ClickHouse keeper 3"); db_1.cleanup().await.expect("Failed to cleanup ClickHouse server 1"); db_2.cleanup().await.expect("Failed to cleanup ClickHouse server 2"); + + logctx.cleanup_successful(); } #[tokio::test] async fn test_client_insert() { - let log = slog::Logger::root(slog::Discard, o!()); + let logctx = test_setup_log("test_client_insert"); + let log = &logctx.log; // Let the OS assign a port and discover it after ClickHouse starts let mut db = ClickHouseInstance::new_single_node(0) @@ -845,6 +947,7 @@ mod tests { }; client.insert_samples(&samples).await.unwrap(); db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); } // This is a target with the same name as that in `lib.rs` used for other tests, but with a @@ -1307,7 +1410,8 @@ mod tests { #[tokio::test] async fn test_schema_mismatch() { - let log = slog::Logger::root(slog::Discard, o!()); + let logctx = 
test_setup_log("test_schema_mismatch"); + let log = &logctx.log; // Let the OS assign a port and discover it after ClickHouse starts let mut db = ClickHouseInstance::new_single_node(0) @@ -1337,11 +1441,141 @@ mod tests { let result = client.verify_sample_schema(&sample).await; assert!(matches!(result, Err(Error::SchemaMismatch { .. }))); db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + // Returns the number of timeseries schemas being used. + async fn get_schema_count(client: &Client) -> usize { + client + .execute_with_body( + "SELECT * FROM oximeter.timeseries_schema FORMAT JSONEachRow;", + ) + .await + .expect("Failed to SELECT from database") + .lines() + .count() + } + + #[tokio::test] + async fn test_database_version_update_idempotent() { + let logctx = test_setup_log("test_database_version_update_idempotent"); + let log = &logctx.log; + + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new("::1".parse().unwrap(), db.port()); + + let replicated = false; + + // Initialize the database... + let client = Client::new(address, &log); + client + .initialize_db_with_version(replicated, model::OXIMETER_VERSION) + .await + .expect("Failed to initialize timeseries database"); + + // Insert data here so we can verify it still exists later. + // + // The values here don't matter much, we just want to check that + // the database data hasn't been dropped. + assert_eq!(0, get_schema_count(&client).await); + let sample = test_util::make_sample(); + client.insert_samples(&[sample.clone()]).await.unwrap(); + assert_eq!(1, get_schema_count(&client).await); + + // Re-initialize the database, see that our data still exists + client + .initialize_db_with_version(replicated, model::OXIMETER_VERSION) + .await + .expect("Failed to initialize timeseries database"); + + assert_eq!(1, get_schema_count(&client).await); + + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_database_version_will_not_downgrade() { + let logctx = test_setup_log("test_database_version_will_not_downgrade"); + let log = &logctx.log; + + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new("::1".parse().unwrap(), db.port()); + + let replicated = false; + + // Initialize the database + let client = Client::new(address, &log); + client + .initialize_db_with_version(replicated, model::OXIMETER_VERSION) + .await + .expect("Failed to initialize timeseries database"); + + // Bump the version of the database to a "too new" version + client + .insert_version(model::OXIMETER_VERSION + 1) + .await + .expect("Failed to insert very new DB version"); + + // Expect a failure re-initializing the client. + // + // This will attempt to initialize the client with "version = + // model::OXIMETER_VERSION", which is "too old". 
+ client + .initialize_db_with_version(replicated, model::OXIMETER_VERSION) + .await + .expect_err("Should have failed, downgrades are not supported"); + + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_database_version_wipes_old_version() { + let logctx = test_setup_log("test_database_version_wipes_old_version"); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new("::1".parse().unwrap(), db.port()); + + let replicated = false; + + // Initialize the Client + let client = Client::new(address, &log); + client + .initialize_db_with_version(replicated, model::OXIMETER_VERSION) + .await + .expect("Failed to initialize timeseries database"); + + // Insert data here so we can remove it later. + // + // The values here don't matter much, we just want to check that + // the database data gets dropped later. + assert_eq!(0, get_schema_count(&client).await); + let sample = test_util::make_sample(); + client.insert_samples(&[sample.clone()]).await.unwrap(); + assert_eq!(1, get_schema_count(&client).await); + + // If we try to upgrade to a newer version, we'll drop old data. + client + .initialize_db_with_version(replicated, model::OXIMETER_VERSION + 1) + .await + .expect("Should have initialized database successfully"); + assert_eq!(0, get_schema_count(&client).await); + + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); } #[tokio::test] async fn test_schema_update() { - let log = slog::Logger::root(slog::Discard, o!()); + let logctx = test_setup_log("test_schema_update"); + let log = &logctx.log; // Let the OS assign a port and discover it after ClickHouse starts let mut db = ClickHouseInstance::new_single_node(0) @@ -1415,6 +1649,7 @@ mod tests { assert_eq!(expected_schema, schema[0]); db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); } async fn setup_filter_testcase() -> (ClickHouseInstance, Client, Vec) @@ -1589,12 +1824,14 @@ mod tests { #[tokio::test] async fn test_bad_database_connection() { - let log = slog::Logger::root(slog::Discard, o!()); + let logctx = test_setup_log("test_bad_database_connection"); + let log = &logctx.log; let client = Client::new("127.0.0.1:443".parse().unwrap(), &log); assert!(matches!( client.ping().await, Err(Error::DatabaseUnavailable(_)) )); + logctx.cleanup_successful(); } #[tokio::test] @@ -1617,7 +1854,8 @@ mod tests { datum: i64, } - let log = Logger::root(slog::Discard, o!()); + let logctx = test_setup_log("test_differentiate_by_timeseries_name"); + let log = &logctx.log; // Let the OS assign a port and discover it after ClickHouse starts let db = ClickHouseInstance::new_single_node(0) @@ -1665,6 +1903,7 @@ mod tests { ); assert_eq!(timeseries.target.name, "my_target"); assert_eq!(timeseries.metric.name, "second_metric"); + logctx.cleanup_successful(); } #[derive(Debug, Clone, oximeter::Target)] @@ -1980,7 +2219,8 @@ mod tests { .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new("::1".parse().unwrap(), db.port()); - let log = Logger::root(slog::Discard, o!()); + let logctx = test_setup_log("test_select_timeseries_with_start_time"); + let log = &logctx.log; let client = Client::new(address, &log); client .init_single_node_db() @@ -2015,6 +2255,7 @@ mod tests { } } db.cleanup().await.expect("Failed to cleanup database"); + 
logctx.cleanup_successful(); } #[tokio::test] @@ -2024,7 +2265,8 @@ mod tests { .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new("::1".parse().unwrap(), db.port()); - let log = Logger::root(slog::Discard, o!()); + let logctx = test_setup_log("test_select_timeseries_with_limit"); + let log = &logctx.log; let client = Client::new(address, &log); client .init_single_node_db() @@ -2133,6 +2375,7 @@ mod tests { ); db.cleanup().await.expect("Failed to cleanup database"); + logctx.cleanup_successful(); } #[tokio::test] @@ -2142,7 +2385,8 @@ mod tests { .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new("::1".parse().unwrap(), db.port()); - let log = Logger::root(slog::Discard, o!()); + let logctx = test_setup_log("test_select_timeseries_with_order"); + let log = &logctx.log; let client = Client::new(address, &log); client .init_single_node_db() @@ -2234,6 +2478,7 @@ mod tests { ); db.cleanup().await.expect("Failed to cleanup database"); + logctx.cleanup_successful(); } #[tokio::test] diff --git a/oximeter/db/src/db-replicated-init.sql b/oximeter/db/src/db-replicated-init.sql index 7b92d967af..7b756f4b0d 100644 --- a/oximeter/db/src/db-replicated-init.sql +++ b/oximeter/db/src/db-replicated-init.sql @@ -1,5 +1,13 @@ CREATE DATABASE IF NOT EXISTS oximeter ON CLUSTER oximeter_cluster; -- +CREATE TABLE IF NOT EXISTS oximeter.version ON CLUSTER oximeter_cluster +( + value UInt64, + timestamp DateTime64(9, 'UTC') +) +ENGINE = ReplicatedMergeTree() +ORDER BY (value, timestamp); +-- CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_cluster ( timeseries_name String, diff --git a/oximeter/db/src/db-single-node-init.sql b/oximeter/db/src/db-single-node-init.sql index 5f805f5725..1f648fd5d5 100644 --- a/oximeter/db/src/db-single-node-init.sql +++ b/oximeter/db/src/db-single-node-init.sql @@ -1,5 +1,13 @@ CREATE DATABASE IF NOT EXISTS oximeter; -- +CREATE TABLE IF NOT EXISTS oximeter.version +( + value UInt64, + timestamp DateTime64(9, 'UTC') +) +ENGINE = MergeTree() +ORDER BY (value, timestamp); +-- CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ( timeseries_name String, diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 1b3b75320f..1314c5c649 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -35,6 +35,14 @@ use std::net::IpAddr; use std::net::Ipv6Addr; use uuid::Uuid; +/// Describes the version of the Oximeter database. +/// +/// See: [crate::Client::initialize_db_with_version] for usage. +/// +/// TODO(#4271): The current implementation of versioning will wipe the metrics +/// database if this number is incremented. +pub const OXIMETER_VERSION: u64 = 1; + // Wrapper type to represent a boolean in the database. // // ClickHouse's type system lacks a boolean, and using `u8` to represent them. This a safe wrapper