diff --git a/.config/nextest.toml b/.config/nextest.toml index ba07186c8a..79774e3658 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -18,3 +18,15 @@ fail-fast = false [script.crdb-seed] command = 'cargo run -p crdb-seed' + +# The ClickHouse cluster tests currently rely on a hard-coded set of ports for +# the nodes in the cluster. We would like to relax this in the future, at which +# point this test-group configuration can be removed or at least loosened to +# support testing in parallel. For now, enforce strict serialization for all +# tests with `replicated` in the name. +[test-groups] +clickhouse-cluster = { max-threads = 1 } + +[[profile.default.overrides]] +filter = 'package(oximeter-db) and test(replicated)' +test-group = 'clickhouse-cluster' diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index d8de288239..6fda8bb8d7 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -51,6 +51,7 @@ export RUSTFLAGS="-D warnings" export RUSTDOCFLAGS="-D warnings" export TMPDIR=$TEST_TMPDIR export RUST_BACKTRACE=1 +export CARGO_INCREMENTAL=0 ptime -m cargo test --locked --verbose --no-run # diff --git a/.github/buildomat/jobs/ci-tools.sh b/.github/buildomat/jobs/ci-tools.sh index 702561a951..07a63af30c 100755 --- a/.github/buildomat/jobs/ci-tools.sh +++ b/.github/buildomat/jobs/ci-tools.sh @@ -28,6 +28,7 @@ banner end-to-end-tests # export CARGO_PROFILE_DEV_DEBUG=1 export CARGO_PROFILE_TEST_DEBUG=1 +export CARGO_INCREMENTAL=0 ptime -m cargo build --locked -p end-to-end-tests --tests --bin bootstrap \ --message-format json-render-diagnostics >/tmp/output.end-to-end.json diff --git a/.github/buildomat/jobs/clippy.sh b/.github/buildomat/jobs/clippy.sh index 5fd31adb76..abbcda2150 100755 --- a/.github/buildomat/jobs/clippy.sh +++ b/.github/buildomat/jobs/clippy.sh @@ -28,5 +28,6 @@ banner prerequisites ptime -m bash ./tools/install_builder_prerequisites.sh -y banner clippy +export CARGO_INCREMENTAL=0 ptime -m cargo xtask clippy ptime -m cargo doc diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 86937d1908..0605ab6883 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -44,6 +44,7 @@ ptime -m ./tools/install_builder_prerequisites.sh -yp ptime -m ./tools/ci_download_softnpu_machinery # Build the test target +export CARGO_INCREMENTAL=0 ptime -m cargo run --locked --release --bin omicron-package -- \ -t test target create -i standard -m non-gimlet -s softnpu -r single-sled ptime -m cargo run --locked --release --bin omicron-package -- \ diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 1b3a892338..f2581845d9 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,6 +19,8 @@ jobs: check-omicron-deployment: runs-on: ${{ matrix.os }} + env: + CARGO_INCREMENTAL: 0 strategy: fail-fast: false matrix: @@ -49,6 +51,8 @@ jobs: # of our code. clippy-lint: runs-on: ubuntu-22.04 + env: + CARGO_INCREMENTAL: 0 steps: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo @@ -75,6 +79,8 @@ jobs: # the separate "rustdocs" repo. 
build-docs: runs-on: ubuntu-22.04 + env: + CARGO_INCREMENTAL: 0 steps: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo diff --git a/Cargo.lock b/Cargo.lock index 0ca56f02b4..bbb08791e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -704,8 +704,6 @@ dependencies = [ name = "bootstrap-agent-client" version = "0.1.0" dependencies = [ - "async-trait", - "chrono", "ipnetwork", "omicron-common 0.1.0", "omicron-workspace-hack", @@ -2043,9 +2041,7 @@ dependencies = [ "reqwest", "schemars", "serde", - "serde_json", "slog", - "uuid", ] [[package]] @@ -2275,9 +2271,7 @@ dependencies = [ "anyhow", "async-trait", "base64 0.21.5", - "camino", "chrono", - "futures", "http", "omicron-sled-agent", "omicron-test-utils", @@ -2287,11 +2281,9 @@ dependencies = [ "reqwest", "russh", "russh-keys", - "serde_json", "tokio", "toml 0.8.8", "trust-dns-resolver", - "uuid", ] [[package]] @@ -2699,8 +2691,6 @@ dependencies = [ "futures", "gateway-client", "gateway-messages", - "hex", - "libc", "omicron-common 0.1.0", "omicron-workspace-hack", "reqwest", @@ -3485,12 +3475,9 @@ dependencies = [ "omicron-common 0.1.0", "omicron-test-utils", "omicron-workspace-hack", - "once_cell", "partial-io", - "progenitor-client", "proptest", "reqwest", - "serde", "sha2", "sled-hardware", "sled-storage", @@ -3504,7 +3491,6 @@ dependencies = [ "thiserror", "tokio", "tokio-stream", - "toml 0.8.8", "tufaceous-lib", "update-engine", "uuid", @@ -3533,7 +3519,6 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "camino", "clap 4.4.3", "dropshot", "expectorate", @@ -3549,7 +3534,6 @@ dependencies = [ "serde_json", "slog", "subprocess", - "tokio", "uuid", ] @@ -3560,7 +3544,6 @@ dependencies = [ "anyhow", "camino", "illumos-utils", - "omicron-common 0.1.0", "omicron-workspace-hack", "schemars", "serde", @@ -3604,7 +3587,6 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "trust-dns-proto", "trust-dns-resolver", "uuid", ] @@ -4369,7 +4351,6 @@ dependencies = [ "sled-agent-client", "steno", "strum", - "thiserror", "uuid", ] @@ -4395,7 +4376,6 @@ dependencies = [ "futures", "gateway-client", "headers", - "hex", "http", "hyper", "hyper-rustls", @@ -4406,7 +4386,6 @@ dependencies = [ "macaddr", "newtype_derive", "nexus-db-model", - "nexus-defaults", "nexus-inventory", "nexus-test-utils", "nexus-types", @@ -4416,23 +4395,17 @@ dependencies = [ "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", - "once_cell", "openapiv3", "openssl", - "openssl-probe", - "openssl-sys", "oso", "oximeter 0.1.0", "paste", "pem 1.1.1", "petgraph", "pq-sys", - "rand 0.8.5", "rcgen", "ref-cast", "regex", - "reqwest", - "ring 0.16.20", "rustls", "samael", "serde", @@ -4449,8 +4422,6 @@ dependencies = [ "term", "thiserror", "tokio", - "tokio-postgres", - "toml 0.8.8", "usdt", "uuid", ] @@ -4474,7 +4445,6 @@ dependencies = [ "anyhow", "chrono", "expectorate", - "futures", "gateway-client", "gateway-messages", "gateway-test-utils", @@ -4492,8 +4462,6 @@ name = "nexus-test-interface" version = "0.1.0" dependencies = [ "async-trait", - "dropshot", - "internal-dns 0.1.0", "nexus-types", "omicron-common 0.1.0", "omicron-workspace-hack", @@ -4529,16 +4497,12 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "oximeter 0.1.0", - "oximeter-client", "oximeter-collector", "oximeter-producer 0.1.0", - "parse-display 0.8.2", "serde", "serde_json", "serde_urlencoded", "slog", - "tempfile", - "trust-dns-proto", "trust-dns-resolver", 
"uuid", ] @@ -4548,7 +4512,6 @@ name = "nexus-test-utils-macros" version = "0.1.0" dependencies = [ "omicron-workspace-hack", - "proc-macro2", "quote", "syn 2.0.32", ] @@ -4564,13 +4527,10 @@ dependencies = [ "dns-service-client 0.1.0", "futures", "gateway-client", - "newtype_derive", "omicron-common 0.1.0", "omicron-passwords 0.1.0", "omicron-workspace-hack", "openssl", - "openssl-probe", - "openssl-sys", "parse-display 0.8.2", "schemars", "serde", @@ -4897,7 +4857,6 @@ dependencies = [ "futures", "hex", "http", - "hyper", "ipnetwork", "lazy_static", "libc", @@ -4909,11 +4868,9 @@ dependencies = [ "rand 0.8.5", "regress", "reqwest", - "ring 0.16.20", "schemars", "semver 1.0.20", "serde", - "serde_derive", "serde_human_bytes", "serde_json", "serde_urlencoded", @@ -4980,7 +4937,6 @@ dependencies = [ "serde", "serde_derive", "thiserror", - "toml 0.8.8", ] [[package]] @@ -5002,7 +4958,6 @@ dependencies = [ "omicron-common 0.1.0", "omicron-nexus", "omicron-rpaths", - "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", "openssl", @@ -5022,9 +4977,7 @@ name = "omicron-gateway" version = "0.1.0" dependencies = [ "anyhow", - "async-trait", "base64 0.21.5", - "ciborium", "clap 4.4.3", "dropshot", "expectorate", @@ -5045,7 +4998,6 @@ dependencies = [ "openapiv3", "schemars", "serde", - "serde_human_bytes", "serde_json", "signal-hook", "signal-hook-tokio", @@ -5057,7 +5009,6 @@ dependencies = [ "tokio", "tokio-stream", "tokio-tungstenite 0.18.0", - "tokio-util", "toml 0.8.8", "uuid", ] @@ -5071,12 +5022,10 @@ dependencies = [ "async-bb8-diesel", "async-trait", "base64 0.21.5", - "bb8", "camino", "cancel-safe-futures", "chrono", "clap 4.4.3", - "cookie", "criterion", "crucible-agent-client", "crucible-pantry-client", @@ -5105,7 +5054,6 @@ dependencies = [ "macaddr", "mg-admin-client", "mime_guess", - "newtype_derive", "nexus-db-model", "nexus-db-queries", "nexus-defaults", @@ -5125,9 +5073,6 @@ dependencies = [ "openapi-lint", "openapiv3", "openssl", - "openssl-probe", - "openssl-sys", - "oso", "oxide-client", "oximeter 0.1.0", "oximeter-client", @@ -5171,10 +5116,8 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "toml 0.8.8", "tough", "trust-dns-resolver", - "usdt", "uuid", ] @@ -5233,7 +5176,6 @@ dependencies = [ "hex", "illumos-utils", "indicatif", - "omicron-common 0.1.0", "omicron-workspace-hack", "omicron-zone-package", "petgraph", @@ -5242,7 +5184,6 @@ dependencies = [ "ring 0.16.20", "semver 1.0.20", "serde", - "serde_derive", "sled-hardware", "slog", "slog-async", @@ -5251,7 +5192,6 @@ dependencies = [ "strum", "swrite", "tar", - "tempfile", "thiserror", "tokio", "toml 0.8.8", @@ -5302,7 +5242,6 @@ dependencies = [ "assert_matches", "async-trait", "base64 0.21.5", - "bincode", "bootstore", "bootstrap-agent-client", "bytes", @@ -5335,6 +5274,7 @@ dependencies = [ "key-manager", "libc", "macaddr", + "mg-admin-client", "nexus-client 0.1.0", "omicron-common 0.1.0", "omicron-test-utils", @@ -5346,9 +5286,7 @@ dependencies = [ "oximeter 0.1.0", "oximeter-instruments", "oximeter-producer 0.1.0", - "percent-encoding", "pretty_assertions", - "progenitor", "propolis-client", "propolis-server", "rand 0.8.5", @@ -5375,7 +5313,6 @@ dependencies = [ "thiserror", "tofino", "tokio", - "tokio-tungstenite 0.18.0", "toml 0.8.8", "usdt", "uuid", @@ -5394,7 +5331,6 @@ dependencies = [ "dropshot", "expectorate", "filetime", - "futures", "headers", "hex", "http", @@ -5482,6 +5418,7 @@ dependencies = [ "postgres-types", "ppv-lite86", "predicates 3.0.4", + "proc-macro2", "rand 0.8.5", 
"rand_chacha 0.3.1", "regex", @@ -5807,6 +5744,7 @@ name = "oximeter-collector" version = "0.1.0" dependencies = [ "anyhow", + "camino", "clap 4.4.3", "dropshot", "expectorate", @@ -5846,12 +5784,14 @@ dependencies = [ "async-trait", "bcs", "bytes", + "camino", "chrono", "clap 4.4.3", "dropshot", "expectorate", "highway", "itertools 0.11.0", + "omicron-common 0.1.0", "omicron-test-utils", "omicron-workspace-hack", "oximeter 0.1.0", @@ -5865,6 +5805,7 @@ dependencies = [ "slog-dtrace", "slog-term", "strum", + "tempfile", "thiserror", "tokio", "usdt", @@ -5924,7 +5865,6 @@ dependencies = [ "omicron-common 0.1.0", "omicron-workspace-hack", "oximeter 0.1.0", - "reqwest", "schemars", "serde", "slog", @@ -10177,9 +10117,7 @@ dependencies = [ "ciborium", "clap 4.4.3", "crossterm", - "debug-ignore", "futures", - "hex", "humantime", "indexmap 2.1.0", "indicatif", @@ -10193,7 +10131,6 @@ dependencies = [ "ratatui", "reqwest", "rpassword", - "semver 1.0.20", "serde", "serde_json", "shell-words", @@ -10211,7 +10148,6 @@ dependencies = [ "tui-tree-widget", "unicode-width", "update-engine", - "uuid", "wicket-common", "wicketd-client", "zeroize", @@ -10243,7 +10179,6 @@ dependencies = [ "clap 4.4.3", "crossterm", "omicron-workspace-hack", - "ratatui", "reedline", "serde", "slog", diff --git a/clients/bootstrap-agent-client/Cargo.toml b/clients/bootstrap-agent-client/Cargo.toml index 42ae59b7aa..3474c5814a 100644 --- a/clients/bootstrap-agent-client/Cargo.toml +++ b/clients/bootstrap-agent-client/Cargo.toml @@ -5,8 +5,6 @@ edition = "2021" license = "MPL-2.0" [dependencies] -async-trait.workspace = true -chrono.workspace = true omicron-common.workspace = true progenitor.workspace = true ipnetwork.workspace = true diff --git a/clients/dns-service-client/Cargo.toml b/clients/dns-service-client/Cargo.toml index 681c06672f..6132222b8a 100644 --- a/clients/dns-service-client/Cargo.toml +++ b/clients/dns-service-client/Cargo.toml @@ -11,7 +11,5 @@ progenitor.workspace = true reqwest = { workspace = true, features = ["json", "rustls-tls", "stream"] } schemars.workspace = true serde.workspace = true -serde_json.workspace = true slog.workspace = true -uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/common/Cargo.toml b/common/Cargo.toml index 75c1efab55..49997e619c 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -15,18 +15,15 @@ dropshot.workspace = true futures.workspace = true hex.workspace = true http.workspace = true -hyper.workspace = true ipnetwork.workspace = true macaddr.workspace = true lazy_static.workspace = true proptest = { workspace = true, optional = true } rand.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } -ring.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } semver.workspace = true serde.workspace = true -serde_derive.workspace = true serde_human_bytes.workspace = true serde_json.workspace = true serde_with.workspace = true diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 784da8fcc6..155fbf971b 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -103,6 +103,17 @@ pub struct BgpPeerConfig { pub port: String, /// Address of the peer. pub addr: Ipv4Addr, + /// How long to keep a session alive without a keepalive in seconds. + /// Defaults to 6. + pub hold_time: Option, + /// How long to keep a peer in idle after a state machine reset in seconds. 
+ pub idle_hold_time: Option, + /// How long to delay sending open messages to a peer. In seconds. + pub delay_open: Option, + /// The interval in seconds between peer connection retry attempts. + pub connect_retry: Option, + /// The interval to send keepalive messages at. + pub keepalive: Option, } #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] diff --git a/dev-tools/omicron-dev/Cargo.toml b/dev-tools/omicron-dev/Cargo.toml index ec7cafb559..ce9a6ac32d 100644 --- a/dev-tools/omicron-dev/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -21,7 +21,6 @@ nexus-test-interface.workspace = true omicron-common.workspace = true omicron-nexus.workspace = true omicron-test-utils.workspace = true -omicron-sled-agent.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. pq-sys = "*" rcgen.workspace = true diff --git a/dev-tools/thing-flinger/Cargo.toml b/dev-tools/thing-flinger/Cargo.toml index 1a6c05a546..2acbaf5659 100644 --- a/dev-tools/thing-flinger/Cargo.toml +++ b/dev-tools/thing-flinger/Cargo.toml @@ -13,7 +13,6 @@ omicron-package.workspace = true serde.workspace = true serde_derive.workspace = true thiserror.workspace = true -toml.workspace = true omicron-workspace-hack.workspace = true [[bin]] diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 87f63ea1df..e78a8792d3 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -8,9 +8,7 @@ license = "MPL-2.0" anyhow = { workspace = true, features = ["backtrace"] } async-trait.workspace = true base64.workspace = true -camino.workspace = true chrono.workspace = true -futures.workspace = true http.workspace = true omicron-sled-agent.workspace = true omicron-test-utils.workspace = true @@ -19,9 +17,7 @@ rand.workspace = true reqwest.workspace = true russh = "0.39.0" russh-keys = "0.38.0" -serde_json.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } toml.workspace = true trust-dns-resolver.workspace = true -uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/gateway-cli/Cargo.toml b/gateway-cli/Cargo.toml index ba66fa4c4f..2412bf950f 100644 --- a/gateway-cli/Cargo.toml +++ b/gateway-cli/Cargo.toml @@ -8,9 +8,7 @@ license = "MPL-2.0" anyhow.workspace = true clap.workspace = true futures.workspace = true -hex.workspace = true omicron-common.workspace = true -libc.workspace = true reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index 9cf41f6c2e..75c31e9977 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -6,9 +6,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true -async-trait.workspace = true base64.workspace = true -ciborium.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true @@ -23,7 +21,6 @@ omicron-common.workspace = true once_cell.workspace = true schemars.workspace = true serde.workspace = true -serde_human_bytes.workspace = true signal-hook.workspace = true signal-hook-tokio.workspace = true slog.workspace = true @@ -32,7 +29,6 @@ thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tokio-stream.workspace = true tokio-tungstenite.workspace = true -tokio-util.workspace = true toml.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/installinator-artifactd/Cargo.toml b/installinator-artifactd/Cargo.toml index b14ca4002f..e9ddc222cd 100644 --- a/installinator-artifactd/Cargo.toml +++ 
b/installinator-artifactd/Cargo.toml @@ -7,7 +7,6 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true async-trait.workspace = true -camino.workspace = true clap.workspace = true dropshot.workspace = true hyper.workspace = true @@ -15,7 +14,6 @@ schemars.workspace = true serde.workspace = true serde_json.workspace = true slog.workspace = true -tokio.workspace = true uuid.workspace = true installinator-common.workspace = true diff --git a/installinator-common/Cargo.toml b/installinator-common/Cargo.toml index 8fea234e20..4381de74eb 100644 --- a/installinator-common/Cargo.toml +++ b/installinator-common/Cargo.toml @@ -8,7 +8,6 @@ license = "MPL-2.0" anyhow.workspace = true camino.workspace = true illumos-utils.workspace = true -omicron-common.workspace = true schemars.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index 36b20d037a..d489e73ec1 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -23,11 +23,8 @@ installinator-common.workspace = true ipcc-key-value.workspace = true itertools.workspace = true libc.workspace = true -once_cell.workspace = true omicron-common.workspace = true -progenitor-client.workspace = true reqwest.workspace = true -serde.workspace = true sha2.workspace = true sled-hardware.workspace = true sled-storage.workspace = true @@ -39,7 +36,6 @@ smf.workspace = true tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } -toml.workspace = true tufaceous-lib.workspace = true update-engine.workspace = true uuid.workspace = true diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index ecb2d48bda..96993ce6a2 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -14,7 +14,6 @@ omicron-common.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } slog.workspace = true thiserror.workspace = true -trust-dns-proto.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 3add4ad559..4fc13a31d8 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -12,12 +12,10 @@ anyhow.workspace = true assert_matches.workspace = true async-trait.workspace = true base64.workspace = true -bb8.workspace = true cancel-safe-futures.workspace = true camino.workspace = true clap.workspace = true chrono.workspace = true -cookie.workspace = true crucible-agent-client.workspace = true crucible-pantry-client.workspace = true dns-service-client.workspace = true @@ -36,16 +34,12 @@ ipnetwork.workspace = true lazy_static.workspace = true macaddr.workspace = true mime_guess.workspace = true -newtype_derive.workspace = true # Not under "dev-dependencies"; these also need to be implemented for # integration tests. 
nexus-test-interface.workspace = true num-integer.workspace = true once_cell.workspace = true openssl.workspace = true -openssl-sys.workspace = true -openssl-probe.workspace = true -oso.workspace = true oximeter-client.workspace = true oximeter-db.workspace = true parse-display.workspace = true @@ -75,11 +69,9 @@ tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["full"] } tokio-postgres = { workspace = true, features = ["with-serde_json-1"] } -toml.workspace = true tough.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true -usdt.workspace = true nexus-defaults.workspace = true nexus-db-model.workspace = true diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index a5cb9a06be..b7514c4806 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -20,7 +20,6 @@ parse-display.workspace = true pq-sys = "*" rand.workspace = true ref-cast.workspace = true -thiserror.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } semver.workspace = true serde.workspace = true diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index cff261e01a..7c6b8bbd0a 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1243,7 +1243,7 @@ table! { /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(9, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(10, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 5edf4f1e89..b1b8f3b28f 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -21,25 +21,18 @@ diesel-dtrace.workspace = true dropshot.workspace = true futures.workspace = true headers.workspace = true -hex.workspace = true http.workspace = true hyper.workspace = true ipnetwork.workspace = true lazy_static.workspace = true macaddr.workspace = true newtype_derive.workspace = true -once_cell.workspace = true openssl.workspace = true -openssl-sys.workspace = true -openssl-probe.workspace = true oso.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
pq-sys = "*" -rand.workspace = true ref-cast.workspace = true -reqwest = { workspace = true, features = [ "json" ] } -ring.workspace = true samael.workspace = true serde.workspace = true serde_json.workspace = true @@ -51,14 +44,11 @@ static_assertions.workspace = true steno.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } -tokio-postgres = { workspace = true, features = [ "with-serde_json-1" ] } -toml.workspace = true uuid.workspace = true usdt.workspace = true authz-macros.workspace = true db-macros.workspace = true -nexus-defaults.workspace = true nexus-db-model.workspace = true nexus-types.workspace = true omicron-common.workspace = true diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 965ff3f02a..202aff49b2 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -7,7 +7,6 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true chrono.workspace = true -futures.workspace = true gateway-client.workspace = true gateway-messages.workspace = true nexus-types.workspace = true diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index d40b09d2be..1676f44083 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -280,6 +280,15 @@ mod test { let message = regex::Regex::new(r"os error \d+") .unwrap() .replace_all(&e, "os error <>"); + // Communication errors differ based on the configuration of the + // machine running the test. For example whether or not the machine + // has IPv6 configured will determine if an error is network + // unreachable or a timeout due to sending a packet to a known + // discard prefix. So just key in on the communication error in a + // general sense. + let message = regex::Regex::new(r"Communication Error.*") + .unwrap() + .replace_all(&message, "Communication Error <>"); write!(&mut s, "error: {}\n", message).unwrap(); } diff --git a/nexus/inventory/tests/output/collector_errors.txt b/nexus/inventory/tests/output/collector_errors.txt index f231cc7d97..4404046253 100644 --- a/nexus/inventory/tests/output/collector_errors.txt +++ b/nexus/inventory/tests/output/collector_errors.txt @@ -41,4 +41,4 @@ cabooses found: RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarRot" errors: -error: MGS "http://[100::1]:12345": listing ignition targets: Communication Error: error sending request for url (http://[100::1]:12345/ignition): error trying to connect: tcp connect error: Network is unreachable (os error <>): error sending request for url (http://[100::1]:12345/ignition): error trying to connect: tcp connect error: Network is unreachable (os error <>): error trying to connect: tcp connect error: Network is unreachable (os error <>): tcp connect error: Network is unreachable (os error <>): Network is unreachable (os error <>) +error: MGS "http://[100::1]:12345": listing ignition targets: Communication Error <> diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index bed690f839..163f3bd5bb 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -675,10 +675,15 @@ impl super::Nexus { addresses: info.addresses.iter().map(|a| a.address).collect(), bgp_peers: peer_info .iter() - .map(|(_p, asn, addr)| BgpPeerConfig { + .map(|(p, asn, addr)| BgpPeerConfig { addr: *addr, asn: *asn, port: port.port_name.clone(), + hold_time: Some(p.hold_time.0.into()), + connect_retry: Some(p.connect_retry.0.into()), + delay_open: Some(p.delay_open.0.into()), + idle_hold_time: 
Some(p.idle_hold_time.0.into()), + keepalive: Some(p.keepalive.0.into()), }) .collect(), switch: port.switch_location.parse().unwrap(), diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 830792826e..0c06d6ff83 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -962,6 +962,11 @@ pub(crate) async fn bootstore_update( asn: *asn, port: switch_port_name.into(), addr, + hold_time: Some(p.hold_time.0.into()), + connect_retry: Some(p.connect_retry.0.into()), + delay_open: Some(p.delay_open.0.into()), + idle_hold_time: Some(p.idle_hold_time.0.into()), + keepalive: Some(p.keepalive.0.into()), }), IpAddr::V6(_) => { warn!(opctx.log, "IPv6 peers not yet supported"); diff --git a/nexus/test-interface/Cargo.toml b/nexus/test-interface/Cargo.toml index 0071ffaa28..b96afa6dbf 100644 --- a/nexus/test-interface/Cargo.toml +++ b/nexus/test-interface/Cargo.toml @@ -6,8 +6,6 @@ license = "MPL-2.0" [dependencies] async-trait.workspace = true -dropshot.workspace = true -internal-dns.workspace = true nexus-types.workspace = true omicron-common.workspace = true slog.workspace = true diff --git a/nexus/test-utils-macros/Cargo.toml b/nexus/test-utils-macros/Cargo.toml index d3d28a7640..5ed57b9c4a 100644 --- a/nexus/test-utils-macros/Cargo.toml +++ b/nexus/test-utils-macros/Cargo.toml @@ -8,7 +8,6 @@ license = "MPL-2.0" proc-macro = true [dependencies] -proc-macro2.workspace = true quote.workspace = true syn = { workspace = true, features = [ "fold", "parsing" ] } omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 56cee27b37..024cba958b 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -28,16 +28,12 @@ omicron-passwords.workspace = true omicron-sled-agent.workspace = true omicron-test-utils.workspace = true oximeter.workspace = true -oximeter-client.workspace = true oximeter-collector.workspace = true oximeter-producer.workspace = true -parse-display.workspace = true serde.workspace = true serde_json.workspace = true serde_urlencoded.workspace = true slog.workspace = true -tempfile.workspace = true -trust-dns-proto.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index d79dd09fc1..213e7f9e4f 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -168,13 +168,15 @@ async fn query_crdb_schema_version(crdb: &CockroachInstance) -> String { // // Note that for the purposes of schema comparisons, we don't care about parsing // the contents of the database, merely the schema and equality of contained data. -#[derive(Eq, PartialEq, Clone, Debug)] +#[derive(PartialEq, Clone, Debug)] enum AnySqlType { DateTime, String(String), Bool(bool), Uuid(Uuid), Int8(i64), + Float4(f32), + TextArray(Vec<String>), // TODO: This isn't exhaustive, feel free to add more.
// // These should only be necessary for rows where the database schema changes also choose to @@ -213,6 +215,14 @@ impl<'a> tokio_postgres::types::FromSql<'a> for AnySqlType { if i64::accepts(ty) { return Ok(AnySqlType::Int8(i64::from_sql(ty, raw)?)); } + if f32::accepts(ty) { + return Ok(AnySqlType::Float4(f32::from_sql(ty, raw)?)); + } + if Vec::<String>::accepts(ty) { + return Ok(AnySqlType::TextArray(Vec::<String>::from_sql( + ty, raw, + )?)); + } Err(anyhow::anyhow!( "Cannot parse type {ty}. If you're trying to use this type in a table which is populated \ during a schema migration, consider adding it to `AnySqlType`." @@ -224,7 +234,7 @@ during a schema migration, consider adding it to `AnySqlType`." } } -#[derive(Eq, PartialEq, Debug)] +#[derive(PartialEq, Debug)] struct NamedSqlValue { // It's a little redunant to include the column name alongside each value, // but it results in a prettier diff. @@ -240,7 +250,7 @@ impl NamedSqlValue { } // A generic representation of a row of SQL data -#[derive(Eq, PartialEq, Debug)] +#[derive(PartialEq, Debug)] struct Row { values: Vec<NamedSqlValue>, } @@ -262,19 +272,7 @@ impl<'a> From<&'a [&'static str]> for ColumnSelector<'a> { } } -async fn crdb_show_constraints( - crdb: &CockroachInstance, - table: &str, -) -> Vec<Row> { - let client = crdb.connect().await.expect("failed to connect"); - - let sql = format!("SHOW CONSTRAINTS FROM {table}"); - let rows = client - .query(&sql, &[]) - .await - .unwrap_or_else(|_| panic!("failed to query {table}")); - client.cleanup().await.expect("cleaning up after wipe"); - +fn process_rows(rows: &Vec<tokio_postgres::Row>) -> Vec<Row> { let mut result = vec![]; for row in rows { let mut row_result = Row::new(); @@ -290,6 +288,22 @@ async fn crdb_show_constraints( result } +async fn crdb_show_constraints( + crdb: &CockroachInstance, + table: &str, +) -> Vec<Row> { + let client = crdb.connect().await.expect("failed to connect"); + + let sql = format!("SHOW CONSTRAINTS FROM {table}"); + let rows = client + .query(&sql, &[]) + .await + .unwrap_or_else(|_| panic!("failed to query {table}")); + client.cleanup().await.expect("cleaning up after wipe"); + + process_rows(&rows) +} + async fn crdb_select( crdb: &CockroachInstance, columns: ColumnSelector<'_>, @@ -324,19 +338,20 @@ async fn crdb_select( .unwrap_or_else(|_| panic!("failed to query {table}")); client.cleanup().await.expect("cleaning up after wipe"); - let mut result = vec![]; - for row in rows { - let mut row_result = Row::new(); - for i in 0..row.len() { - let column_name = row.columns()[i].name(); - row_result.values.push(NamedSqlValue { - column: column_name.to_string(), - value: row.get(i), - }); - } - result.push(row_result); - } - result + process_rows(&rows) +} + +async fn crdb_list_enums(crdb: &CockroachInstance) -> Vec<Row> { + let client = crdb.connect().await.expect("failed to connect"); + + // https://www.cockroachlabs.com/docs/stable/show-enums + let rows = client + .query("show enums;", &[]) + .await + .unwrap_or_else(|_| panic!("failed to list enums")); + client.cleanup().await.expect("cleaning up after wipe"); + + process_rows(&rows) } async fn read_all_schema_versions() -> BTreeSet<SemverVersion> { @@ -569,10 +584,11 @@ const PG_INDEXES: [&'static str; 5] = const TABLES: [&'static str; 4] = ["table_catalog", "table_schema", "table_name", "table_type"]; -#[derive(Eq, PartialEq, Debug)] +#[derive(PartialEq, Debug)] struct InformationSchema { columns: Vec<Row>, constraint_column_usage: Vec<Row>, + enums: Vec<Row>, key_column_usage: Vec<Row>, referential_constraints: Vec<Row>, views: Vec<Row>, @@ -589,6 +605,13 @@ impl InformationSchema { // the columns
diff especially needs this: it can be 20k lines otherwise similar_asserts::assert_eq!(self.tables, other.tables); similar_asserts::assert_eq!(self.columns, other.columns); + similar_asserts::assert_eq!( + self.enums, + other.enums, + "Enums did not match. Members must have the same order in dbinit.sql and \ + migrations. If a migration adds a member, it should use BEFORE or AFTER \ + to add it in the same order as dbinit.sql." + ); similar_asserts::assert_eq!(self.views, other.views); similar_asserts::assert_eq!( self.table_constraints, @@ -624,6 +647,8 @@ impl InformationSchema { ) .await; + let enums = crdb_list_enums(crdb).await; + let constraint_column_usage = crdb_select( crdb, CONSTRAINT_COLUMN_USAGE.as_slice().into(), @@ -694,6 +719,7 @@ impl InformationSchema { Self { columns, constraint_column_usage, + enums, key_column_usage, referential_constraints, views, diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 5722b065cf..9cb94a8484 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -9,10 +9,7 @@ anyhow.workspace = true chrono.workspace = true base64.workspace = true futures.workspace = true -newtype_derive.workspace = true openssl.workspace = true -openssl-sys.workspace = true -openssl-probe.workspace = true parse-display.workspace = true schemars = { workspace = true, features = ["chrono", "uuid1"] } serde.workspace = true diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 6dcf756737..362a7e91d8 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -277,6 +277,41 @@ "format": "uint32", "minimum": 0 }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 411c52ddff..633058d485 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -803,6 +803,41 @@ "format": "uint32", "minimum": 0 }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. 
Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index febf431793..3d36922296 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -1124,6 +1124,41 @@ "format": "uint32", "minimum": 0 }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" diff --git a/openapi/wicketd.json b/openapi/wicketd.json index e0b37f1ba2..a98e7302d9 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -861,6 +861,41 @@ "format": "uint32", "minimum": 0 }, + "connect_retry": { + "nullable": true, + "description": "The interval in seconds between peer connection retry attempts.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "delay_open": { + "nullable": true, + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "hold_time": { + "nullable": true, + "description": "How long to keep a session alive without a keepalive in seconds. 
Defaults to 6.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "idle_hold_time": { + "nullable": true, + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "keepalive": { + "nullable": true, + "description": "The interval to send keepalive messages at.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" diff --git a/oximeter/collector/Cargo.toml b/oximeter/collector/Cargo.toml index 470d9db312..ad0ae8e330 100644 --- a/oximeter/collector/Cargo.toml +++ b/oximeter/collector/Cargo.toml @@ -7,6 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true +camino.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true diff --git a/oximeter/collector/src/bin/clickhouse-schema-updater.rs b/oximeter/collector/src/bin/clickhouse-schema-updater.rs new file mode 100644 index 0000000000..20780c37e0 --- /dev/null +++ b/oximeter/collector/src/bin/clickhouse-schema-updater.rs @@ -0,0 +1,126 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! CLI tool to apply offline updates to ClickHouse schema. + +// Copyright 2023 Oxide Computer Company + +use anyhow::anyhow; +use anyhow::Context; +use camino::Utf8PathBuf; +use clap::Parser; +use clap::Subcommand; +use omicron_common::address::CLICKHOUSE_PORT; +use oximeter_db::model::OXIMETER_VERSION; +use oximeter_db::Client; +use slog::Drain; +use slog::Level; +use slog::LevelFilter; +use slog::Logger; +use std::net::Ipv6Addr; +use std::net::SocketAddr; +use std::net::SocketAddrV6; + +const DEFAULT_HOST: SocketAddr = SocketAddr::V6(SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + CLICKHOUSE_PORT, + 0, + 0, +)); + +fn parse_log_level(s: &str) -> anyhow::Result { + s.parse().map_err(|_| anyhow!("Invalid log level")) +} + +/// Tool to apply offline updates to ClickHouse schema. +#[derive(Clone, Debug, Parser)] +struct Args { + /// IP address and port at which to access ClickHouse. + #[arg(long, default_value_t = DEFAULT_HOST, env = "CLICKHOUSE_HOST")] + host: SocketAddr, + + /// Directory from which to read schema files for each version. + #[arg( + short = 's', + long, + default_value_t = Utf8PathBuf::from("/opt/oxide/oximeter/schema") + )] + schema_directory: Utf8PathBuf, + + /// The log level while running the command. + #[arg( + short, + long, + value_parser = parse_log_level, + default_value_t = Level::Warning + )] + log_level: Level, + + #[command(subcommand)] + cmd: Cmd, +} + +#[derive(Clone, Debug, Subcommand)] +enum Cmd { + /// List all schema in the directory available for an upgrade + #[clap(visible_alias = "ls")] + List, + /// Apply an upgrade to a specific version + #[clap(visible_aliases = ["up", "apply"])] + Upgrade { + /// The version to which to upgrade. 
+ #[arg(default_value_t = OXIMETER_VERSION)] + version: u64, + }, +} + +fn build_logger(level: Level) -> Logger { + let decorator = slog_term::TermDecorator::new().build(); + let drain = slog_term::FullFormat::new(decorator).build().fuse(); + let drain = slog_async::Async::new(drain).build().fuse(); + let drain = LevelFilter::new(drain, level).fuse(); + Logger::root(drain, slog::o!("unit" => "clickhouse_schema_updater")) +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = Args::parse(); + let log = build_logger(args.log_level); + let client = Client::new(args.host, &log); + let is_replicated = client.is_oximeter_cluster().await?; + match args.cmd { + Cmd::List => { + let latest = client + .read_latest_version() + .await + .context("Failed to read latest version")?; + let available_versions = Client::read_available_schema_versions( + &log, + is_replicated, + &args.schema_directory, + ) + .await?; + println!("Latest version: {latest}"); + println!("Available versions:"); + for ver in available_versions { + print!(" {ver}"); + if ver == latest { + print!(" (reported by database)"); + } + if ver == OXIMETER_VERSION { + print!(" (expected by oximeter)"); + } + println!(); + } + } + Cmd::Upgrade { version } => { + client + .ensure_schema(is_replicated, version, args.schema_directory) + .await + .context("Failed to upgrade schema")?; + println!("Upgrade to oximeter database version {version} complete"); + } + } + Ok(()) +} diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index b7a14cec45..ac5517205f 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -35,7 +35,6 @@ use omicron_common::backoff; use omicron_common::FileKv; use oximeter::types::ProducerResults; use oximeter::types::ProducerResultsItem; -use oximeter_db::model::OXIMETER_VERSION; use oximeter_db::Client; use oximeter_db::DbWrite; use serde::Deserialize; @@ -454,9 +453,39 @@ impl OximeterAgent { CLICKHOUSE_PORT, ) }; + + // Determine the version of the database. + // + // There are three cases + // + // - The database exists and is at the expected version. Continue in + // this case. + // + // - The database exists and is at a lower-than-expected version. We + // fail back to the caller here, which will retry indefinitely until the + // DB has been updated. + // + // - The DB doesn't exist at all. This reports a version number of 0. We + // need to create the DB here, at the latest version. This is used in + // fresh installations and tests. let client = Client::new(db_address, &log); - let replicated = client.is_oximeter_cluster().await?; - client.initialize_db_with_version(replicated, OXIMETER_VERSION).await?; + match client.check_db_is_at_expected_version().await { + Ok(_) => {} + Err(oximeter_db::Error::DatabaseVersionMismatch { + found: 0, + .. + }) => { + debug!(log, "oximeter database does not exist, creating"); + let replicated = client.is_oximeter_cluster().await?; + client + .initialize_db_with_version( + replicated, + oximeter_db::OXIMETER_VERSION, + ) + .await?; + } + Err(e) => return Err(Error::from(e)), + } // Spawn the task for aggregating and inserting all metrics tokio::spawn(async move { @@ -712,6 +741,9 @@ impl Oximeter { /// /// This can be used to override / ignore the logging configuration in /// `config`, using `log` instead. + /// + /// Note that this blocks until the ClickHouse database is available **and + /// at the expected version**. 
pub async fn with_logger( config: &Config, args: &OximeterArguments, @@ -743,7 +775,8 @@ impl Oximeter { let log_client_failure = |error, delay| { warn!( log, - "failed to initialize ClickHouse database, will retry in {:?}", delay; + "failed to create ClickHouse client"; + "retry_after" => ?delay, "error" => ?error, ); }; diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index d37c57ccce..4d53869d0d 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -10,10 +10,12 @@ anyhow.workspace = true async-trait.workspace = true bcs.workspace = true bytes = { workspace = true, features = [ "serde" ] } +camino.workspace = true chrono.workspace = true clap.workspace = true dropshot.workspace = true highway.workspace = true +omicron-common.workspace = true oximeter.workspace = true regex.workspace = true reqwest = { workspace = true, features = [ "json" ] } @@ -35,6 +37,7 @@ itertools.workspace = true omicron-test-utils.workspace = true slog-dtrace.workspace = true strum.workspace = true +tempfile.workspace = true [[bin]] name = "oxdb" diff --git a/oximeter/db/schema/README.md b/oximeter/db/schema/README.md new file mode 100644 index 0000000000..2f1633138d --- /dev/null +++ b/oximeter/db/schema/README.md @@ -0,0 +1,39 @@ +# ClickHouse schema files + +This directory contains the SQL files for different versions of the ClickHouse +timeseries database used by `oximeter`. In general, schema are expected to be +applied while the database is online, but no other clients exist. This is +similar to the current situation for _offline upgrade_ we use when updating the +main control plane database in CockroachDB. + +## Constraints, or why ClickHouse is weird + +While this tool is modeled after the mechanism for applying updates in +CockroachDB, ClickHouse is a significantly different DBMS. There are no +transactions; no unique primary keys; a single DB server can house both +replicated and single-node tables. This means we need to be pretty careful when +updating the schema. Changes must be idempotent, as with the CRDB schema, but at +this point we do not support inserting or modifying data at all. + +Similar to the CRDB offline update tool, we assume no non-update modifications +of the database are running concurrently. However, given ClickHouse's lack of +transactions, we actually require that there are no writes of any kind. In +practice, this means `oximeter` **must not** be running when this is called. +Similarly, there must be only a single instance of this program at a time. 
+ +To run this program: + +- Ensure the ClickHouse server is running, and grab its IP address; + ```bash + $ pfexec zlogin oxz_clickhouse_e449eb80-3371-40a6-a316-d6e64b039357 'ipadm show-addr -o addrobj,addr | grep omicron6' + oxControlService20/omicron6 fd00:1122:3344:101::e/64 + ``` +- Log into the `oximeter` zone, `zlogin oxz_oximeter_` +- Run this tool, pointing it at the desired schema directory, e.g.: + +```bash +# /opt/oxide/oximeter/bin/clickhouse-schema-updater \ + --host \ + --schema-dir /opt/oxide/oximeter/sql + up VERSION +``` diff --git a/oximeter/db/schema/replicated/2/up.sql b/oximeter/db/schema/replicated/2/up.sql new file mode 100644 index 0000000000..eb01b8c1a3 --- /dev/null +++ b/oximeter/db/schema/replicated/2/up.sql @@ -0,0 +1,819 @@ +CREATE DATABASE IF NOT EXISTS oximeter ON CLUSTER oximeter_cluster; + +/* The version table contains metadata about the `oximeter` database */ +CREATE TABLE IF NOT EXISTS oximeter.version ON CLUSTER oximeter_cluster +( + value UInt64, + timestamp DateTime64(9, 'UTC') +) +ENGINE = ReplicatedMergeTree() +ORDER BY (value, timestamp); + +/* The measurement tables contain all individual samples from each timeseries. + * + * Each table stores a single datum type, and otherwise contains nearly the same + * structure. The primary sorting key is on the timeseries name, key, and then + * timestamp, so that all timeseries from the same schema are grouped, followed + * by all samples from the same timeseries. + * + * This reflects that one usually looks up the _key_ in one or more field table, + * and then uses that to index quickly into the measurements tables. + */ +CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bool_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_bool_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int8 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int8 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i8_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + 
timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u8_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int16 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int16 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i16_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt16 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt16 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u16_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int32 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt32 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + 
timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_string_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum String +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_string_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_string ON CLUSTER oximeter_cluster +( + 
timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum String +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_string_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Array(UInt8) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bytes_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Array(UInt8) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_bytes_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativei64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativei64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativeu64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativeu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64_local ON CLUSTER 
oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int8), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int8), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami8_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt8), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu8_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt8), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu8_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int16), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int16), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami16_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key 
UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt16), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu16_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt16), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu16_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int32), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int32), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt32), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt32), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int64), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int64), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + 
timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt64), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt64), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float32), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf32_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float32), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf32_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float64), + counts Array(UInt64) +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf64_local', '{replica}') +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float64), + counts Array(UInt64) +) +ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf64_local', xxHash64(splitByChar(':', timeseries_name)[1])); + +/* The field tables store named dimensions of each timeseries. + * + * As with the measurement tables, there is one field table for each field data + * type. Fields are deduplicated by using the "replacing merge tree", though + * this behavior **must not** be relied upon for query correctness. + * + * The index for the fields differs from the measurements, however. Rows are + * sorted by timeseries name, then field name, field value, and finally + * timeseries key. This reflects the most common pattern for looking them up: + * by field name and possibly value, within a timeseries. The resulting keys are + * usually then used to look up measurements. + * + * NOTE: We may want to consider a secondary index on these tables, sorting by + * timeseries name and then key, since it would improve lookups where one + * already has the key. Realistically though, these tables are quite small and + * so performance benefits will be low in absolute terms. 
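+ *
+ * As an illustration (the names here are hypothetical), the usual access
+ * pattern first collects keys from a field table:
+ *
+ *     SELECT DISTINCT timeseries_key
+ *     FROM oximeter.fields_string
+ *     WHERE timeseries_name = 'some_target:some_metric'
+ *         AND field_name = 'some_field'
+ *         AND field_value = 'some_value';
+ *
+ * and then uses those keys in a `WHERE timeseries_key IN (...)` clause against
+ * one of the measurement tables above.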
+ */ +CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt8 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int8 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt8 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int16 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt16 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int32 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt32 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int64 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt64 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value IPv6 +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_string ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value String +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UUID +) +ENGINE = ReplicatedReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +/* The timeseries schema table stores the extracted schema for the samples + * oximeter collects. 
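+ *
+ * The `fields` column is a Nested type, i.e. a group of parallel arrays with
+ * one entry per field. For a hypothetical schema with a `project_id` target
+ * field and a `cpu_id` metric field, a row would carry
+ * fields.name = ['project_id', 'cpu_id'], fields.type = ['Uuid', 'I64'] and
+ * fields.source = ['Target', 'Metric'].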
+ */ +CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ON CLUSTER oximeter_cluster +( + timeseries_name String, + fields Nested( + name String, + type Enum( + 'Bool' = 1, + 'I64' = 2, + 'IpAddr' = 3, + 'String' = 4, + 'Uuid' = 6 + ), + source Enum( + 'Target' = 1, + 'Metric' = 2 + ) + ), + datum_type Enum( + 'Bool' = 1, + 'I64' = 2, + 'F64' = 3, + 'String' = 4, + 'Bytes' = 5, + 'CumulativeI64' = 6, + 'CumulativeF64' = 7, + 'HistogramI64' = 8, + 'HistogramF64' = 9, + 'I8' = 10, + 'U8' = 11, + 'I16' = 12, + 'U16' = 13, + 'I32' = 14, + 'U32' = 15, + 'U64' = 16, + 'F32' = 17, + 'CumulativeU64' = 18, + 'CumulativeF32' = 19, + 'HistogramI8' = 20, + 'HistogramU8' = 21, + 'HistogramI16' = 22, + 'HistogramU16' = 23, + 'HistogramI32' = 24, + 'HistogramU32' = 25, + 'HistogramU64' = 26, + 'HistogramF32' = 27 + ), + created DateTime64(9, 'UTC') +) +ENGINE = ReplicatedMergeTree() +ORDER BY (timeseries_name, fields.name); diff --git a/oximeter/db/schema/replicated/3/up.sql b/oximeter/db/schema/replicated/3/up.sql new file mode 100644 index 0000000000..073d643564 --- /dev/null +++ b/oximeter/db/schema/replicated/3/up.sql @@ -0,0 +1,22 @@ +/* This adds missing field types to the timeseries schema table field.type + * column, by augmenting the enum to capture new values. Note that the existing + * variants can't be moved or changed, so the new ones are added at the end. The + * client never sees this discriminant, only the string, so it should not + * matter. + */ +ALTER TABLE oximeter.timeseries_schema + MODIFY COLUMN IF EXISTS fields.type + Array(Enum( + 'Bool' = 1, + 'I64' = 2, + 'IpAddr' = 3, + 'String' = 4, + 'Uuid' = 6, + 'I8' = 7, + 'U8' = 8, + 'I16' = 9, + 'U16' = 10, + 'I32' = 11, + 'U32' = 12, + 'U64' = 13 + )); diff --git a/oximeter/db/src/db-replicated-init.sql b/oximeter/db/schema/replicated/db-init.sql similarity index 93% rename from oximeter/db/src/db-replicated-init.sql rename to oximeter/db/schema/replicated/db-init.sql index ec11854e44..4429f41364 100644 --- a/oximeter/db/src/db-replicated-init.sql +++ b/oximeter/db/schema/replicated/db-init.sql @@ -1,5 +1,6 @@ CREATE DATABASE IF NOT EXISTS oximeter ON CLUSTER oximeter_cluster; --- + +/* The version table contains metadata about the `oximeter` database */ CREATE TABLE IF NOT EXISTS oximeter.version ON CLUSTER oximeter_cluster ( value UInt64, @@ -7,7 +8,17 @@ CREATE TABLE IF NOT EXISTS oximeter.version ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedMergeTree() ORDER BY (value, timestamp); --- + +/* The measurement tables contain all individual samples from each timeseries. + * + * Each table stores a single datum type, and otherwise contains nearly the same + * structure. The primary sorting key is on the timeseries name, key, and then + * timestamp, so that all timeseries from the same schema are grouped, followed + * by all samples from the same timeseries. + * + * This reflects that one usually looks up the _key_ in one or more field table, + * and then uses that to index quickly into the measurements tables.
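+ *
+ * In this replicated layout, each `measurements_*` table is a Distributed
+ * wrapper over a `measurements_*_local` ReplicatedMergeTree table, sharded by
+ * xxHash64(splitByChar(':', timeseries_name)[1]). For a hypothetical
+ * timeseries named 'virtual_machine:cpu_busy', that key is
+ * xxHash64('virtual_machine'), i.e. rows are distributed by the target
+ * component of the timeseries name.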
+ */ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -18,7 +29,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool_local ON CLUSTER oximeter_ ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bool_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -27,7 +38,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ON CLUSTER oximeter_cluste datum UInt8 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_bool_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i8_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -38,7 +49,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8_local ON CLUSTER oximeter_cl ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -47,7 +58,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ON CLUSTER oximeter_cluster datum Int8 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i8_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u8_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -58,7 +69,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8_local ON CLUSTER oximeter_cl ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -67,7 +78,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ON CLUSTER oximeter_cluster datum UInt8 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u8_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i16_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -78,7 +89,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -87,7 +98,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ON CLUSTER oximeter_cluster datum Int16 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i16_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u16_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -98,7 +109,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ON CLUSTER 
oximeter_cluster ( timeseries_name String, @@ -107,7 +118,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ON CLUSTER oximeter_cluster datum UInt16 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u16_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -118,7 +129,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i32_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -127,7 +138,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ON CLUSTER oximeter_cluster datum Int32 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -138,7 +149,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -147,7 +158,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ON CLUSTER oximeter_cluster datum UInt32 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -158,7 +169,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_i64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -167,7 +178,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ON CLUSTER oximeter_cluster datum Int64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_i64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -178,7 +189,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_u64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -187,7 +198,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ON CLUSTER oximeter_cluster datum UInt64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_u64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -198,7 +209,7 @@ CREATE TABLE IF NOT EXISTS 
oximeter.measurements_f32_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -207,7 +218,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ON CLUSTER oximeter_cluster datum Float32 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -218,7 +229,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64_local ON CLUSTER oximeter_c ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_f64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -227,7 +238,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ON CLUSTER oximeter_cluster datum Float64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_f64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_string_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -238,7 +249,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string_local ON CLUSTER oximete ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_string_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_string ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -247,7 +258,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string ON CLUSTER oximeter_clus datum String ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_string_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -258,7 +269,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes_local ON CLUSTER oximeter ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_bytes_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -267,7 +278,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ON CLUSTER oximeter_clust datum Array(UInt8) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_bytes_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -279,7 +290,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64_local ON CLUSTER ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativei64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -289,7 +300,7 @@ CREATE TABLE IF NOT 
EXISTS oximeter.measurements_cumulativei64 ON CLUSTER oximet datum Int64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativei64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -301,7 +312,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64_local ON CLUSTER ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativeu64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -311,7 +322,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ON CLUSTER oximet datum UInt64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativeu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -323,7 +334,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32_local ON CLUSTER ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -333,7 +344,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ON CLUSTER oximet datum Float32 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -345,7 +356,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64_local ON CLUSTER ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_cumulativef64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -355,7 +366,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ON CLUSTER oximet datum Float64 ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_cumulativef64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -368,7 +379,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8_local ON CLUSTER ox ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -379,7 +390,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ON CLUSTER oximeter counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami8_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS 
oximeter.measurements_histogramu8_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -392,7 +403,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8_local ON CLUSTER ox ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu8_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -403,7 +414,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ON CLUSTER oximeter counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu8_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -416,7 +427,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -427,7 +438,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami16_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -440,7 +451,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu16_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -451,7 +462,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu16_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -464,7 +475,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -475,7 +486,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -488,7 +499,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32_local ON CLUSTER o ENGINE = 
ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -499,7 +510,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -512,7 +523,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogrami64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -523,7 +534,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogrami64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -536,7 +547,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramu64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -547,7 +558,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramu64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -560,7 +571,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf32_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -571,7 +582,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf32_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -584,7 +595,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64_local ON CLUSTER o ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/measurements_histogramf64_local', '{replica}') ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS 
oximeter.measurements_histogramf64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -595,7 +606,24 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ON CLUSTER oximete counts Array(UInt64) ) ENGINE = Distributed('oximeter_cluster', 'oximeter', 'measurements_histogramf64_local', xxHash64(splitByChar(':', timeseries_name)[1])); --- + +/* The field tables store named dimensions of each timeseries. + * + * As with the measurement tables, there is one field table for each field data + * type. Fields are deduplicated by using the "replacing merge tree", though + * this behavior **must not** be relied upon for query correctness. + * + * The index for the fields differs from the measurements, however. Rows are + * sorted by timeseries name, then field name, field value, and finally + * timeseries key. This reflects the most common pattern for looking them up: + * by field name and possibly value, within a timeseries. The resulting keys are + * usually then used to look up measurements. + * + * NOTE: We may want to consider a secondary index on these tables, sorting by + * timeseries name and then key, since it would improve lookups where one + * already has the key. Realistically though, these tables are quite small and + * so performance benefits will be low in absolute terms. + */ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -605,7 +633,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -615,7 +643,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -625,7 +653,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -635,7 +663,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -645,7 +673,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -655,7 +683,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -665,7 +693,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i64 
ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -675,7 +703,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -685,7 +713,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -695,7 +723,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_string ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -705,7 +733,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_string ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -715,7 +743,10 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ON CLUSTER oximeter_cluster ) ENGINE = ReplicatedReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + +/* The timeseries schema table stores the extracted schema for the samples + * oximeter collects. + */ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ON CLUSTER oximeter_cluster ( timeseries_name String, @@ -726,7 +757,14 @@ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ON CLUSTER oximeter_cluste 'I64' = 2, 'IpAddr' = 3, 'String' = 4, - 'Uuid' = 6 + 'Uuid' = 6, + 'I8' = 7, + 'U8' = 8, + 'I16' = 9, + 'U16' = 10, + 'I32' = 11, + 'U32' = 12, + 'U64' = 13 ), source Enum( 'Target' = 1, diff --git a/oximeter/db/src/db-wipe-replicated.sql b/oximeter/db/schema/replicated/db-wipe.sql similarity index 100% rename from oximeter/db/src/db-wipe-replicated.sql rename to oximeter/db/schema/replicated/db-wipe.sql diff --git a/oximeter/db/src/db-single-node-init.sql b/oximeter/db/schema/single-node/2/up.sql similarity index 88% rename from oximeter/db/src/db-single-node-init.sql rename to oximeter/db/schema/single-node/2/up.sql index 2fb5c36397..4756e2897d 100644 --- a/oximeter/db/src/db-single-node-init.sql +++ b/oximeter/db/schema/single-node/2/up.sql @@ -1,5 +1,6 @@ CREATE DATABASE IF NOT EXISTS oximeter; --- + +/* The version table contains metadata about the `oximeter` database */ CREATE TABLE IF NOT EXISTS oximeter.version ( value UInt64, @@ -7,7 +8,17 @@ CREATE TABLE IF NOT EXISTS oximeter.version ) ENGINE = MergeTree() ORDER BY (value, timestamp); --- + +/* The measurement tables contain all individual samples from each timeseries. + * + * Each table stores a single datum type, and otherwise contains nearly the same + * structure. The primary sorting key is on the timeseries name, key, and then + * timestamp, so that all timeseries from the same schema are grouped, followed + * by all samples from the same timeseries. + * + * This reflects that one usually looks up the _key_ in one or more field table, + * and then uses that to index quickly into the measurements tables. 
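+ *
+ * As an illustration (the names here are hypothetical), a range scan over a
+ * single timeseries is served directly by this sorting key:
+ *
+ *     SELECT timestamp, datum
+ *     FROM oximeter.measurements_f64
+ *     WHERE timeseries_name = 'some_target:some_metric'
+ *         AND timeseries_key = 1234
+ *         AND timestamp >= '2023-01-01 00:00:00'
+ *     ORDER BY timestamp;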
+ */ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ( timeseries_name String, @@ -18,7 +29,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bool ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ( timeseries_name String, @@ -29,7 +40,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ( timeseries_name String, @@ -40,7 +51,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ( timeseries_name String, @@ -51,7 +62,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ( timeseries_name String, @@ -62,7 +73,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ( timeseries_name String, @@ -73,7 +84,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ( timeseries_name String, @@ -84,7 +95,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ( timeseries_name String, @@ -95,7 +106,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ( timeseries_name String, @@ -106,7 +117,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ( timeseries_name String, @@ -117,7 +128,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ( timeseries_name String, @@ -128,7 +139,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_string ( timeseries_name String, @@ -139,7 +150,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_string ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ( timeseries_name String, @@ -150,7 +161,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, 
timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ( timeseries_name String, @@ -162,7 +173,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ( timeseries_name String, @@ -174,7 +185,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ( timeseries_name String, @@ -186,8 +197,8 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- --- + + CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ( timeseries_name String, @@ -199,7 +210,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ( timeseries_name String, @@ -212,7 +223,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ( timeseries_name String, @@ -225,7 +236,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ( timeseries_name String, @@ -238,7 +249,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ( timeseries_name String, @@ -251,7 +262,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ( timeseries_name String, @@ -264,7 +275,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ( timeseries_name String, @@ -277,7 +288,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ( timeseries_name String, @@ -290,7 +301,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ( timeseries_name String, @@ 
-303,7 +314,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ( timeseries_name String, @@ -316,7 +327,7 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ( timeseries_name String, @@ -329,7 +340,24 @@ CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 ENGINE = MergeTree() ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) TTL toDateTime(timestamp) + INTERVAL 30 DAY; --- + +/* The field tables store named dimensions of each timeseries. + * + * As with the measurement tables, there is one field table for each field data + * type. Fields are deduplicated by using the "replacing merge tree", though + * this behavior **must not** be relied upon for query correctness. + * + * The index for the fields differs from the measurements, however. Rows are + * sorted by timeseries name, then field name, field value, and finally + * timeseries key. This reflects the most common pattern for looking them up: + * by field name and possibly value, within a timeseries. The resulting keys are + * usually then used to look up measurements. + * + * NOTE: We may want to consider a secondary index on these tables, sorting by + * timeseries name and then key, since it would improve lookups where one + * already has the key. Realistically though, these tables are quite small and + * so performance benefits will be low in absolute terms. 
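+ *
+ * Because the replacing merge tree deduplicates asynchronously during
+ * background merges, duplicate rows may still be visible to queries, so
+ * queries should deduplicate explicitly, for example (names are hypothetical):
+ *
+ *     SELECT DISTINCT timeseries_key
+ *     FROM oximeter.fields_uuid
+ *     WHERE timeseries_name = 'some_target:some_metric'
+ *         AND field_name = 'some_field';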
+ */ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ( timeseries_name String, @@ -339,7 +367,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_bool ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ( timeseries_name String, @@ -349,7 +377,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i8 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ( timeseries_name String, @@ -359,7 +387,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u8 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ( timeseries_name String, @@ -369,7 +397,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i16 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ( timeseries_name String, @@ -379,7 +407,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u16 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ( timeseries_name String, @@ -389,7 +417,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i32 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ( timeseries_name String, @@ -399,7 +427,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u32 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ( timeseries_name String, @@ -409,7 +437,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_i64 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ( timeseries_name String, @@ -419,7 +447,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_u64 ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ( timeseries_name String, @@ -429,7 +457,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_string ( timeseries_name String, @@ -439,7 +467,7 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_string ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ( timeseries_name String, @@ -449,7 +477,10 @@ CREATE TABLE IF NOT EXISTS oximeter.fields_uuid ) ENGINE = ReplacingMergeTree() ORDER BY (timeseries_name, field_name, field_value, timeseries_key); --- + +/* The timeseries schema table stores the extracted schema for the samples + * oximeter collects. + */ CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema ( timeseries_name String, diff --git a/oximeter/db/schema/single-node/3/up.sql b/oximeter/db/schema/single-node/3/up.sql new file mode 100644 index 0000000000..073d643564 --- /dev/null +++ b/oximeter/db/schema/single-node/3/up.sql @@ -0,0 +1,22 @@ +/* This adds missing field types to the timeseries schema table field.type + * column, by augmenting the enum to capture new values.
Note that the existing + * variants can't be moved or changed, so the new ones are added at the end. The + * client never sees this discriminant, only the string, so it should not + * matter. + */ +ALTER TABLE oximeter.timeseries_schema + MODIFY COLUMN IF EXISTS fields.type + Array(Enum( + 'Bool' = 1, + 'I64' = 2, + 'IpAddr' = 3, + 'String' = 4, + 'Uuid' = 6, + 'I8' = 7, + 'U8' = 8, + 'I16' = 9, + 'U16' = 10, + 'I32' = 11, + 'U32' = 12, + 'U64' = 13 + )); diff --git a/oximeter/db/schema/single-node/db-init.sql b/oximeter/db/schema/single-node/db-init.sql new file mode 100644 index 0000000000..ee5e91c4b7 --- /dev/null +++ b/oximeter/db/schema/single-node/db-init.sql @@ -0,0 +1,540 @@ +CREATE DATABASE IF NOT EXISTS oximeter; + +/* The version table contains metadata about the `oximeter` database */ +CREATE TABLE IF NOT EXISTS oximeter.version +( + value UInt64, + timestamp DateTime64(9, 'UTC') +) +ENGINE = MergeTree() +ORDER BY (value, timestamp); + +/* The measurement tables contain all individual samples from each timeseries. + * + * Each table stores a single datum type, and otherwise contains nearly the same + * structure. The primary sorting key is on the timeseries name, key, and then + * timestamp, so that all timeseries from the same schema are grouped, followed + * by all samples from the same timeseries. + * + * This reflects that one usually looks up the _key_ in one or more field table, + * and then uses that to index quickly into the measurements tables. + */ +CREATE TABLE IF NOT EXISTS oximeter.measurements_bool +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i8 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int8 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u8 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt8 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i16 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int16 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u16 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt16 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i32 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int32 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u32 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt32 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_i64 +( + 
timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_u64 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f32 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_f64 +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_string +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum String +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_bytes +( + timeseries_name String, + timeseries_key UInt64, + timestamp DateTime64(9, 'UTC'), + datum Array(UInt8) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativei64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Int64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativeu64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum UInt64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float32 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + + +CREATE TABLE IF NOT EXISTS oximeter.measurements_cumulativef64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + datum Float64 +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami8 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int8), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu8 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + 
timestamp DateTime64(9, 'UTC'), + bins Array(UInt8), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami16 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int16), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu16 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt16), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int32), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt32), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogrami64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Int64), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramu64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(UInt64), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf32 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float32), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +CREATE TABLE IF NOT EXISTS oximeter.measurements_histogramf64 +( + timeseries_name String, + timeseries_key UInt64, + start_time DateTime64(9, 'UTC'), + timestamp DateTime64(9, 'UTC'), + bins Array(Float64), + counts Array(UInt64) +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, timeseries_key, start_time, timestamp) +TTL toDateTime(timestamp) + INTERVAL 30 DAY; + +/* The field tables store named dimensions of each timeseries. + * + * As with the measurement tables, there is one field table for each field data + * type. Fields are deduplicated by using the "replacing merge tree", though + * this behavior **must not** be relied upon for query correctness. + * + * The index for the fields differs from the measurements, however. 
Rows are + * sorted by timeseries name, then field name, field value, and finally + * timeseries key. This reflects the most common pattern for looking them up: + * by field name and possibly value, within a timeseries. The resulting keys are + * usually then used to look up measurements. + * + * NOTE: We may want to consider a secondary index on these tables, sorting by + * timeseries name and then key, since it would improve lookups where one + * already has the key. Realistically though, these tables are quite small and + * so performance benefits will be low in absolute terms. + */ +CREATE TABLE IF NOT EXISTS oximeter.fields_bool +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt8 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i8 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int8 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u8 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt8 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i16 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int16 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u16 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt16 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i32 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int32 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u32 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt32 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_i64 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value Int64 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_u64 +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UInt64 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_ipaddr +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value IPv6 +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_string +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value String +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, field_name, field_value, timeseries_key); + +CREATE TABLE IF NOT EXISTS oximeter.fields_uuid +( + timeseries_name String, + timeseries_key UInt64, + field_name String, + field_value UUID +) +ENGINE = ReplacingMergeTree() +ORDER BY (timeseries_name, 
field_name, field_value, timeseries_key); + +/* The timeseries schema table stores the extracted schema for the samples + * oximeter collects. + */ +CREATE TABLE IF NOT EXISTS oximeter.timeseries_schema +( + timeseries_name String, + fields Nested( + name String, + type Enum( + 'Bool' = 1, + 'I64' = 2, + 'IpAddr' = 3, + 'String' = 4, + 'Uuid' = 6, + 'I8' = 7, + 'U8' = 8, + 'I16' = 9, + 'U16' = 10, + 'I32' = 11, + 'U32' = 12, + 'U64' = 13 + ), + source Enum( + 'Target' = 1, + 'Metric' = 2 + ) + ), + datum_type Enum( + 'Bool' = 1, + 'I64' = 2, + 'F64' = 3, + 'String' = 4, + 'Bytes' = 5, + 'CumulativeI64' = 6, + 'CumulativeF64' = 7, + 'HistogramI64' = 8, + 'HistogramF64' = 9, + 'I8' = 10, + 'U8' = 11, + 'I16' = 12, + 'U16' = 13, + 'I32' = 14, + 'U32' = 15, + 'U64' = 16, + 'F32' = 17, + 'CumulativeU64' = 18, + 'CumulativeF32' = 19, + 'HistogramI8' = 20, + 'HistogramU8' = 21, + 'HistogramI16' = 22, + 'HistogramU16' = 23, + 'HistogramI32' = 24, + 'HistogramU32' = 25, + 'HistogramU64' = 26, + 'HistogramF32' = 27 + ), + created DateTime64(9, 'UTC') +) +ENGINE = MergeTree() +ORDER BY (timeseries_name, fields.name); diff --git a/oximeter/db/src/db-wipe-single-node.sql b/oximeter/db/schema/single-node/db-wipe.sql similarity index 100% rename from oximeter/db/src/db-wipe-single-node.sql rename to oximeter/db/schema/single-node/db-wipe.sql diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index 88b95c3764..e1ed06554c 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -23,6 +23,8 @@ use dropshot::PaginationOrder; use dropshot::ResultsPage; use dropshot::WhichPage; use oximeter::types::Sample; +use regex::Regex; +use regex::RegexBuilder; use slog::debug; use slog::error; use slog::info; @@ -35,6 +37,11 @@ use std::collections::BTreeSet; use std::convert::TryFrom; use std::net::SocketAddr; use std::num::NonZeroU32; +use std::ops::Bound; +use std::path::Path; +use std::path::PathBuf; +use std::sync::OnceLock; +use tokio::fs; use tokio::sync::Mutex; use uuid::Uuid; @@ -267,14 +274,319 @@ impl Client { .map_err(|e| Error::Database(e.to_string())) } + /// Read the available schema versions in the provided directory. + pub async fn read_available_schema_versions( + log: &Logger, + is_replicated: bool, + schema_dir: impl AsRef, + ) -> Result, Error> { + let dir = schema_dir.as_ref().join(if is_replicated { + "replicated" + } else { + "single-node" + }); + let mut rd = + fs::read_dir(&dir).await.map_err(|err| Error::ReadSchemaDir { + context: format!( + "Failed to read schema directory '{}'", + dir.display() + ), + err, + })?; + let mut versions = BTreeSet::new(); + debug!(log, "reading entries from schema dir"; "dir" => dir.display()); + while let Some(entry) = + rd.next_entry().await.map_err(|err| Error::ReadSchemaDir { + context: String::from("Failed to read directory entry"), + err, + })? 
+ { + let name = entry + .file_name() + .into_string() + .map_err(|bad| Error::NonUtf8SchemaDirEntry(bad.to_owned()))?; + let md = + entry.metadata().await.map_err(|err| Error::ReadSchemaDir { + context: String::from("Failed to fetch entry metadata"), + err, + })?; + if !md.is_dir() { + debug!(log, "skipping non-directory"; "name" => &name); + continue; + } + match name.parse() { + Ok(ver) => { + debug!(log, "valid version dir"; "ver" => ver); + assert!(versions.insert(ver), "Versions should be unique"); + } + Err(e) => warn!( + log, + "found directory with non-u64 name, skipping"; + "name" => name, + "error" => ?e, + ), + } + } + Ok(versions) + } + + /// Ensure that the database is upgraded to the desired version of the + /// schema. + /// + /// NOTE: This function is not safe for concurrent usage! + pub async fn ensure_schema( + &self, + replicated: bool, + desired_version: u64, + schema_dir: impl AsRef<Path>, + ) -> Result<(), Error> { + let schema_dir = schema_dir.as_ref(); + let latest = self.read_latest_version().await?; + if latest == desired_version { + debug!( + self.log, + "database already at desired version"; + "version" => latest, + ); + return Ok(()); + } + debug!( + self.log, + "starting upgrade to desired version {}", desired_version + ); + let available = Self::read_available_schema_versions( + &self.log, replicated, schema_dir, + ) + .await?; + // We explicitly ignore version 0, which implies the database doesn't + // exist at all. + if latest > 0 && !available.contains(&latest) { + return Err(Error::MissingSchemaVersion(latest)); + } + if !available.contains(&desired_version) { + return Err(Error::MissingSchemaVersion(desired_version)); + } + + // Check we have no gaps in version numbers, starting with the latest + // version and walking through all available ones strictly greater. This + // is to check that the _next_ version is also 1 greater than the + // latest. + let range = (Bound::Excluded(latest), Bound::Included(desired_version)); + if available + .range(latest..) + .zip(available.range(range)) + .any(|(current, next)| next - current != 1) + { + return Err(Error::NonSequentialSchemaVersions); + } + + // Walk through all changes between current version (exclusive) and + // the desired version (inclusive).
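Before moving on to the walk itself, here is a minimal standalone sketch of the gap rule enforced just above, written over a plain set of available versions; it is an illustration, not code from this change. With versions {1, 2, 3} on disk, latest = 1 and desired = 3 passes, while {1, 3} fails because the step from 1 to 3 skips a version.

    use std::collections::BTreeSet;
    use std::ops::Bound;

    // True when every step from `latest` (exclusive) through `desired`
    // (inclusive) increases the version number by exactly one.
    fn versions_are_sequential(
        available: &BTreeSet<u64>,
        latest: u64,
        desired: u64,
    ) -> bool {
        let range = (Bound::Excluded(latest), Bound::Included(desired));
        !available
            .range(latest..)
            .zip(available.range(range))
            .any(|(current, next)| next - current != 1)
    }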
+ let versions_to_apply = available.range(range); + let mut current = latest; + for version in versions_to_apply { + if let Err(e) = self + .apply_one_schema_upgrade(replicated, *version, schema_dir) + .await + { + error!( + self.log, + "failed to apply schema upgrade"; + "current_version" => current, + "next_version" => *version, + "replicated" => replicated, + "schema_dir" => schema_dir.display(), + "error" => ?e, + ); + return Err(e); + } + current = *version; + self.insert_version(current).await?; + } + Ok(()) + } + + fn verify_schema_upgrades( + files: &BTreeMap, + ) -> Result<(), Error> { + let re = schema_validation_regex(); + for (path, sql) in files.values() { + if re.is_match(&sql) { + return Err(Error::SchemaUpdateModifiesData { + path: path.clone(), + statement: sql.clone(), + }); + } + if sql.matches(';').count() > 1 { + return Err(Error::MultipleSqlStatementsInSchemaUpdate { + path: path.clone(), + }); + } + } + Ok(()) + } + + async fn apply_one_schema_upgrade( + &self, + replicated: bool, + next_version: u64, + schema_dir: impl AsRef, + ) -> Result<(), Error> { + let schema_dir = schema_dir.as_ref(); + let upgrade_file_contents = Self::read_schema_upgrade_sql_files( + &self.log, + replicated, + next_version, + schema_dir, + ) + .await?; + + // We need to be pretty careful at this point with any data-modifying + // statements. There should be no INSERT queries, for example, which we + // check here. ClickHouse doesn't support much in the way of data + // modification, which makes this pretty easy. + Self::verify_schema_upgrades(&upgrade_file_contents)?; + + // Apply each file in sequence in the upgrade directory. + for (name, (path, sql)) in upgrade_file_contents.into_iter() { + debug!( + self.log, + "apply schema upgrade file"; + "version" => next_version, + "path" => path.display(), + "filename" => &name, + ); + match self.execute(sql).await { + Ok(_) => debug!( + self.log, + "successfully applied schema upgrade file"; + "version" => next_version, + "path" => path.display(), + "name" => name, + ), + Err(e) => { + return Err(e); + } + } + } + Ok(()) + } + + fn full_upgrade_path( + replicated: bool, + version: u64, + schema_dir: impl AsRef, + ) -> PathBuf { + schema_dir + .as_ref() + .join(if replicated { "replicated" } else { "single-node" }) + .join(version.to_string()) + } + + // Read all SQL files, in order, in the schema directory for the provided + // version. + async fn read_schema_upgrade_sql_files( + log: &Logger, + replicated: bool, + version: u64, + schema_dir: impl AsRef, + ) -> Result, Error> { + let version_schema_dir = + Self::full_upgrade_path(replicated, version, schema_dir.as_ref()); + let mut rd = + fs::read_dir(&version_schema_dir).await.map_err(|err| { + Error::ReadSchemaDir { + context: format!( + "Failed to read schema directory '{}'", + version_schema_dir.display() + ), + err, + } + })?; + + let mut upgrade_files = BTreeMap::new(); + debug!(log, "reading SQL files from schema dir"; "dir" => version_schema_dir.display()); + while let Some(entry) = + rd.next_entry().await.map_err(|err| Error::ReadSchemaDir { + context: String::from("Failed to read directory entry"), + err, + })? 
+ { + let path = entry.path(); + let Some(ext) = path.extension() else { + warn!( + log, + "skipping schema dir entry without an extension"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + }; + let Some(ext) = ext.to_str() else { + warn!( + log, + "skipping schema dir entry with non-UTF8 extension"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + }; + if ext.eq_ignore_ascii_case("sql") { + let Some(stem) = path.file_stem() else { + warn!( + log, + "skipping schema SQL file with no name"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + }; + let Some(name) = stem.to_str() else { + warn!( + log, + "skipping schema SQL file with non-UTF8 name"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + }; + let contents = + fs::read_to_string(&path).await.map_err(|err| { + Error::ReadSqlFile { + context: format!( + "Reading SQL file '{}' for upgrade", + path.display(), + ), + err, + } + })?; + upgrade_files + .insert(name.to_string(), (path.to_owned(), contents)); + } else { + warn!( + log, + "skipping non-SQL schema dir entry"; + "dir" => version_schema_dir.display(), + "path" => path.display(), + ); + continue; + } + } + Ok(upgrade_files) + } + /// Validates that the schema used by the DB matches the version used by /// the executable using it. /// - /// This function will wipe metrics data if the version stored within + /// This function will **wipe** metrics data if the version stored within /// the DB is less than the schema version of Oximeter. /// If the version in the DB is newer than what is known to Oximeter, an /// error is returned. /// + /// If you would like to non-destructively upgrade the database, then either + /// the included binary `clickhouse-schema-updater` or the method + /// [`Client::ensure_schema()`] should be used instead. + /// /// NOTE: This function is not safe for concurrent usage! pub async fn initialize_db_with_version( &self, @@ -304,11 +616,10 @@ impl Client { } else if version > expected_version { // If the on-storage version is greater than the constant embedded // into this binary, we may have downgraded. - return Err(Error::Database( - format!( - "Expected version {expected_version}, saw {version}. Downgrading is not supported.", - ) - )); + return Err(Error::DatabaseVersionMismatch { + expected: crate::model::OXIMETER_VERSION, + found: version, + }); } else { // If the version matches, we don't need to update the DB return Ok(()); @@ -319,7 +630,8 @@ impl Client { Ok(()) } - async fn read_latest_version(&self) -> Result { + /// Read the latest version applied in the database. + pub async fn read_latest_version(&self) -> Result { let sql = format!( "SELECT MAX(value) FROM {db_name}.version;", db_name = crate::DATABASE_NAME, @@ -354,6 +666,20 @@ impl Client { Ok(version) } + /// Return Ok if the DB is at exactly the version compatible with this + /// client. 
+ pub async fn check_db_is_at_expected_version(&self) -> Result<(), Error> { + let ver = self.read_latest_version().await?; + if ver == crate::model::OXIMETER_VERSION { + Ok(()) + } else { + Err(Error::DatabaseVersionMismatch { + expected: crate::model::OXIMETER_VERSION, + found: ver, + }) + } + } + async fn insert_version(&self, version: u64) -> Result<(), Error> { let sql = format!( "INSERT INTO {db_name}.version (*) VALUES ({version}, now());", @@ -365,7 +691,7 @@ impl Client { /// Verifies if instance is part of oximeter_cluster pub async fn is_oximeter_cluster(&self) -> Result { - let sql = String::from("SHOW CLUSTERS FORMAT JSONEachRow;"); + let sql = "SHOW CLUSTERS FORMAT JSONEachRow;"; let res = self.execute_with_body(sql).await?; Ok(res.contains("oximeter_cluster")) } @@ -501,7 +827,11 @@ impl Client { S: AsRef, { let sql = sql.as_ref().to_string(); - trace!(self.log, "executing SQL query: {}", sql); + trace!( + self.log, + "executing SQL query"; + "sql" => &sql, + ); let id = usdt::UniqueId::new(); probes::query__start!(|| (&id, &sql)); let response = handle_db_response( @@ -720,6 +1050,42 @@ impl Client { // many as one per sample. It's not clear how to structure this in a way that's useful. Ok(()) } + + // Run one or more SQL statements. + // + // This is intended to be used for the methods which run SQL from one of the + // SQL files in the crate, e.g., the DB initialization or update files. + async fn run_many_sql_statements( + &self, + sql: impl AsRef, + ) -> Result<(), Error> { + for stmt in sql.as_ref().split(';').filter(|s| !s.trim().is_empty()) { + self.execute(stmt).await?; + } + Ok(()) + } +} + +// A regex used to validate supported schema updates. +static SCHEMA_VALIDATION_REGEX: OnceLock = OnceLock::new(); +fn schema_validation_regex() -> &'static Regex { + SCHEMA_VALIDATION_REGEX.get_or_init(|| { + RegexBuilder::new(concat!( + // Cannot insert rows + r#"(INSERT INTO)|"#, + // Cannot delete rows in a table + r#"(ALTER TABLE .* DELETE)|"#, + // Cannot update values in a table + r#"(ALTER TABLE .* UPDATE)|"#, + // Cannot drop column values + r#"(ALTER TABLE .* CLEAR COLUMN)|"#, + // Or issue lightweight deletes + r#"(DELETE FROM)"#, + )) + .case_insensitive(true) + .build() + .expect("Invalid regex") + }) } #[derive(Debug)] @@ -767,40 +1133,38 @@ impl DbWrite for Client { /// Initialize the replicated telemetry database, creating tables as needed. async fn init_replicated_db(&self) -> Result<(), Error> { - // The HTTP client doesn't support multiple statements per query, so we break them out here - // manually. debug!(self.log, "initializing ClickHouse database"); - let sql = include_str!("./db-replicated-init.sql"); - for query in sql.split("\n--\n") { - self.execute(query.to_string()).await?; - } - Ok(()) + self.run_many_sql_statements(include_str!( + "../schema/replicated/db-init.sql" + )) + .await + } + + /// Wipe the ClickHouse database entirely from a replicated set up. + async fn wipe_replicated_db(&self) -> Result<(), Error> { + debug!(self.log, "wiping ClickHouse database"); + self.run_many_sql_statements(include_str!( + "../schema/replicated/db-wipe.sql" + )) + .await } /// Initialize a single node telemetry database, creating tables as needed. async fn init_single_node_db(&self) -> Result<(), Error> { - // The HTTP client doesn't support multiple statements per query, so we break them out here - // manually. 
debug!(self.log, "initializing ClickHouse database"); - let sql = include_str!("./db-single-node-init.sql"); - for query in sql.split("\n--\n") { - self.execute(query.to_string()).await?; - } - Ok(()) + self.run_many_sql_statements(include_str!( + "../schema/single-node/db-init.sql" + )) + .await } /// Wipe the ClickHouse database entirely from a single node set up. async fn wipe_single_node_db(&self) -> Result<(), Error> { debug!(self.log, "wiping ClickHouse database"); - let sql = include_str!("./db-wipe-single-node.sql").to_string(); - self.execute(sql).await - } - - /// Wipe the ClickHouse database entirely from a replicated set up. - async fn wipe_replicated_db(&self) -> Result<(), Error> { - debug!(self.log, "wiping ClickHouse database"); - let sql = include_str!("./db-wipe-replicated.sql").to_string(); - self.execute(sql).await + self.run_many_sql_statements(include_str!( + "../schema/single-node/db-wipe.sql" + )) + .await } } @@ -839,7 +1203,9 @@ mod tests { use oximeter::Metric; use oximeter::Target; use std::net::Ipv6Addr; + use std::path::PathBuf; use std::time::Duration; + use tempfile::TempDir; use tokio::time::sleep; use uuid::Uuid; @@ -1062,18 +1428,20 @@ mod tests { db.cleanup().await.expect("Failed to cleanup ClickHouse server"); } - #[tokio::test] - async fn test_replicated() { - let cur_dir = std::env::current_dir().unwrap(); + async fn create_cluster() -> ClickHouseCluster { + let cur_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let replica_config = cur_dir.as_path().join("src/configs/replica_config.xml"); - let cur_dir = std::env::current_dir().unwrap(); let keeper_config = cur_dir.as_path().join("src/configs/keeper_config.xml"); - - let mut cluster = ClickHouseCluster::new(replica_config, keeper_config) + ClickHouseCluster::new(replica_config, keeper_config) .await - .expect("Failed to initialise ClickHouse Cluster"); + .expect("Failed to initialise ClickHouse Cluster") + } + + #[tokio::test] + async fn test_replicated() { + let mut cluster = create_cluster().await; // Tests that the expected error is returned on a wrong address bad_db_connection_test().await.unwrap(); @@ -3164,7 +3532,7 @@ mod tests { ) -> Result<(), Error> { use strum::IntoEnumIterator; usdt::register_probes().unwrap(); - let logctx = test_setup_log("test_update_schema_cache_on_new_sample"); + let logctx = test_setup_log("test_select_all_datum_types"); let log = &logctx.log; let client = Client::new(address, &log); @@ -3345,4 +3713,521 @@ mod tests { ); } } + + async fn create_test_upgrade_schema_directory( + replicated: bool, + versions: &[u64], + ) -> (TempDir, Vec) { + assert!(!versions.is_empty()); + let schema_dir = TempDir::new().expect("failed to create tempdir"); + let mut paths = Vec::with_capacity(versions.len()); + for version in versions.iter() { + let version_dir = Client::full_upgrade_path( + replicated, + *version, + schema_dir.as_ref(), + ); + fs::create_dir_all(&version_dir) + .await + .expect("failed to make version directory"); + paths.push(version_dir); + } + (schema_dir, paths) + } + + #[tokio::test] + async fn test_read_schema_upgrade_sql_files() { + let logctx = test_setup_log("test_read_schema_upgrade_sql_files"); + let log = &logctx.log; + const REPLICATED: bool = false; + const VERSION: u64 = 1; + let (schema_dir, version_dirs) = + create_test_upgrade_schema_directory(REPLICATED, &[VERSION]).await; + let version_dir = &version_dirs[0]; + + // Create a few SQL files in there. 
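For orientation, the temporary directory this test populates mirrors the layout of oximeter/db/schema in this change (installed to /opt/oxide/oximeter/schema by package-manifest.toml). A rough sketch of that layout, with the version number shown as an example:

    schema/
        single-node/
            db-init.sql        full schema used to initialize a fresh database
            db-wipe.sql
            3/                 one directory per schema version
                up.sql         upgrade statements applied for that version
        replicated/
            db-init.sql
            db-wipe.sql

read_available_schema_versions() treats each numeric subdirectory under the chosen flavor as an available version, and read_schema_upgrade_sql_files() loads every *.sql file inside the selected version directory.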
+ const SQL: &str = "SELECT NOW();"; + let filenames: Vec<_> = (0..3).map(|i| format!("up-{i}.sql")).collect(); + for name in filenames.iter() { + let full_path = version_dir.join(name); + fs::write(full_path, SQL).await.expect("Failed to write dummy SQL"); + } + + let upgrade_files = Client::read_schema_upgrade_sql_files( + log, + REPLICATED, + VERSION, + schema_dir.path(), + ) + .await + .expect("Failed to read schema upgrade files"); + for filename in filenames.iter() { + let stem = filename.split_once('.').unwrap().0; + assert_eq!( + upgrade_files.get(stem).unwrap().1, + SQL, + "upgrade SQL file contents are not correct" + ); + } + logctx.cleanup_successful(); + } + + async fn test_apply_one_schema_upgrade_impl( + log: &Logger, + address: SocketAddr, + replicated: bool, + ) { + let test_name = format!( + "test_apply_one_schema_upgrade_{}", + if replicated { "replicated" } else { "single_node" } + ); + let client = Client::new(address, &log); + + // We'll test moving from version 1, which just creates a database and + // table, to version 2, which adds two columns to that table in + // different SQL files. + client.execute(format!("CREATE DATABASE {test_name};")).await.unwrap(); + client + .execute(format!( + "\ + CREATE TABLE {test_name}.tbl (\ + `col0` UInt8 \ + )\ + ENGINE = MergeTree() + ORDER BY `col0`;\ + " + )) + .await + .unwrap(); + + // Write out the upgrading SQL files. + // + // Note that all of these statements are going in the version 2 schema + // directory. + let (schema_dir, version_dirs) = + create_test_upgrade_schema_directory(replicated, &[NEXT_VERSION]) + .await; + const NEXT_VERSION: u64 = 2; + let first_sql = + format!("ALTER TABLE {test_name}.tbl ADD COLUMN `col1` UInt16;"); + let second_sql = + format!("ALTER TABLE {test_name}.tbl ADD COLUMN `col2` String;"); + let all_sql = [first_sql, second_sql]; + let version_dir = &version_dirs[0]; + for (i, sql) in all_sql.iter().enumerate() { + let path = version_dir.join(format!("up-{i}.sql")); + fs::write(path, sql) + .await + .expect("failed to write out upgrade SQL file"); + } + + // Apply the upgrade itself. + client + .apply_one_schema_upgrade( + replicated, + NEXT_VERSION, + schema_dir.path(), + ) + .await + .expect("Failed to apply one schema upgrade"); + + // Check that it actually worked! + let body = client + .execute_with_body(format!( + "\ + SELECT name, type FROM system.columns \ + WHERE database = '{test_name}' AND table = 'tbl' \ + ORDER BY name \ + FORMAT CSV;\ + " + )) + .await + .unwrap(); + let mut lines = body.lines(); + assert_eq!(lines.next().unwrap(), "\"col0\",\"UInt8\""); + assert_eq!(lines.next().unwrap(), "\"col1\",\"UInt16\""); + assert_eq!(lines.next().unwrap(), "\"col2\",\"String\""); + assert!(lines.next().is_none()); + } + + #[tokio::test] + async fn test_apply_one_schema_upgrade_replicated() { + const TEST_NAME: &str = "test_apply_one_schema_upgrade_replicated"; + let logctx = test_setup_log(TEST_NAME); + let log = &logctx.log; + let mut cluster = create_cluster().await; + let address = cluster.replica_1.address; + test_apply_one_schema_upgrade_impl(log, address, true).await; + + // TODO-cleanup: These should be arrays. + // See https://github.com/oxidecomputer/omicron/issues/4460. 
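A rough sketch of the shape that TODO points toward, assuming ClickHouseCluster eventually holds its keepers and replicas as Vec-valued fields; the `keepers` and `replicas` field names are hypothetical and do not exist in this change.

    // Hypothetical: with Vec-valued fields, teardown collapses to a loop
    // rather than five separate call sites.
    for (i, keeper) in cluster.keepers.iter_mut().enumerate() {
        keeper.cleanup().await.unwrap_or_else(|_| {
            panic!("Failed to cleanup ClickHouse keeper {}", i + 1)
        });
    }
    for (i, replica) in cluster.replicas.iter_mut().enumerate() {
        replica.cleanup().await.unwrap_or_else(|_| {
            panic!("Failed to cleanup ClickHouse server {}", i + 1)
        });
    }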
+ cluster + .keeper_1 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 1"); + cluster + .keeper_2 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 2"); + cluster + .keeper_3 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 3"); + cluster + .replica_1 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server 1"); + cluster + .replica_2 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server 2"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_apply_one_schema_upgrade_single_node() { + const TEST_NAME: &str = "test_apply_one_schema_upgrade_single_node"; + let logctx = test_setup_log(TEST_NAME); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + test_apply_one_schema_upgrade_impl(log, address, false).await; + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_schema_with_version_gaps_fails() { + let logctx = + test_setup_log("test_ensure_schema_with_version_gaps_fails"); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + let client = Client::new(address, &log); + const REPLICATED: bool = false; + client + .initialize_db_with_version( + REPLICATED, + crate::model::OXIMETER_VERSION, + ) + .await + .expect("failed to initialize DB"); + + const BOGUS_VERSION: u64 = u64::MAX; + let (schema_dir, _) = create_test_upgrade_schema_directory( + REPLICATED, + &[crate::model::OXIMETER_VERSION, BOGUS_VERSION], + ) + .await; + + let err = client + .ensure_schema(REPLICATED, BOGUS_VERSION, schema_dir.path()) + .await + .expect_err( + "Should have received an error when ensuring \ + non-sequential version numbers", + ); + let Error::NonSequentialSchemaVersions = err else { + panic!( + "Expected an Error::NonSequentialSchemaVersions, found {err:?}" + ); + }; + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_schema_with_missing_desired_schema_version_fails() { + let logctx = test_setup_log( + "test_ensure_schema_with_missing_desired_schema_version_fails", + ); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + let client = Client::new(address, &log); + const REPLICATED: bool = false; + client + .initialize_db_with_version( + REPLICATED, + crate::model::OXIMETER_VERSION, + ) + .await + .expect("failed to initialize DB"); + + let (schema_dir, _) = create_test_upgrade_schema_directory( + REPLICATED, + &[crate::model::OXIMETER_VERSION], + ) + .await; + + const BOGUS_VERSION: u64 = u64::MAX; + let err = client.ensure_schema( + REPLICATED, + BOGUS_VERSION, + schema_dir.path(), + ).await + .expect_err("Should have received an error when ensuring a non-existing version"); + let Error::MissingSchemaVersion(missing) = err else { + panic!("Expected an Error::MissingSchemaVersion, found {err:?}"); + }; + assert_eq!(missing, BOGUS_VERSION); + + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + async fn 
test_ensure_schema_walks_through_multiple_steps_impl( + log: &Logger, + address: SocketAddr, + replicated: bool, + ) { + let test_name = format!( + "test_ensure_schema_walks_through_multiple_steps_{}", + if replicated { "replicated" } else { "single_node" } + ); + let client = Client::new(address, &log); + + // We need to actually have the oximeter DB here, and the version table, + // since `ensure_schema()` writes out versions to the DB as they're + // applied. + client.initialize_db_with_version(replicated, 1).await.unwrap(); + + // We'll test moving from version 1, which just creates a database and + // table, to version 3, stopping off at version 2. This is similar to + // the `test_apply_one_schema_upgrade` test, but we split the two + // modifications over two versions, rather than as multiple schema + // upgrades in one version bump. + client.execute(format!("CREATE DATABASE {test_name};")).await.unwrap(); + client + .execute(format!( + "\ + CREATE TABLE {test_name}.tbl (\ + `col0` UInt8 \ + )\ + ENGINE = MergeTree() + ORDER BY `col0`;\ + " + )) + .await + .unwrap(); + + // Write out the upgrading SQL files. + // + // Note that each statement goes into a different version. + const VERSIONS: [u64; 3] = [1, 2, 3]; + let (schema_dir, version_dirs) = + create_test_upgrade_schema_directory(replicated, &VERSIONS).await; + let first_sql = String::new(); + let second_sql = + format!("ALTER TABLE {test_name}.tbl ADD COLUMN `col1` UInt16;"); + let third_sql = + format!("ALTER TABLE {test_name}.tbl ADD COLUMN `col2` String;"); + let all_sql = [first_sql, second_sql, third_sql]; + for (version_dir, sql) in version_dirs.iter().zip(all_sql) { + let path = version_dir.join("up.sql"); + fs::write(path, sql) + .await + .expect("failed to write out upgrade SQL file"); + } + + // Apply the sequence of upgrades. + client + .ensure_schema( + replicated, + *VERSIONS.last().unwrap(), + schema_dir.path(), + ) + .await + .expect("Failed to apply one schema upgrade"); + + // Check that it actually worked! 
+ let body = client + .execute_with_body(format!( + "\ + SELECT name, type FROM system.columns \ + WHERE database = '{test_name}' AND table = 'tbl' \ + ORDER BY name \ + FORMAT CSV;\ + " + )) + .await + .unwrap(); + let mut lines = body.lines(); + assert_eq!(lines.next().unwrap(), "\"col0\",\"UInt8\""); + assert_eq!(lines.next().unwrap(), "\"col1\",\"UInt16\""); + assert_eq!(lines.next().unwrap(), "\"col2\",\"String\""); + assert!(lines.next().is_none()); + + let latest_version = client.read_latest_version().await.unwrap(); + assert_eq!( + latest_version, + *VERSIONS.last().unwrap(), + "Updated version not written to the database" + ); + } + + #[tokio::test] + async fn test_ensure_schema_walks_through_multiple_steps_single_node() { + const TEST_NAME: &str = + "test_ensure_schema_walks_through_multiple_steps_single_node"; + let logctx = test_setup_log(TEST_NAME); + let log = &logctx.log; + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + test_ensure_schema_walks_through_multiple_steps_impl( + log, address, false, + ) + .await; + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_ensure_schema_walks_through_multiple_steps_replicated() { + const TEST_NAME: &str = + "test_ensure_schema_walks_through_multiple_steps_replicated"; + let logctx = test_setup_log(TEST_NAME); + let log = &logctx.log; + let mut cluster = create_cluster().await; + let address = cluster.replica_1.address; + test_ensure_schema_walks_through_multiple_steps_impl( + log, address, true, + ) + .await; + cluster + .keeper_1 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 1"); + cluster + .keeper_2 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 2"); + cluster + .keeper_3 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse keeper 3"); + cluster + .replica_1 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server 1"); + cluster + .replica_2 + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server 2"); + logctx.cleanup_successful(); + } + + #[test] + fn test_verify_schema_upgrades() { + let mut map = BTreeMap::new(); + + // Check that we fail if the upgrade tries to insert data. + map.insert( + "up".into(), + ( + PathBuf::from("/foo/bar/up.sql"), + String::from( + "INSERT INTO oximeter.version (*) VALUES (100, now());", + ), + ), + ); + assert!(Client::verify_schema_upgrades(&map).is_err()); + + // Sanity check for the normal case. + map.clear(); + map.insert( + "up".into(), + ( + PathBuf::from("/foo/bar/up.sql"), + String::from("ALTER TABLE oximeter.measurements_bool ADD COLUMN foo UInt64;") + ), + ); + assert!(Client::verify_schema_upgrades(&map).is_ok()); + + // Check that we fail if the upgrade tries to delete any data. + map.clear(); + map.insert( + "up".into(), + ( + PathBuf::from("/foo/bar/up.sql"), + String::from("ALTER TABLE oximeter.measurements_bool DELETE WHERE timestamp < NOW();") + ), + ); + assert!(Client::verify_schema_upgrades(&map).is_err()); + + // Check that we fail if the upgrade contains multiple SQL statements.
+ map.clear(); + map.insert( + "up".into(), + ( + PathBuf::from("/foo/bar/up.sql"), + String::from( + "\ + ALTER TABLE oximeter.measurements_bool \ + ADD COLUMN foo UInt8; \ + ALTER TABLE oximeter.measurements_bool \ + ADD COLUMN bar UInt8; \ + ", + ), + ), + ); + assert!(Client::verify_schema_upgrades(&map).is_err()); + } + + // Regression test for https://github.com/oxidecomputer/omicron/issues/4369. + // + // This tests that we can successfully query all extant field types from the + // schema table. There may be no such values, but the query itself should + // succeed. + #[tokio::test] + async fn test_select_all_field_types() { + use strum::IntoEnumIterator; + usdt::register_probes().unwrap(); + let logctx = test_setup_log("test_select_all_field_types"); + let log = &logctx.log; + + let mut db = ClickHouseInstance::new_single_node(0) + .await + .expect("Failed to start ClickHouse"); + let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); + let client = Client::new(address, &log); + client + .init_single_node_db() + .await + .expect("Failed to initialize timeseries database"); + + // Attempt to select all schema with each field type. + for ty in oximeter::FieldType::iter() { + let sql = format!( + "SELECT COUNT() \ + FROM {}.timeseries_schema \ + WHERE arrayFirstIndex(x -> x = '{:?}', fields.type) > 0;", + crate::DATABASE_NAME, + crate::model::DbFieldType::from(ty), + ); + let res = client.execute_with_body(sql).await.unwrap(); + let count = res.trim().parse::().unwrap(); + assert_eq!(count, 0); + } + db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); + } } diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 11ecbeddc8..425c5189ee 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -15,7 +15,9 @@ use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::convert::TryFrom; +use std::io; use std::num::NonZeroU32; +use std::path::PathBuf; use thiserror::Error; mod client; @@ -23,7 +25,9 @@ pub mod model; pub mod query; pub use client::{Client, DbWrite}; -#[derive(Clone, Debug, Error)] +pub use model::OXIMETER_VERSION; + +#[derive(Debug, Error)] pub enum Error { #[error("Oximeter core error: {0}")] Oximeter(#[from] oximeter::MetricsError), @@ -79,6 +83,38 @@ pub enum Error { #[error("Query must resolve to a single timeseries if limit is specified")] InvalidLimitQuery, + + #[error("Database is not at expected version")] + DatabaseVersionMismatch { expected: u64, found: u64 }, + + #[error("Could not read schema directory")] + ReadSchemaDir { + context: String, + #[source] + err: io::Error, + }, + + #[error("Could not read SQL file from path")] + ReadSqlFile { + context: String, + #[source] + err: io::Error, + }, + + #[error("Non-UTF8 schema directory entry")] + NonUtf8SchemaDirEntry(std::ffi::OsString), + + #[error("Missing desired schema version: {0}")] + MissingSchemaVersion(u64), + + #[error("Data-modifying operations are not supported in schema updates")] + SchemaUpdateModifiesData { path: PathBuf, statement: String }, + + #[error("Schema update SQL files should contain at most 1 statement")] + MultipleSqlStatementsInSchemaUpdate { path: PathBuf }, + + #[error("Schema update versions must be sequential without gaps")] + NonSequentialSchemaVersions, } /// A timeseries name. 
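Tying the new error variants to the client methods that produce them, a caller such as the clickhouse-schema-updater binary mentioned above might drive an upgrade roughly as follows. This is a hedged sketch against the public items introduced in this change (Client::new, read_latest_version, ensure_schema, OXIMETER_VERSION); the logger, address, and schema path are placeholders (the path matches the location installed by package-manifest.toml), and it assumes the database and its version table already exist.

    use std::net::SocketAddr;

    use oximeter_db::{Client, Error, OXIMETER_VERSION};
    use slog::{info, Logger};

    // Sketch only: bring the database up to the version this binary was
    // built against, without wiping data.
    async fn upgrade_to_current(
        log: &Logger,
        address: SocketAddr,
        replicated: bool,
    ) -> Result<(), Error> {
        let client = Client::new(address, log);
        let found = client.read_latest_version().await?;
        info!(
            log,
            "ensuring oximeter database schema";
            "found_version" => found,
            "desired_version" => OXIMETER_VERSION
        );
        // Applies each missing version directory in order, recording every
        // applied step in `oximeter.version`. Fails with
        // `MissingSchemaVersion` or `NonSequentialSchemaVersions` when the
        // on-disk version directories don't line up with what the database
        // reports.
        client
            .ensure_schema(
                replicated,
                OXIMETER_VERSION,
                "/opt/oxide/oximeter/schema",
            )
            .await
    }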
diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 41c7ab9d24..715e025a04 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -38,11 +38,12 @@ use uuid::Uuid; /// Describes the version of the Oximeter database. /// -/// See: [crate::Client::initialize_db_with_version] for usage. +/// For usage and details see: /// -/// TODO(#4271): The current implementation of versioning will wipe the metrics -/// database if this number is incremented. -pub const OXIMETER_VERSION: u64 = 2; +/// - [`crate::Client::initialize_db_with_version`] +/// - [`crate::Client::ensure_schema`] +/// - The `clickhouse-schema-updater` binary in this crate +pub const OXIMETER_VERSION: u64 = 3; // Wrapper type to represent a boolean in the database. // diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index 0cc3299ec4..325974781e 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -42,6 +42,7 @@ use uuid::Uuid; JsonSchema, Serialize, Deserialize, + strum::EnumIter, )] #[serde(rename_all = "snake_case")] pub enum FieldType { diff --git a/oximeter/producer/Cargo.toml b/oximeter/producer/Cargo.toml index ef2f16c8ad..79f6c754f7 100644 --- a/oximeter/producer/Cargo.toml +++ b/oximeter/producer/Cargo.toml @@ -11,7 +11,6 @@ dropshot.workspace = true nexus-client.workspace = true omicron-common.workspace = true oximeter.workspace = true -reqwest = { workspace = true, features = [ "json" ] } schemars = { workspace = true, features = [ "uuid1", "bytes", "chrono" ] } serde.workspace = true slog.workspace = true diff --git a/package-manifest.toml b/package-manifest.toml index 2caf936600..b1020e0b98 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -107,9 +107,12 @@ setup_hint = """ service_name = "oximeter" only_for_targets.image = "standard" source.type = "local" -source.rust.binary_names = ["oximeter"] +source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true -source.paths = [ { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" } ] +source.paths = [ + { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" }, + { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, +] output.type = "zone" [package.clickhouse] diff --git a/package/Cargo.toml b/package/Cargo.toml index 9bf0b37a23..6cc0e343db 100644 --- a/package/Cargo.toml +++ b/package/Cargo.toml @@ -12,7 +12,6 @@ futures.workspace = true hex.workspace = true illumos-utils.workspace = true indicatif.workspace = true -omicron-common.workspace = true omicron-zone-package.workspace = true petgraph.workspace = true rayon.workspace = true @@ -20,7 +19,6 @@ reqwest = { workspace = true, features = [ "rustls-tls" ] } ring.workspace = true semver.workspace = true serde.workspace = true -serde_derive.workspace = true sled-hardware.workspace = true slog.workspace = true slog-async.workspace = true @@ -29,7 +27,6 @@ smf.workspace = true strum.workspace = true swrite.workspace = true tar.workspace = true -tempfile.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } toml.workspace = true diff --git a/schema/crdb/10.0.0/README.md b/schema/crdb/10.0.0/README.md new file mode 100644 index 0000000000..9a98141e37 --- /dev/null +++ b/schema/crdb/10.0.0/README.md @@ -0,0 +1,12 @@ +# Why? 
+ +This migration is part of a PR that adds a check to the schema tests ensuring that the order of enum members is the same when starting from scratch with `dbinit.sql` as it is when building up from existing deployments by running the migrations. The problem: there were already two enums, `dataset_kind` and `service_kind`, where the order did not match, so we have to fix that by putting the enums in the "right" order even on an existing deployment where the order is wrong. To do that, for each of those enums, we: + +1. add `clickhouse_keeper2` member +1. change existing uses of `clickhouse_keeper` to `clickhouse_keeper2` +1. drop `clickhouse_keeper` member +1. add `clickhouse_keeper` back in the right order using `AFTER 'clickhouse'` +1. change uses of `clickhouse_keeper2` back to `clickhouse_keeper` +1. drop `clickhouse_keeper2` + +As there are 6 steps here and two different enums to do them for, there are 12 `up*.sql` files. diff --git a/schema/crdb/10.0.0/up01.sql b/schema/crdb/10.0.0/up01.sql new file mode 100644 index 0000000000..6b92700215 --- /dev/null +++ b/schema/crdb/10.0.0/up01.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE IF NOT EXISTS 'clickhouse_keeper2' AFTER 'clickhouse'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up02.sql b/schema/crdb/10.0.0/up02.sql new file mode 100644 index 0000000000..d7c15e1959 --- /dev/null +++ b/schema/crdb/10.0.0/up02.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind ADD VALUE IF NOT EXISTS 'clickhouse_keeper2' AFTER 'clickhouse'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up03.sql b/schema/crdb/10.0.0/up03.sql new file mode 100644 index 0000000000..52a32f9f8a --- /dev/null +++ b/schema/crdb/10.0.0/up03.sql @@ -0,0 +1,5 @@ +set local disallow_full_table_scans = off; + +UPDATE omicron.public.dataset +SET kind = 'clickhouse_keeper2' +WHERE kind = 'clickhouse_keeper'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up04.sql b/schema/crdb/10.0.0/up04.sql new file mode 100644 index 0000000000..c9e193f1c3 --- /dev/null +++ b/schema/crdb/10.0.0/up04.sql @@ -0,0 +1,5 @@ +set local disallow_full_table_scans = off; + +UPDATE omicron.public.service +SET kind = 'clickhouse_keeper2' +WHERE kind = 'clickhouse_keeper'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up05.sql b/schema/crdb/10.0.0/up05.sql new file mode 100644 index 0000000000..4e64de9425 --- /dev/null +++ b/schema/crdb/10.0.0/up05.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind DROP VALUE 'clickhouse_keeper'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up06.sql b/schema/crdb/10.0.0/up06.sql new file mode 100644 index 0000000000..4be0ddf616 --- /dev/null +++ b/schema/crdb/10.0.0/up06.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind DROP VALUE 'clickhouse_keeper'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up07.sql b/schema/crdb/10.0.0/up07.sql new file mode 100644 index 0000000000..8971f7cf47 --- /dev/null +++ b/schema/crdb/10.0.0/up07.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind ADD VALUE 'clickhouse_keeper' AFTER 'clickhouse'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up08.sql b/schema/crdb/10.0.0/up08.sql new file mode 100644 index 0000000000..4a53d9b812 --- /dev/null +++ b/schema/crdb/10.0.0/up08.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind ADD VALUE 'clickhouse_keeper' AFTER 'clickhouse'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up09.sql b/schema/crdb/10.0.0/up09.sql new file mode 100644 
index 0000000000..60f2bbbb27 --- /dev/null +++ b/schema/crdb/10.0.0/up09.sql @@ -0,0 +1,5 @@ +set local disallow_full_table_scans = off; + +UPDATE omicron.public.dataset +SET kind = 'clickhouse_keeper' +WHERE kind = 'clickhouse_keeper2'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up10.sql b/schema/crdb/10.0.0/up10.sql new file mode 100644 index 0000000000..ad8801709d --- /dev/null +++ b/schema/crdb/10.0.0/up10.sql @@ -0,0 +1,5 @@ +set local disallow_full_table_scans = off; + +UPDATE omicron.public.service +SET kind = 'clickhouse_keeper' +WHERE kind = 'clickhouse_keeper2'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up11.sql b/schema/crdb/10.0.0/up11.sql new file mode 100644 index 0000000000..2c50c0064e --- /dev/null +++ b/schema/crdb/10.0.0/up11.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.dataset_kind DROP VALUE 'clickhouse_keeper2'; \ No newline at end of file diff --git a/schema/crdb/10.0.0/up12.sql b/schema/crdb/10.0.0/up12.sql new file mode 100644 index 0000000000..376c25bfcd --- /dev/null +++ b/schema/crdb/10.0.0/up12.sql @@ -0,0 +1 @@ +ALTER TYPE omicron.public.service_kind DROP VALUE 'clickhouse_keeper2'; \ No newline at end of file diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index da842cbfeb..875877ee96 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2838,7 +2838,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '9.0.0', NULL) + ( TRUE, NOW(), NOW(), '10.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 0534c79aef..2ce8ae3bdc 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -132,6 +132,51 @@ "format": "uint32", "minimum": 0.0 }, + "connect_retry": { + "description": "The interval in seconds between peer connection retry attempts.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, + "delay_open": { + "description": "How long to delay sending open messages to a peer. In seconds.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, + "hold_time": { + "description": "How long to keep a session alive without a keepalive in seconds. 
Defaults to 6.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, + "idle_hold_time": { + "description": "How long to keep a peer in idle after a state machine reset in seconds.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, + "keepalive": { + "description": "The interval to send keepalive messages at.", + "type": [ + "integer", + "null" + ], + "format": "uint64", + "minimum": 0.0 + }, "port": { "description": "Switch port the peer is reachable on.", "type": "string" diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index b1fa26a471..46148304f8 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -9,7 +9,6 @@ license = "MPL-2.0" anyhow.workspace = true async-trait.workspace = true base64.workspace = true -bincode.workspace = true bootstore.workspace = true bootstrap-agent-client.workspace = true bytes.workspace = true @@ -41,14 +40,13 @@ itertools.workspace = true key-manager.workspace = true libc.workspace = true macaddr.workspace = true +mg-admin-client.workspace = true nexus-client.workspace = true omicron-common.workspace = true once_cell.workspace = true oximeter.workspace = true oximeter-instruments.workspace = true oximeter-producer.workspace = true -percent-encoding.workspace = true -progenitor.workspace = true propolis-client = { workspace = true, features = [ "generated-migration" ] } propolis-server.workspace = true # Only used by the simulated sled agent rand = { workspace = true, features = ["getrandom"] } @@ -70,7 +68,6 @@ tar.workspace = true thiserror.workspace = true tofino.workspace = true tokio = { workspace = true, features = [ "full" ] } -tokio-tungstenite.workspace = true toml.workspace = true usdt.workspace = true uuid.workspace = true diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index a8aa978f9d..bec309dc27 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -17,7 +17,9 @@ use gateway_client::Client as MgsClient; use internal_dns::resolver::{ResolveError, Resolver as DnsResolver}; use internal_dns::ServiceName; use ipnetwork::{IpNetwork, Ipv6Network}; -use omicron_common::address::{Ipv6Subnet, MGS_PORT}; +use mg_admin_client::types::{ApplyRequest, BgpPeerConfig, Prefix4}; +use mg_admin_client::Client as MgdClient; +use omicron_common::address::{Ipv6Subnet, MGD_PORT, MGS_PORT}; use omicron_common::address::{DDMD_PORT, DENDRITE_PORT}; use omicron_common::api::internal::shared::{ PortConfigV1, PortFec, PortSpeed, RackNetworkConfig, RackNetworkConfigV1, @@ -37,6 +39,7 @@ use std::time::{Duration, Instant}; use thiserror::Error; static BOUNDARY_SERVICES_ADDR: &str = "fd00:99::1"; +const BGP_SESSION_RESOLUTION: u64 = 100; /// Errors that can occur during early network setup #[derive(Error, Debug)] @@ -55,6 +58,12 @@ pub enum EarlyNetworkSetupError { #[error("Error during DNS lookup: {0}")] DnsResolver(#[from] ResolveError), + + #[error("BGP configuration error: {0}")] + BgpConfigurationError(String), + + #[error("MGD error: {0}")] + MgdError(String), } enum LookupSwitchZoneAddrsResult { @@ -453,6 +462,67 @@ impl<'a> EarlyNetworkSetup<'a> { ddmd_client.advertise_prefix(Ipv6Subnet::new(ipv6_entry.addr)); } + let mgd = MgdClient::new( + &self.log, + SocketAddrV6::new(switch_zone_underlay_ip, MGD_PORT, 0, 0).into(), + ) + .map_err(|e| { + EarlyNetworkSetupError::MgdError(format!( + "initialize mgd client: {e}" + )) + })?; + + // Iterate through ports and apply BGP 
config. + for port in &our_ports { + let mut bgp_peer_configs = Vec::new(); + for peer in &port.bgp_peers { + let config = rack_network_config + .bgp + .iter() + .find(|x| x.asn == peer.asn) + .ok_or(EarlyNetworkSetupError::BgpConfigurationError( + format!( + "asn {} referenced by peer undefined", + peer.asn + ), + ))?; + + let bpc = BgpPeerConfig { + asn: peer.asn, + name: format!("{}", peer.addr), + host: format!("{}:179", peer.addr), + hold_time: peer.hold_time.unwrap_or(6), + idle_hold_time: peer.idle_hold_time.unwrap_or(3), + delay_open: peer.delay_open.unwrap_or(0), + connect_retry: peer.connect_retry.unwrap_or(3), + keepalive: peer.keepalive.unwrap_or(2), + resolution: BGP_SESSION_RESOLUTION, + originate: config + .originate + .iter() + .map(|x| Prefix4 { length: x.prefix(), value: x.ip() }) + .collect(), + }; + bgp_peer_configs.push(bpc); + } + + if bgp_peer_configs.is_empty() { + continue; + } + + mgd.inner + .bgp_apply(&ApplyRequest { + peer_group: port.port.clone(), + peers: bgp_peer_configs, + }) + .await + .map_err(|e| { + EarlyNetworkSetupError::BgpConfigurationError(format!( + "BGP peer configuration failed: {e}", + )) + })?; + } + Ok(our_ports) } diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index d99e5bbc03..604baea55b 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -382,7 +382,15 @@ async fn start_sled_agent( if request.body.use_trust_quorum && request.body.is_lrtq_learner { info!(log, "Initializing sled as learner"); - long_running_task_handles.bootstore.init_learner().await?; + match long_running_task_handles.bootstore.init_learner().await { + Err(bootstore::NodeRequestError::Fsm( + bootstore::ApiError::AlreadyInitialized, + )) => { + // This is a cold boot. Let's ignore this error and continue. + } + Err(e) => return Err(e.into()), + Ok(()) => (), + } } // Inform the storage service that the key manager is available diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index fad4b2e94b..a596cf83db 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -52,6 +52,9 @@ pub struct Config { pub sidecar_revision: SidecarRevision, /// Optional percentage of DRAM to reserve for guest memory pub vmm_reservoir_percentage: Option<u8>, + /// Optional DRAM to reserve for guest memory in MiB (mutually exclusive + /// option with vmm_reservoir_percentage). + pub vmm_reservoir_size_mb: Option<u32>, /// Optional swap device size in GiB pub swap_device_size_gb: Option<u32>, /// Optional VLAN ID to be used for tagging guest VNICs.
diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 3b0e37ed8d..f030078761 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -31,7 +31,6 @@ use illumos_utils::svc::wait_for_service; use illumos_utils::zone::Zones; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; use omicron_common::address::NEXUS_INTERNAL_PORT; -use omicron_common::address::PROPOLIS_PORT; use omicron_common::api::internal::nexus::{ InstanceRuntimeState, SledInstanceState, VmmRuntimeState, }; @@ -196,8 +195,8 @@ struct InstanceInner { // The ID of the Propolis server (and zone) running this instance propolis_id: Uuid, - // The IP address of the Propolis server running this instance - propolis_ip: IpAddr, + // The socket address of the Propolis server running this instance + propolis_addr: SocketAddr, // NIC-related properties vnic_allocator: VnicAllocator, @@ -662,7 +661,7 @@ impl Instance { vcpus: hardware.properties.ncpus.0 as u8, }, propolis_id, - propolis_ip: propolis_addr.ip(), + propolis_addr, vnic_allocator, port_manager, requested_nics: hardware.nics, @@ -965,9 +964,13 @@ impl Instance { .add_property( "listen_addr", "astring", - &inner.propolis_ip.to_string(), + &inner.propolis_addr.ip().to_string(), + ) + .add_property( + "listen_port", + "astring", + &inner.propolis_addr.port().to_string(), ) - .add_property("listen_port", "astring", &PROPOLIS_PORT.to_string()) .add_property("metric_addr", "astring", &metric_addr.to_string()); let profile = ProfileBuilder::new("omicron").add_service( @@ -990,13 +993,11 @@ impl Instance { .map_err(|_| Error::Timeout(fmri.to_string()))?; info!(inner.log, "Propolis SMF service is online"); - let server_addr = SocketAddr::new(inner.propolis_ip, PROPOLIS_PORT); - // We use a custom client builder here because the default progenitor // one has a timeout of 15s but we want to be able to wait indefinitely. let reqwest_client = reqwest::ClientBuilder::new().build().unwrap(); let client = Arc::new(PropolisClient::new_with_client( - &format!("http://{}", server_addr), + &format!("http://{}", &inner.propolis_addr), reqwest_client, )); diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 778a80cd81..fa40a876f0 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -43,6 +43,9 @@ pub enum Error { #[error("Failed to create reservoir: {0}")] Reservoir(#[from] vmm_reservoir::Error), + #[error("Invalid reservoir configuration: {0}")] + ReservoirConfig(String), + #[error("Cannot find data link: {0}")] Underlay(#[from] sled_hardware::underlay::Error), @@ -50,6 +53,12 @@ pub enum Error { ZoneBundle(#[from] BundleError), } +pub enum ReservoirMode { + None, + Size(u32), + Percentage(u8), +} + struct InstanceManagerInternal { log: Logger, nexus_client: NexusClientWithResolver, @@ -108,44 +117,69 @@ impl InstanceManager { }) } - /// Sets the VMM reservoir size to the requested (nonzero) percentage of - /// usable physical RAM, rounded down to nearest aligned size required by - /// the control plane. + /// Sets the VMM reservoir to the requested percentage of usable physical + /// RAM or to a size in MiB. Either mode will round down to the nearest + /// aligned size required by the control plane. 
pub fn set_reservoir_size( &self, hardware: &sled_hardware::HardwareManager, - target_percent: u8, + mode: ReservoirMode, ) -> Result<(), Error> { - assert!( - target_percent > 0 && target_percent < 100, - "target_percent {} must be nonzero and < 100", - target_percent - ); + let hardware_physical_ram_bytes = hardware.usable_physical_ram_bytes(); + let req_bytes = match mode { + ReservoirMode::None => return Ok(()), + ReservoirMode::Size(mb) => { + let bytes = ByteCount::from_mebibytes_u32(mb).to_bytes(); + if bytes > hardware_physical_ram_bytes { + return Err(Error::ReservoirConfig(format!( + "cannot specify a reservoir of {bytes} bytes when \ + physical memory is {hardware_physical_ram_bytes} bytes", + ))); + } + bytes + } + ReservoirMode::Percentage(percent) => { + if !matches!(percent, 1..=99) { + return Err(Error::ReservoirConfig(format!( + "VMM reservoir percentage of {} must be between 0 and \ + 100", + percent + ))); + }; + (hardware_physical_ram_bytes as f64 * (percent as f64 / 100.0)) + .floor() as u64 + } + }; - let req_bytes = (hardware.usable_physical_ram_bytes() as f64 - * (target_percent as f64 / 100.0)) - .floor() as u64; let req_bytes_aligned = vmm_reservoir::align_reservoir_size(req_bytes); if req_bytes_aligned == 0 { warn!( self.inner.log, - "Requested reservoir size of {} bytes < minimum aligned size of {} bytes", - req_bytes, vmm_reservoir::RESERVOIR_SZ_ALIGN); + "Requested reservoir size of {} bytes < minimum aligned size \ + of {} bytes", + req_bytes, + vmm_reservoir::RESERVOIR_SZ_ALIGN + ); return Ok(()); } - // The max ByteCount value is i64::MAX, which is ~8 million TiB. As this - // value is a percentage of DRAM, constructing this should always work. + // The max ByteCount value is i64::MAX, which is ~8 million TiB. + // As this value is either a percentage of DRAM or a size in MiB + // represented as a u32, constructing this should always work. 
let reservoir_size = ByteCount::try_from(req_bytes_aligned).unwrap(); + if let ReservoirMode::Percentage(percent) = mode { + info!( + self.inner.log, + "{}% of {} physical ram = {} bytes)", + percent, + hardware_physical_ram_bytes, + req_bytes, + ); + } info!( self.inner.log, - "Setting reservoir size to {} bytes \ - ({}% of {} total = {} bytes requested)", - reservoir_size, - target_percent, - hardware.usable_physical_ram_bytes(), - req_bytes, + "Setting reservoir size to {reservoir_size} bytes" ); vmm_reservoir::ReservoirControl::set(reservoir_size)?; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index ee2dff7001..34d5e06cfe 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -613,6 +613,11 @@ impl ServiceInner { addr: b.addr, asn: b.asn, port: b.port.clone(), + hold_time: b.hold_time, + connect_retry: b.connect_retry, + delay_open: b.delay_open, + idle_hold_time: b.idle_hold_time, + keepalive: b.keepalive, }) .collect(), }) diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 8a63650ce1..aec64a1349 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -10,7 +10,7 @@ use crate::bootstrap::early_networking::{ }; use crate::bootstrap::params::StartSledAgentRequest; use crate::config::Config; -use crate::instance_manager::InstanceManager; +use crate::instance_manager::{InstanceManager, ReservoirMode}; use crate::long_running_tasks::LongRunningTaskHandles; use crate::metrics::MetricsManager; use crate::nexus::{NexusClientWithResolver, NexusRequestQueue}; @@ -383,27 +383,37 @@ impl SledAgent { long_running_task_handles.zone_bundler.clone(), )?; - match config.vmm_reservoir_percentage { - Some(sz) if sz > 0 && sz < 100 => { + // Configure the VMM reservoir as either a percentage of DRAM or as an + // exact size in MiB. + let reservoir_mode = match ( + config.vmm_reservoir_percentage, + config.vmm_reservoir_size_mb, + ) { + (None, None) => ReservoirMode::None, + (Some(p), None) => ReservoirMode::Percentage(p), + (None, Some(mb)) => ReservoirMode::Size(mb), + (Some(_), Some(_)) => panic!( + "only one of vmm_reservoir_percentage and \ + vmm_reservoir_size_mb is allowed" + ), + }; + + match reservoir_mode { + ReservoirMode::None => warn!(log, "Not using VMM reservoir"), + ReservoirMode::Size(0) | ReservoirMode::Percentage(0) => { + warn!(log, "Not using VMM reservoir (size 0 bytes requested)") + } + _ => { instances .set_reservoir_size( &long_running_task_handles.hardware_manager, - sz, + reservoir_mode, ) .map_err(|e| { - error!(log, "Failed to set VMM reservoir size: {e}"); + error!(log, "Failed to setup VMM reservoir: {e}"); e })?; } - Some(0) => { - warn!(log, "Not using VMM reservoir (size 0 bytes requested)"); - } - None => { - warn!(log, "Not using VMM reservoir"); - } - Some(sz) => { - panic!("invalid requested VMM reservoir percentage: {}", sz); - } } let update_config = ConfigUpdates { diff --git a/smf/sled-agent/non-gimlet/config.toml b/smf/sled-agent/non-gimlet/config.toml index 684c0f8589..176f4002a5 100644 --- a/smf/sled-agent/non-gimlet/config.toml +++ b/smf/sled-agent/non-gimlet/config.toml @@ -45,6 +45,11 @@ zpools = [ # guest memory is pulled from. vmm_reservoir_percentage = 50 +# Optionally you can specify the size of the VMM reservoir in MiB. +# Note vmm_reservoir_percentage and vmm_reservoir_size_mb cannot be specified +# at the same time. +#vmm_reservoir_size_mb = 2048 + # Swap device size for the system. 
The device is a sparsely allocated zvol on # the internal zpool of the M.2 that we booted from. # diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 7b1f70c79e..7f210134a2 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -11,7 +11,6 @@ camino.workspace = true camino-tempfile.workspace = true dropshot.workspace = true filetime = { workspace = true, optional = true } -futures.workspace = true headers.workspace = true hex.workspace = true http.workspace = true diff --git a/test-utils/src/dev/clickhouse.rs b/test-utils/src/dev/clickhouse.rs index c35b871684..011de576ca 100644 --- a/test-utils/src/dev/clickhouse.rs +++ b/test-utils/src/dev/clickhouse.rs @@ -23,6 +23,9 @@ use crate::dev::poll; // Timeout used when starting up ClickHouse subprocess. const CLICKHOUSE_TIMEOUT: Duration = Duration::from_secs(30); +// Timeout used when starting a ClickHouse keeper subprocess. +const CLICKHOUSE_KEEPER_TIMEOUT: Duration = Duration::from_secs(30); + /// A `ClickHouseInstance` is used to start and manage a ClickHouse single node server process. #[derive(Debug)] pub struct ClickHouseInstance { @@ -527,7 +530,8 @@ async fn find_clickhouse_port_in_log( pub async fn wait_for_ready(log_path: PathBuf) -> Result<(), anyhow::Error> { let p = poll::wait_for_condition( || async { - let result = discover_ready(&log_path, CLICKHOUSE_TIMEOUT).await; + let result = + discover_ready(&log_path, CLICKHOUSE_KEEPER_TIMEOUT).await; match result { Ok(ready) => Ok(ready), Err(e) => { @@ -547,7 +551,7 @@ pub async fn wait_for_ready(log_path: PathBuf) -> Result<(), anyhow::Error> { } }, &Duration::from_millis(500), - &CLICKHOUSE_TIMEOUT, + &CLICKHOUSE_KEEPER_TIMEOUT, ) .await .context("waiting to discover if ClickHouse is ready for connections")?; diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index cb5ea40210..3efb030063 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -15,7 +15,7 @@ OUT_DIR="out/npuzone" # Pinned commit for softnpu ASIC simulator SOFTNPU_REPO="softnpu" -SOFTNPU_COMMIT="c1c42398c82b0220c8b5fa3bfba9c7a3bcaa0943" +SOFTNPU_COMMIT="dec63e67156fe6e958991bbfa090629868115ab5" # This is the softnpu ASIC simulator echo "fetching npuzone" diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index 1db40208f7..884d356222 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -63,8 +63,9 @@ function ensure_softnpu_zone { --omicron-zone \ --ports sc0_0,tfportrear0_0 \ --ports sc0_1,tfportqsfp0_0 \ - --sidecar-lite-branch main - } + --sidecar-lite-commit f0585a29fb0285f7a1220c1118856b0e5c1f75c5 \ + --softnpu-commit dec63e67156fe6e958991bbfa090629868115ab5 + } "$SOURCE_DIR"/scrimlet/softnpu-init.sh success "softnpu zone exists" } diff --git a/wicket-dbg/Cargo.toml b/wicket-dbg/Cargo.toml index d546c41e44..a00bcb9c1b 100644 --- a/wicket-dbg/Cargo.toml +++ b/wicket-dbg/Cargo.toml @@ -11,7 +11,6 @@ camino.workspace = true ciborium.workspace = true clap.workspace = true crossterm.workspace = true -ratatui.workspace = true serde.workspace = true slog.workspace = true slog-async.workspace = true diff --git a/wicket/Cargo.toml b/wicket/Cargo.toml index 11f476d98c..efb8e51dff 100644 --- a/wicket/Cargo.toml +++ b/wicket/Cargo.toml @@ -13,9 +13,7 @@ camino.workspace = true ciborium.workspace = true clap.workspace = true crossterm.workspace = true -debug-ignore.workspace = true futures.workspace = true -hex = { workspace = true, features = ["serde"] } 
humantime.workspace = true indexmap.workspace = true indicatif.workspace = true @@ -26,7 +24,6 @@ owo-colors.workspace = true ratatui.workspace = true reqwest.workspace = true rpassword.workspace = true -semver.workspace = true serde.workspace = true serde_json.workspace = true shell-words.workspace = true @@ -42,7 +39,6 @@ toml.workspace = true toml_edit.workspace = true tui-tree-widget = "0.13.0" unicode-width.workspace = true -uuid.workspace = true zeroize.workspace = true omicron-passwords.workspace = true diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index 6c1295f1f0..9b1a25a50e 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -274,6 +274,36 @@ fn populate_network_table( "port", Value::String(Formatted::new(p.port.to_string())), ); + if let Some(x) = p.hold_time { + peer.insert( + "hold_time", + Value::Integer(Formatted::new(x as i64)), + ); + } + if let Some(x) = p.connect_retry { + peer.insert( + "connect_retry", + Value::Integer(Formatted::new(x as i64)), + ); + } + if let Some(x) = p.delay_open { + peer.insert( + "delay_open", + Value::Integer(Formatted::new(x as i64)), + ); + } + if let Some(x) = p.idle_hold_time { + peer.insert( + "idle_hold_time", + Value::Integer(Formatted::new(x as i64)), + ); + } + if let Some(x) = p.keepalive { + peer.insert( + "keepalive", + Value::Integer(Formatted::new(x as i64)), + ); + } peers.push(Value::InlineTable(peer)); } uplink @@ -389,6 +419,11 @@ mod tests { asn: p.asn, port: p.port.clone(), addr: p.addr, + hold_time: p.hold_time, + connect_retry: p.connect_retry, + delay_open: p.delay_open, + idle_hold_time: p.idle_hold_time, + keepalive: p.keepalive, }) .collect(), port: config.port.clone(), @@ -486,6 +521,11 @@ mod tests { asn: 47, addr: "10.2.3.4".parse().unwrap(), port: "port0".into(), + hold_time: Some(6), + connect_retry: Some(3), + delay_open: Some(0), + idle_hold_time: Some(3), + keepalive: Some(2), }], uplink_port_speed: PortSpeed::Speed400G, uplink_port_fec: PortFec::Firecode, diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index a96acc56a0..0aaea427f3 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -521,6 +521,11 @@ fn validate_rack_network_config( addr: p.addr, asn: p.asn, port: p.port.clone(), + hold_time: p.hold_time, + connect_retry: p.connect_retry, + delay_open: p.delay_open, + idle_hold_time: p.idle_hold_time, + keepalive: p.keepalive, }) .collect(), switch: match config.switch { diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1de7f68f2f..3161a67eed 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -71,6 +71,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.0.4" } +proc-macro2 = { version = "1.0.67" } rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] } rand_chacha = { version = "0.3.1" } regex = { version = "1.10.2" } @@ -166,6 +167,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.0.4" } 
+proc-macro2 = { version = "1.0.67" } rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] } rand_chacha = { version = "0.3.1" } regex = { version = "1.10.2" }
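The 10.0.0 README above reduces to a reusable pattern for reordering an enum member in CockroachDB without losing data. Below is a minimal sketch of that pattern using a hypothetical enum omicron.public.widget_kind and table omicron.public.widget; these names are illustrative only, while the actual migration applies the same six steps to dataset_kind/dataset and service_kind/service, one statement per up*.sql file (presumably because an enum value added by ALTER TYPE cannot be referenced until that statement has committed):

-- Goal: make 'bar' sort immediately after 'foo' in widget_kind.
-- 1. Add a temporary member in the desired position.
ALTER TYPE omicron.public.widget_kind ADD VALUE IF NOT EXISTS 'bar2' AFTER 'foo';
-- 2. Move existing rows onto the temporary member.
set local disallow_full_table_scans = off;
UPDATE omicron.public.widget SET kind = 'bar2' WHERE kind = 'bar';
-- 3. Drop the misordered original member, now unused.
ALTER TYPE omicron.public.widget_kind DROP VALUE 'bar';
-- 4. Re-add the original member in the correct position.
ALTER TYPE omicron.public.widget_kind ADD VALUE 'bar' AFTER 'foo';
-- 5. Move rows back onto the original member.
set local disallow_full_table_scans = off;
UPDATE omicron.public.widget SET kind = 'bar' WHERE kind = 'bar2';
-- 6. Drop the temporary member.
ALTER TYPE omicron.public.widget_kind DROP VALUE 'bar2';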