Skip to content

Commit

Permalink
[reconfigurator] Retrieve keeper node config information (#6650)
Browse files Browse the repository at this point in the history
## Overview

This commit implements a new clickhouse-admin endpoint to retrieve and
parse information from the keeper node configuration.

## Purpose

The main purpose of retrieving this information is to have the ability
to populate the inventory's `queried_keeper` in
`ClickhouseKeeperClusterMembership`.


https://github.com/oxidecomputer/omicron/blob/453311a880075b9f89626bb20cca1c1cd85ffb4f/nexus/types/src/inventory.rs#L499-L503

In a follow-up PR, an endpoint will be added that specifically retrieves all
the information needed to populate `ClickhouseKeeperClusterMembership`. This
will be done by making several calls to the `clickhouse keeper-client` and
using the parsing function here to populate `queried_keeper`.

The endpoint itself will be useful to retrieve information for
debugging.

## Manual testing

```console
$ cargo run --bin=clickhouse-admin -- run -c ./smf/clickhouse-admin/config.toml -a [::1]:8888 -l [::1]:20001 -b ./out/clickhouse/clickhouse
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 0.43s
     Running `target/debug/clickhouse-admin run -c ./smf/clickhouse-admin/config.toml -a '[::1]:8888' -l '[::1]:20001' -b ./out/clickhouse/clickhouse`
note: configured to log to "/dev/stdout"
{"msg":"listening","v":0,"name":"clickhouse-admin","level":30,"time":"2024-09-24T23:15:39.529734Z","hostname":"ixchel","pid":61269,"local_addr":"[::1]:8888","component":"dropshot","file":"/Users/karcar/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:205"}
{"msg":"accepted connection","v":0,"name":"clickhouse-admin","level":30,"time":"2024-09-24T23:15:46.767686Z","hostname":"ixchel","pid":61269,"local_addr":"[::1]:8888","component":"dropshot","file":"/Users/karcar/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:775","remote_addr":"[::1]:57461"}
{"msg":"Retrieved data from `clickhouse keeper-client --q conf`","v":0,"name":"clickhouse-admin","level":30,"time":"2024-09-24T23:15:47.224265Z","hostname":"ixchel","pid":61269,"component":"ClickhouseCli","file":"clickhouse-admin/types/src/lib.rs:605","output":"\"server_id=1\\nenable_ipv6=true\\ntcp_port=20001\\nfour_letter_word_allow_list=conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl\\nmax_requests_batch_size=100\\nmin_session_timeout_ms=10000\\nsession_timeout_ms=30000\\noperation_timeout_ms=10000\\ndead_session_check_period_ms=500\\nheart_beat_interval_ms=500\\nelection_timeout_lower_bound_ms=1000\\nelection_timeout_upper_bound_ms=2000\\nreserved_log_items=100000\\nsnapshot_distance=100000\\nauto_forwarding=true\\nshutdown_timeout=5000\\nstartup_timeout=180000\\nraft_logs_level=trace\\nsnapshots_to_keep=3\\nrotate_log_storage_interval=100000\\nstale_log_gap=10000\\nfresh_log_gap=200\\nmax_requests_batch_size=100\\nmax_requests_batch_bytes_size=102400\\nmax_request_queue_size=100000\\nmax_requests_quick_batch_size=100\\nquorum_reads=false\\nforce_sync=true\\ncompress_logs=true\\ncompress_snapshots_with_zstd_format=true\\nconfiguration_change_tries_count=20\\nraft_limits_reconnect_limit=50\\nlog_storage_path=./deployment/keeper-1/coordination/log\\nlog_storage_disk=LocalLogDisk\\nsnapshot_storage_path=./deployment/keeper-1/coordination/snapshots\\nsnapshot_storage_disk=LocalSnapshotDisk\\n\\n\""}
{"msg":"request completed","v":0,"name":"clickhouse-admin","level":30,"time":"2024-09-24T23:15:47.22448Z","hostname":"ixchel","pid":61269,"uri":"/keeper/conf","method":"GET","req_id":"847f0baa-3b16-4273-a84a-fcfd5acd6b49","remote_addr":"[::1]:57461","local_addr":"[::1]:8888","component":"dropshot","file":"/Users/karcar/.cargo/git/checkouts/dropshot-a4a923d29dccc492/06c8dab/dropshot/src/server.rs:914","latency_us":455407,"response_code":"200"}
```

```console
$ curl http://[::1]:8888/keeper/conf
{"server_id":1,"enable_ipv6":true,"tcp_port":20001,"four_letter_word_allow_list":"conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl","max_requests_batch_size":100,"min_session_timeout_ms":10000,"session_timeout_ms":30000,"operation_timeout_ms":10000,"dead_session_check_period_ms":500,"heart_beat_interval_ms":500,"election_timeout_lower_bound_ms":1000,"election_timeout_upper_bound_ms":2000,"reserved_log_items":100000,"snapshot_distance":100000,"auto_forwarding":true,"shutdown_timeout":5000,"startup_timeout":180000,"raft_logs_level":"trace","snapshots_to_keep":3,"rotate_log_storage_interval":100000,"stale_log_gap":10000,"fresh_log_gap":200,"max_requests_batch_bytes_size":102400,"max_request_queue_size":100000,"max_requests_quick_batch_size":100,"quorum_reads":false,"force_sync":true,"compress_logs":true,"compress_snapshots_with_zstd_format":true,"configuration_change_tries_count":20,"raft_limits_reconnect_limit":50,"log_storage_path":"./deployment/keeper-1/coordination/log","log_storage_disk":"LocalLogDisk","snapshot_storage_path":"./deployment/keeper-1/coordination/snapshots","snapshot_storage_disk":"LocalSnapshotDisk"}
```

Related: #5999
  • Loading branch information
karencfv authored Sep 26, 2024
1 parent 80449b2 commit 90a8734
Show file tree
Hide file tree
Showing 9 changed files with 1,080 additions and 11 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion clickhouse-admin/api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use clickhouse_admin_types::config::{KeeperConfig, ReplicaConfig};
use clickhouse_admin_types::{
KeeperSettings, Lgif, RaftConfig, ServerSettings,
KeeperConf, KeeperSettings, Lgif, RaftConfig, ServerSettings,
};
use dropshot::{
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
Expand Down Expand Up @@ -75,4 +75,13 @@ pub trait ClickhouseAdminApi {
async fn raft_config(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<RaftConfig>, HttpError>;

// Endpoint declaration: served as `GET /keeper/conf`. On success the response
// body is a `KeeperConf`; failures are reported as an `HttpError`.
// NOTE(review): the `///` line below is presumably picked up by dropshot for
// the generated API description, so reword it deliberately -- confirm.
/// Retrieve configuration information from a keeper node.
#[endpoint {
method = GET,
path = "/keeper/conf",
}]
async fn keeper_conf(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<KeeperConf>, HttpError>;
}
12 changes: 11 additions & 1 deletion clickhouse-admin/src/clickhouse_cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use anyhow::Result;
use camino::Utf8PathBuf;
use clickhouse_admin_types::{Lgif, RaftConfig};
use clickhouse_admin_types::{KeeperConf, Lgif, RaftConfig};
use dropshot::HttpError;
use illumos_utils::{output_to_exec_error, ExecutionError};
use slog::Logger;
Expand Down Expand Up @@ -92,6 +92,16 @@ impl ClickhouseCli {
.await
}

/// Runs the keeper client's `conf` query and parses its output into a
/// [`KeeperConf`] via `KeeperConf::parse`.
///
/// # Panics
///
/// The logger is obtained with `self.log.clone().unwrap()`, so this panics
/// when no logger is available on `self`.
/// NOTE(review): presumably the logger is installed through `with_log` --
/// confirm callers always do so before invoking this method.
pub async fn keeper_conf(&self) -> Result<KeeperConf, ClickhouseCliError> {
    let log = self.log.clone().unwrap();
    let subcommand_description =
        "Retrieve keeper node configuration information";

    self.keeper_client_non_interactive(
        "conf",
        subcommand_description,
        KeeperConf::parse,
        log,
    )
    .await
}

async fn keeper_client_non_interactive<F, T>(
&self,
query: &str,
Expand Down
10 changes: 9 additions & 1 deletion clickhouse-admin/src/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
use crate::context::ServerContext;
use clickhouse_admin_api::*;
use clickhouse_admin_types::config::{KeeperConfig, ReplicaConfig};
use clickhouse_admin_types::{Lgif, RaftConfig};
use clickhouse_admin_types::{KeeperConf, Lgif, RaftConfig};
use dropshot::{
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
};
Expand Down Expand Up @@ -63,4 +63,12 @@ impl ClickhouseAdminApi for ClickhouseAdminImpl {
let output = ctx.clickhouse_cli().raft_config().await?;
Ok(HttpResponseOk(output))
}

// Handler for the `keeper_conf` endpoint: asks the server context's
// ClickHouse CLI wrapper for the keeper configuration and returns it as the
// 200 response body. Any error from the CLI call is propagated with `?`.
async fn keeper_conf(
    rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<KeeperConf>, HttpError> {
    let keeper_conf =
        rqctx.context().clickhouse_cli().keeper_conf().await?;
    Ok(HttpResponseOk(keeper_conf))
}
}
60 changes: 57 additions & 3 deletions clickhouse-admin/tests/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use anyhow::Context;
use camino::Utf8PathBuf;
use clickhouse_admin_types::config::ClickhouseHost;
use clickhouse_admin_types::{KeeperServerInfo, KeeperServerType, RaftConfig};
use clickward::{BasePorts, Deployment, DeploymentConfig, KeeperId};
use clickward::{BasePorts, Deployment, DeploymentConfig};
use dropshot::test_util::log_prefix_for_test;
use omicron_clickhouse_admin::ClickhouseCli;
use omicron_test_utils::dev::test_setup_log;
Expand Down Expand Up @@ -51,7 +51,7 @@ async fn test_lgif_parsing() -> anyhow::Result<()> {
.context("failed to generate config")?;
deployment.deploy().context("failed to deploy")?;

wait_for_keepers(&log, &deployment, vec![KeeperId(1)]).await?;
wait_for_keepers(&log, &deployment, vec![clickward::KeeperId(1)]).await?;

let clickhouse_cli = ClickhouseCli::new(
Utf8PathBuf::from_str("clickhouse").unwrap(),
Expand Down Expand Up @@ -108,7 +108,7 @@ async fn test_raft_config_parsing() -> anyhow::Result<()> {
wait_for_keepers(
&log,
&deployment,
(1..=num_keepers).map(KeeperId).collect(),
(1..=num_keepers).map(clickward::KeeperId).collect(),
)
.await?;

Expand Down Expand Up @@ -143,3 +143,57 @@ async fn test_raft_config_parsing() -> anyhow::Result<()> {
logctx.cleanup_successful();
Ok(())
}

/// Deploys a one-keeper, one-replica cluster, queries the keeper's
/// configuration through `ClickhouseCli::keeper_conf`, and checks that the
/// parsed `server_id` is `KeeperId(1)`. The deployment and its directory are
/// removed once the assertion has passed.
#[tokio::test]
async fn test_keeper_conf_parsing() -> anyhow::Result<()> {
    let logctx = test_setup_log("test_keeper_conf_parsing");
    let log = logctx.log.clone();

    // Each run gets its own deployment directory next to the test's log files.
    let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name());
    let deployment_dir =
        parent_dir.join(format!("{prefix}-oximeter-clickward-test"));
    std::fs::create_dir(&deployment_dir)?;

    // Several replicated clusters are spun up across the tests in this file,
    // so this one uses its own set of ports in case the tests run
    // concurrently.
    let mut deployment = Deployment::new(DeploymentConfig {
        path: deployment_dir.clone(),
        base_ports: BasePorts {
            keeper: 49000,
            raft: 49100,
            clickhouse_tcp: 49200,
            clickhouse_http: 49300,
            clickhouse_interserver_http: 49400,
        },
        cluster_name: String::from("oximeter_cluster"),
    });

    // A single keeper is enough to exercise the `conf` command.
    let keeper_count = 1;
    let replica_count = 1;
    deployment
        .generate_config(keeper_count, replica_count)
        .context("failed to generate config")?;
    deployment.deploy().context("failed to deploy")?;

    wait_for_keepers(&log, &deployment, vec![clickward::KeeperId(1)]).await?;

    // NOTE(review): 49001 is presumably the keeper base port (49000) plus the
    // keeper id (1) -- confirm against clickward's port assignment.
    let keeper_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 49001, 0, 0);
    let clickhouse_cli = ClickhouseCli::new(
        Utf8PathBuf::from_str("clickhouse").unwrap(),
        keeper_addr,
    )
    .with_log(log.clone());

    let conf = clickhouse_cli.keeper_conf().await.unwrap();

    assert_eq!(conf.server_id, clickhouse_admin_types::KeeperId(1));

    info!(&log, "Cleaning up test");
    deployment.teardown()?;
    std::fs::remove_dir_all(deployment_dir)?;
    logctx.cleanup_successful();
    Ok(())
}
1 change: 1 addition & 0 deletions clickhouse-admin/types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ atomicwrites.workspace = true
camino.workspace = true
camino-tempfile.workspace = true
derive_more.workspace = true
itertools.workspace = true
omicron-common.workspace = true
omicron-workspace-hack.workspace = true
schemars.workspace = true
Expand Down
14 changes: 14 additions & 0 deletions clickhouse-admin/types/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,3 +606,17 @@ impl Display for LogLevel {
write!(f, "{s}")
}
}

impl FromStr for LogLevel {
type Err = Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
if s == "trace" {
Ok(LogLevel::Trace)
} else if s == "debug" {
Ok(LogLevel::Debug)
} else {
bail!("{s} is not a valid log level")
}
}
}
Loading

0 comments on commit 90a8734

Please sign in to comment.