# [reconfigurator] Endpoint to retrieve keeper cluster membership (#6758)
## Overview

This commit adds a new `clickhouse-admin` endpoint to retrieve cluster
membership information from the keeper cluster. It returns a
`ClickhouseKeeperClusterMembership`.

## Manual testing

```console
$ curl http://[::1]:8888/keeper/cluster-membership
{"queried_keeper":2,"leader_committed_log_index":38875,"raft_config":[1,2,3]}
```
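
For programmatic callers, the same endpoint can be exercised through a
Progenitor-generated client. The sketch below is illustrative only: the
`clickhouse_admin_client` crate name and `keeper_cluster_membership` method are
assumptions based on how omicron typically generates clients from the OpenAPI
documents under `openapi/`, not something added by this commit.

```rust
// Hypothetical sketch: query the new endpoint via a generated client.
// Crate and method names here are assumptions, not part of this commit.
use clickhouse_admin_client::Client;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let client = Client::new("http://[::1]:8888");
    let membership = client.keeper_cluster_membership().await?;
    println!(
        "keeper {:?} sees raft config {:?} at leader log index {}",
        membership.queried_keeper,
        membership.raft_config,
        membership.leader_committed_log_index,
    );
    Ok(())
}
```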
karencfv authored Oct 2, 2024
1 parent 03c4f24 commit abac284
Showing 8 changed files with 204 additions and 21 deletions.
12 changes: 11 additions & 1 deletion clickhouse-admin/api/src/lib.rs
@@ -4,7 +4,8 @@

use clickhouse_admin_types::config::{KeeperConfig, ReplicaConfig};
use clickhouse_admin_types::{
KeeperConf, KeeperSettings, Lgif, RaftConfig, ServerSettings,
ClickhouseKeeperClusterMembership, KeeperConf, KeeperSettings, Lgif,
RaftConfig, ServerSettings,
};
use dropshot::{
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
@@ -84,4 +85,13 @@ pub trait ClickhouseAdminApi {
async fn keeper_conf(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<KeeperConf>, HttpError>;

/// Retrieve cluster membership information from a keeper node.
#[endpoint {
method = GET,
path = "/keeper/cluster-membership",
}]
async fn keeper_cluster_membership(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<ClickhouseKeeperClusterMembership>, HttpError>;
}
21 changes: 20 additions & 1 deletion clickhouse-admin/src/clickhouse_cli.rs
@@ -4,11 +4,14 @@

use anyhow::Result;
use camino::Utf8PathBuf;
use clickhouse_admin_types::{KeeperConf, Lgif, RaftConfig};
use clickhouse_admin_types::{
ClickhouseKeeperClusterMembership, KeeperConf, KeeperId, Lgif, RaftConfig,
};
use dropshot::HttpError;
use illumos_utils::{output_to_exec_error, ExecutionError};
use slog::Logger;
use slog_error_chain::{InlineErrorChain, SlogInlineError};
use std::collections::BTreeSet;
use std::ffi::OsStr;
use std::io;
use std::net::SocketAddrV6;
@@ -102,6 +105,22 @@ impl ClickhouseCli {
.await
}

pub async fn keeper_cluster_membership(
&self,
) -> Result<ClickhouseKeeperClusterMembership, ClickhouseCliError> {
let lgif_output = self.lgif().await?;
let conf_output = self.keeper_conf().await?;
let raft_output = self.raft_config().await?;
let raft_config: BTreeSet<KeeperId> =
raft_output.keeper_servers.iter().map(|s| s.server_id).collect();

Ok(ClickhouseKeeperClusterMembership {
queried_keeper: conf_output.server_id,
leader_committed_log_index: lgif_output.leader_committed_log_idx,
raft_config,
})
}

async fn keeper_client_non_interactive<F, T>(
&self,
query: &str,
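
The new helper composes three existing queries: `lgif` supplies the leader
committed log index, `conf` supplies the queried keeper's own server ID, and
the raft configuration supplies the full set of keeper IDs. These helpers wrap
non-interactive `clickhouse keeper-client` invocations, roughly along the
following lines (flag names and output are abbreviated and illustrative, based
on the keeper-client commands these methods are understood to run):

```console
$ clickhouse keeper-client --host ::1 --port 30501 -q lgif
...
leader_committed_log_idx	38875
...
$ clickhouse keeper-client --host ::1 --port 30501 -q conf
server_id=2
...
```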
13 changes: 12 additions & 1 deletion clickhouse-admin/src/http_entrypoints.rs
@@ -5,7 +5,9 @@
use crate::context::ServerContext;
use clickhouse_admin_api::*;
use clickhouse_admin_types::config::{KeeperConfig, ReplicaConfig};
use clickhouse_admin_types::{KeeperConf, Lgif, RaftConfig};
use clickhouse_admin_types::{
ClickhouseKeeperClusterMembership, KeeperConf, Lgif, RaftConfig,
};
use dropshot::{
HttpError, HttpResponseCreated, HttpResponseOk, RequestContext, TypedBody,
};
@@ -71,4 +73,13 @@ impl ClickhouseAdminApi for ClickhouseAdminImpl {
let output = ctx.clickhouse_cli().keeper_conf().await?;
Ok(HttpResponseOk(output))
}

async fn keeper_cluster_membership(
rqctx: RequestContext<Self::Context>,
) -> Result<HttpResponseOk<ClickhouseKeeperClusterMembership>, HttpError>
{
let ctx = rqctx.context();
let output = ctx.clickhouse_cli().keeper_cluster_membership().await?;
Ok(HttpResponseOk(output))
}
}
85 changes: 84 additions & 1 deletion clickhouse-admin/tests/integration_test.rs
@@ -5,7 +5,10 @@
use anyhow::Context;
use camino::Utf8PathBuf;
use clickhouse_admin_types::config::ClickhouseHost;
use clickhouse_admin_types::{KeeperServerInfo, KeeperServerType, RaftConfig};
use clickhouse_admin_types::{
ClickhouseKeeperClusterMembership, KeeperId, KeeperServerInfo,
KeeperServerType, RaftConfig,
};
use clickward::{BasePorts, Deployment, DeploymentConfig};
use dropshot::test_util::log_prefix_for_test;
use omicron_clickhouse_admin::ClickhouseCli;
@@ -197,3 +200,83 @@ async fn test_keeper_conf_parsing() -> anyhow::Result<()> {
logctx.cleanup_successful();
Ok(())
}

#[tokio::test]
async fn test_keeper_cluster_membership() -> anyhow::Result<()> {
let logctx = test_setup_log("test_keeper_cluster_membership");
let log = logctx.log.clone();

let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name());
let path = parent_dir.join(format!("{prefix}-oximeter-clickward-test"));
std::fs::create_dir(&path)?;

// We spin up several replicated clusters and must use a
// separate set of ports in case the tests run concurrently.
let base_ports = BasePorts {
keeper: 30500,
raft: 30600,
clickhouse_tcp: 30700,
clickhouse_http: 30800,
clickhouse_interserver_http: 30900,
};

let config = DeploymentConfig {
path: path.clone(),
base_ports,
cluster_name: "oximeter_cluster".to_string(),
};

let mut deployment = Deployment::new(config);

let num_keepers = 3;
let num_replicas = 1;
deployment
.generate_config(num_keepers, num_replicas)
.context("failed to generate config")?;
deployment.deploy().context("failed to deploy")?;

wait_for_keepers(
&log,
&deployment,
(1..=num_keepers).map(clickward::KeeperId).collect(),
)
.await?;

let clickhouse_cli = ClickhouseCli::new(
Utf8PathBuf::from_str("clickhouse").unwrap(),
SocketAddrV6::new(Ipv6Addr::LOCALHOST, 30501, 0, 0),
)
.with_log(log.clone());

let keeper_cluster_membership =
clickhouse_cli.keeper_cluster_membership().await.unwrap();

let mut raft_config = BTreeSet::new();

for i in 1..=num_keepers {
raft_config.insert(clickhouse_admin_types::KeeperId(i));
}

let expected_keeper_cluster_membership =
ClickhouseKeeperClusterMembership {
queried_keeper: KeeperId(1),
            // This value differs from run to run, so we don't assert on it
leader_committed_log_index: 0,
raft_config,
};

assert_eq!(
keeper_cluster_membership.queried_keeper,
expected_keeper_cluster_membership.queried_keeper
);
assert_eq!(
keeper_cluster_membership.raft_config,
expected_keeper_cluster_membership.raft_config
);

info!(&log, "Cleaning up test");
deployment.teardown()?;
std::fs::remove_dir_all(path)?;
logctx.cleanup_successful();
Ok(())
}
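
Note that the test constructs `ClickhouseCli` with the bare `clickhouse`
command, so a real ClickHouse binary must be on `PATH`. Assuming omicron's
usual nextest-based workflow, a run might look like:

```console
$ cargo nextest run -p omicron-clickhouse-admin test_keeper_cluster_membership
```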
18 changes: 18 additions & 0 deletions clickhouse-admin/types/src/lib.rs
@@ -900,6 +900,24 @@ impl KeeperConf {
}
}

/// The configuration of the clickhouse keeper raft cluster returned from a
/// single keeper node
///
/// Each keeper is asked for its known raft configuration via `clickhouse-admin`
/// dropshot servers running in `ClickhouseKeeper` zones. We include the
/// leader committed log index known to the current keeper node (whether or not
/// it is the leader) to determine which configuration is newest.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub struct ClickhouseKeeperClusterMembership {
/// Keeper ID of the keeper being queried
pub queried_keeper: KeeperId,
/// Index of the last committed log entry from the leader's perspective
pub leader_committed_log_index: u64,
/// Keeper IDs of all keepers in the cluster
pub raft_config: BTreeSet<KeeperId>,
}

#[cfg(test)]
mod tests {
use camino::Utf8PathBuf;
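
As the doc comment says, the leader committed log index lets a consumer decide
which of several reported configurations is newest. A minimal sketch of that
selection, assuming the caller has already gathered one membership per
reachable keeper (`newest_membership` is illustrative, not part of this
commit):

```rust
use clickhouse_admin_types::ClickhouseKeeperClusterMembership;

/// Pick the membership with the highest leader-committed log index,
/// i.e. the most up-to-date view of the raft configuration.
fn newest_membership(
    memberships: Vec<ClickhouseKeeperClusterMembership>,
) -> Option<ClickhouseKeeperClusterMembership> {
    memberships.into_iter().max_by_key(|m| m.leader_committed_log_index)
}
```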
@@ -5,12 +5,11 @@
//! A mechanism for allocating clickhouse keeper and server nodes for clustered
//! clickhouse setups during blueprint planning
use clickhouse_admin_types::KeeperId;
use clickhouse_admin_types::{ClickhouseKeeperClusterMembership, KeeperId};
use nexus_types::deployment::{
Blueprint, BlueprintZoneFilter, BlueprintZoneType, BlueprintZonesConfig,
ClickhouseClusterConfig,
};
use nexus_types::inventory::ClickhouseKeeperClusterMembership;
use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid};
use slog::{error, Logger};
use std::collections::{BTreeMap, BTreeSet};
16 changes: 1 addition & 15 deletions nexus/types/src/inventory.rs
@@ -13,7 +13,7 @@ use crate::external_api::params::PhysicalDiskKind;
use crate::external_api::params::UninitializedSledId;
use chrono::DateTime;
use chrono::Utc;
use clickhouse_admin_types::KeeperId;
use clickhouse_admin_types::ClickhouseKeeperClusterMembership;
pub use gateway_client::types::PowerState;
pub use gateway_client::types::RotImageError;
pub use gateway_client::types::RotSlot;
@@ -512,17 +512,3 @@ pub struct OmicronZonesFound {
pub sled_id: SledUuid,
pub zones: OmicronZonesConfig,
}

/// The configuration of the clickhouse keeper raft cluster returned from a
/// single keeper node
///
/// Each keeper is asked for its known raft configuration via `clickhouse-admin`
/// dropshot servers running in `ClickhouseKeeper` zones. We include the
/// leader committed log index known to the current keeper node (whether or not
/// it is the leader) to determine which configuration is newest.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct ClickhouseKeeperClusterMembership {
pub queried_keeper: KeeperId,
pub leader_committed_log_index: u64,
pub raft_config: BTreeSet<KeeperId>,
}
57 changes: 57 additions & 0 deletions openapi/clickhouse-admin.json
@@ -10,6 +10,30 @@
"version": "0.0.1"
},
"paths": {
"/keeper/cluster-membership": {
"get": {
"summary": "Retrieve cluster membership information from a keeper node.",
"operationId": "keeper_cluster_membership",
"responses": {
"200": {
"description": "successful operation",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ClickhouseKeeperClusterMembership"
}
}
}
},
"4XX": {
"$ref": "#/components/responses/Error"
},
"5XX": {
"$ref": "#/components/responses/Error"
}
}
}
},
"/keeper/conf": {
"get": {
"summary": "Retrieve configuration information from a keeper node.",
@@ -199,6 +223,39 @@
}
]
},
"ClickhouseKeeperClusterMembership": {
"description": "The configuration of the clickhouse keeper raft cluster returned from a single keeper node\n\nEach keeper is asked for its known raft configuration via `clickhouse-admin` dropshot servers running in `ClickhouseKeeper` zones. state. We include the leader committed log index known to the current keeper node (whether or not it is the leader) to determine which configuration is newest.",
"type": "object",
"properties": {
"leader_committed_log_index": {
"description": "Index of the last committed log entry from the leader's perspective",
"type": "integer",
"format": "uint64",
"minimum": 0
},
"queried_keeper": {
"description": "Keeper ID of the keeper being queried",
"allOf": [
{
"$ref": "#/components/schemas/KeeperId"
}
]
},
"raft_config": {
"description": "Keeper IDs of all keepers in the cluster",
"type": "array",
"items": {
"$ref": "#/components/schemas/KeeperId"
},
"uniqueItems": true
}
},
"required": [
"leader_committed_log_index",
"queried_keeper",
"raft_config"
]
},
"Error": {
"description": "Error information from a response.",
"type": "object",
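
For reference, the response body from the manual test above is an instance of
this schema:

```json
{
  "queried_keeper": 2,
  "leader_committed_log_index": 38875,
  "raft_config": [1, 2, 3]
}
```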
