From b6bf22f9a97b223419048c869337b631f0ab4262 Mon Sep 17 00:00:00 2001 From: Chris Capurso <1036769+ccapurso@users.noreply.github.com> Date: Fri, 6 Jan 2023 17:06:54 -0500 Subject: [PATCH] VAULT-11829: Add cluster status handler (#18351) * go get link proto @vault-11829-meta-get-cluster-status * add HA status * add HAEnabled method * add raft config * allocate HA nodes based on actual count * add raft autopilot status * add raft quorum warnings * add ClusterID method * add StorageType * add ClusterID * update github.com/hashicorp/vault/vault/hcp_link/proto * add changelog entry * fix raft config panic * remove "Warning" quorum message prefix * add error wrapping * add Core.HAStateWithLock method * reduce quorum warnings to single string * fix HCP_API_HOST test env var check * Revert "fix HCP_API_HOST test env var check" This reverts commit 97c73c4798b77b84aea84f341f2c63c4d657914d. --- changelog/18351.txt | 3 + go.mod | 2 +- go.sum | 10 ++ vault/cluster.go | 4 + vault/core.go | 30 +++++ vault/hcp_link/capabilities/meta/meta.go | 122 ++++++++++++++++++++- vault/hcp_link/internal/wrapped_hcpLink.go | 12 +- vault/request_handling.go | 4 +- 8 files changed, 177 insertions(+), 10 deletions(-) create mode 100644 changelog/18351.txt diff --git a/changelog/18351.txt b/changelog/18351.txt new file mode 100644 index 000000000000..07faa06d1356 --- /dev/null +++ b/changelog/18351.txt @@ -0,0 +1,3 @@ +```release-note:improvement +hcp/status: Add cluster-level status information +``` diff --git a/go.mod b/go.mod index ba7446dae95c..e7df61cc8d8b 100644 --- a/go.mod +++ b/go.mod @@ -138,7 +138,7 @@ require ( github.com/hashicorp/vault/api/auth/approle v0.1.0 github.com/hashicorp/vault/api/auth/userpass v0.1.0 github.com/hashicorp/vault/sdk v0.6.1 - github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d + github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342 github.com/influxdata/influxdb1-client 
v0.0.0-20200827194710-b269163b24ab github.com/jackc/pgx/v4 v4.15.0 github.com/jcmturner/gokrb5/v8 v8.4.2 diff --git a/go.sum b/go.sum index 353496b0d56f..c112eea1fcd8 100644 --- a/go.sum +++ b/go.sum @@ -1173,6 +1173,16 @@ github.com/hashicorp/vault-testing-stepwise v0.1.2 h1:3obC/ziAPGnsz2IQxr5e4Ayb7t github.com/hashicorp/vault-testing-stepwise v0.1.2/go.mod h1:TeU6B+5NqxUjto+Zey+QQEH1iywuHn0ciHZNYh4q3uI= github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d h1:U692VbDl6ww5GQsNFClJVFJDaPeuqtDt1Mwqf21KYek= github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419 h1:yl6f//YTaTTGKJwyOpRe7v1DDPrzP+NErwgnef6qx7A= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94 h1:Rx4Q2/mOPqJuanzwZYttDkWjdibPv3UpvsvKmOkl6h4= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb h1:PgXcBszV61BvxD0wZzm4QCz9btgTWX74NO4be6S2afU= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051 h1:cMQoRbIUMhbM0NsmP6hH3S3ZmAPVgic3g3L8Z55rXCI= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342 h1:9cMwZnaAV/lKs8EZsvBF00wPt350wD3sg/xqWGeN4gM= +github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342/go.mod 
h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU= github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443 h1:O/pT5C1Q3mVXMyuqg7yuAWUg/jMZR1/0QTzTRdNR6Uw= github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443/go.mod h1:bEpDU35nTu0ey1EXjwNwPjI9xErAsoOCmcMb9GKvyxo= github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= diff --git a/vault/cluster.go b/vault/cluster.go index bc1d352e5337..28060721bb62 100644 --- a/vault/cluster.go +++ b/vault/cluster.go @@ -388,3 +388,7 @@ func (c *Core) SetClusterListenerAddrs(addrs []*net.TCPAddr) { func (c *Core) SetClusterHandler(handler http.Handler) { c.clusterHandler = handler } + +func (c *Core) ClusterID() string { + return c.clusterID.Load() +} diff --git a/vault/core.go b/vault/core.go index a37e23d6f0a7..bcfec5baa1d3 100644 --- a/vault/core.go +++ b/vault/core.go @@ -688,6 +688,13 @@ func (c *Core) HAState() consts.HAState { } } +func (c *Core) HAStateWithLock() consts.HAState { + c.stateLock.RLock() + defer c.stateLock.RUnlock() // keep the read lock held until HAState has returned + + return c.HAState() +} + // CoreConfig is used to parameterize a core type CoreConfig struct { entCoreConfig @@ -3699,3 +3706,26 @@ func (c *Core) GetHCPLinkStatus() (string, string) { return status, resourceID } + +func (c *Core) HAEnabled() bool { + return c.ha != nil && c.ha.HAEnabled() +} + +func (c *Core) GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error) { + raftBackend := c.getRaftBackend() + + if raftBackend == nil { + return nil, nil + } + + return raftBackend.GetConfiguration(ctx) +} + +func (c *Core) GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error) { + raftBackend := c.getRaftBackend() + if raftBackend == nil { + return nil, nil + } + + return raftBackend.GetAutopilotServerState(ctx) +} diff --git a/vault/hcp_link/capabilities/meta/meta.go b/vault/hcp_link/capabilities/meta/meta.go index a0b5f2db2b11..c96f7cb602ea 100644 ---
a/vault/hcp_link/capabilities/meta/meta.go +++ b/vault/hcp_link/capabilities/meta/meta.go @@ -4,12 +4,14 @@ import ( "context" "fmt" "math" + "os" "sync" "time" "github.com/hashicorp/go-hclog" scada "github.com/hashicorp/hcp-scada-provider" "github.com/hashicorp/vault/helper/namespace" + "github.com/hashicorp/vault/sdk/helper/consts" "github.com/hashicorp/vault/vault" "github.com/hashicorp/vault/vault/cluster" "github.com/hashicorp/vault/vault/hcp_link/capabilities" @@ -23,7 +25,7 @@ import ( type hcpLinkMetaHandler struct { meta.UnimplementedHCPLinkMetaServer - wrappedCore internal.WrappedCoreListNamespacesMounts + wrappedCore internal.WrappedCoreMeta scadaProvider scada.SCADAProvider logger hclog.Logger @@ -129,7 +131,7 @@ func (h *hcpLinkMetaHandler) ListNamespaces(ctx context.Context, req *meta.ListN func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMountsRequest) (*meta.ListMountsResponse, error) { mountEntries, err := h.wrappedCore.ListMounts() if err != nil { - return nil, err + return nil, fmt.Errorf("unable to list secret mounts: %w", err) } var mounts []*meta.Mount @@ -140,7 +142,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount if nsID != namespace.RootNamespaceID { ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID) if err != nil { - return nil, err + return nil, fmt.Errorf("unable to get namespace associated with secret mount: %w", err) } path = ns.Path + path @@ -161,7 +163,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsRequest) (*meta.ListAuthResponse, error) { authEntries, err := h.wrappedCore.ListAuths() if err != nil { - return nil, err + return nil, fmt.Errorf("unable to list auth mounts: %w", err) } var auths []*meta.Auth @@ -172,7 +174,7 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR if nsID != namespace.RootNamespaceID { 
ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID) if err != nil { - return nil, err + return nil, fmt.Errorf("unable to get namespace associated with auth mount: %w", err) } path = ns.Path + path @@ -189,3 +191,113 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR Auths: auths, }, nil } + +func (h *hcpLinkMetaHandler) GetClusterStatus(ctx context.Context, req *meta.GetClusterStatusRequest) (*meta.GetClusterStatusResponse, error) { + if h.wrappedCore.HAStateWithLock() != consts.Active { + return nil, fmt.Errorf("node not active") + } + + hostname, err := os.Hostname() + if err != nil { + return nil, fmt.Errorf("unable to fetch hostname: %w", err) + } + + haEnabled := h.wrappedCore.HAEnabled() + haStatus := &meta.HAStatus{ + Enabled: haEnabled, + } + + if haEnabled { + leader := &meta.HANode{ + Hostname: hostname, + } + + peers := h.wrappedCore.GetHAPeerNodesCached() + + haNodes := make([]*meta.HANode, len(peers)+1) + haNodes[0] = leader + + for i, peerNode := range peers { + haNodes[i+1] = &meta.HANode{ + Hostname: peerNode.Hostname, + } + } + + haStatus.Nodes = haNodes + } + + raftStatus := &meta.RaftStatus{} + raftConfig, err := h.wrappedCore.GetRaftConfiguration(ctx) + if err != nil { + return nil, fmt.Errorf("unable to get Raft configuration: %w", err) + } + + if raftConfig != nil { + raftServers := make([]*meta.RaftServer, len(raftConfig.Servers)) + + var voterCount uint32 + for i, srv := range raftConfig.Servers { + raftServers[i] = &meta.RaftServer{ + NodeID: srv.NodeID, + Address: srv.Address, + Voter: srv.Voter, + Leader: srv.Leader, + ProtocolVersion: srv.ProtocolVersion, + } + + if srv.Voter { + voterCount++ + } + } + + raftStatus.RaftConfiguration = &meta.RaftConfiguration{ + Servers: raftServers, + } + + evenVoterMessage := "Vault should have access to an odd number of voter nodes." + largeClusterMessage := "Very large cluster detected."
+ var quorumWarning string + + if voterCount == 1 { + quorumWarning = "Only one server node found. Vault is not running in high availability mode." + } else if voterCount%2 == 0 && voterCount > 7 { + quorumWarning = evenVoterMessage + " " + largeClusterMessage + } else if voterCount%2 == 0 { + quorumWarning = evenVoterMessage + } else if voterCount > 7 { + quorumWarning = largeClusterMessage + } + + raftStatus.QuorumWarning = quorumWarning + } + + raftAutopilotState, err := h.wrappedCore.GetRaftAutopilotState(ctx) + if err != nil { + return nil, fmt.Errorf("unable to get Raft Autopilot state: %w", err) + } + + if raftAutopilotState != nil { + autopilotStatus := &meta.AutopilotStatus{ + Healthy: raftAutopilotState.Healthy, + } + + autopilotServers := make([]*meta.AutopilotServer, 0) + for _, srv := range raftAutopilotState.Servers { + autopilotServers = append(autopilotServers, &meta.AutopilotServer{ + ID: srv.ID, + Healthy: srv.Healthy, + }) + } + autopilotStatus.Servers = autopilotServers // report per-server health instead of discarding the slice + raftStatus.AutopilotStatus = autopilotStatus + } + + resp := &meta.GetClusterStatusResponse{ + ClusterID: h.wrappedCore.ClusterID(), + HAStatus: haStatus, + RaftStatus: raftStatus, + StorageType: h.wrappedCore.StorageType(), + } + + return resp, nil +} diff --git a/vault/hcp_link/internal/wrapped_hcpLink.go b/vault/hcp_link/internal/wrapped_hcpLink.go index 0bc9717bc8dd..98b0bf4e0564 100644 --- a/vault/hcp_link/internal/wrapped_hcpLink.go +++ b/vault/hcp_link/internal/wrapped_hcpLink.go @@ -4,6 +4,7 @@ import ( "context" "github.com/hashicorp/vault/helper/namespace" + "github.com/hashicorp/vault/physical/raft" "github.com/hashicorp/vault/sdk/helper/consts" "github.com/hashicorp/vault/sdk/logical" "github.com/hashicorp/vault/vault" @@ -30,14 +31,21 @@ type WrappedCoreHCPToken interface { var _ WrappedCoreHCPToken = &vault.Core{} -type WrappedCoreListNamespacesMounts interface { +type WrappedCoreMeta interface { NamespaceByID(ctx context.Context, nsID string) (*namespace.Namespace, error) ListNamespaces(includePath bool)
[]*namespace.Namespace ListMounts() ([]*vault.MountEntry, error) ListAuths() ([]*vault.MountEntry, error) + HAEnabled() bool + HAStateWithLock() consts.HAState + GetHAPeerNodesCached() []vault.PeerNode + GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error) + GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error) + StorageType() string + ClusterID() string } -var _ WrappedCoreListNamespacesMounts = &vault.Core{} +var _ WrappedCoreMeta = &vault.Core{} type WrappedCoreHCPLinkStatus interface { WrappedCoreStandbyStates diff --git a/vault/request_handling.go b/vault/request_handling.go index 6703d3a535ec..3183a02733c8 100644 --- a/vault/request_handling.go +++ b/vault/request_handling.go @@ -796,7 +796,7 @@ func (c *Core) handleCancelableRequest(ctx context.Context, req *logical.Request } if walState.LocalIndex != 0 || walState.ReplicatedIndex != 0 { - walState.ClusterID = c.clusterID.Load() + walState.ClusterID = c.ClusterID() if walState.LocalIndex == 0 { if c.perfStandby { walState.LocalIndex = LastRemoteWAL(c) @@ -2343,7 +2343,7 @@ func (c *Core) checkSSCTokenInternal(ctx context.Context, token string, isPerfSt return plainToken.Random, nil } - requiredWalState := &logical.WALState{ClusterID: c.clusterID.Load(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0} + requiredWalState := &logical.WALState{ClusterID: c.ClusterID(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0} if c.HasWALState(requiredWalState, isPerfStandby) { return plainToken.Random, nil }