Skip to content

Commit

Permalink
VAULT-11829: Add cluster status handler (#18351)
Browse files Browse the repository at this point in the history
* go get link proto @vault-11829-meta-get-cluster-status

* add HA status

* add HAEnabled method

* add raft config

* allocate HA nodes based on actual count

* add raft autopilot status

* add raft quorum warnings

* add ClusterID method

* add StorageType

* add ClusterID

* update github.com/hashicorp/vault/vault/hcp_link/proto

* add changelog entry

* fix raft config panic

* remove "Warning" quorum message prefix

* add error wrapping

* add Core.HAStateWithLock method

* reduce quorum warnings to single string

* fix HCP_API_HOST test env var check

* Revert "fix HCP_API_HOST test env var check"

This reverts commit 97c73c4.
  • Loading branch information
ccapurso authored and AnPucel committed Jan 14, 2023
1 parent 8237243 commit b6bf22f
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 10 deletions.
3 changes: 3 additions & 0 deletions changelog/18351.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
hcp/status: Add cluster-level status information
```
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ require (
github.com/hashicorp/vault/api/auth/approle v0.1.0
github.com/hashicorp/vault/api/auth/userpass v0.1.0
github.com/hashicorp/vault/sdk v0.6.1
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342
github.com/influxdata/influxdb1-client v0.0.0-20200827194710-b269163b24ab
github.com/jackc/pgx/v4 v4.15.0
github.com/jcmturner/gokrb5/v8 v8.4.2
Expand Down
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,16 @@ github.com/hashicorp/vault-testing-stepwise v0.1.2 h1:3obC/ziAPGnsz2IQxr5e4Ayb7t
github.com/hashicorp/vault-testing-stepwise v0.1.2/go.mod h1:TeU6B+5NqxUjto+Zey+QQEH1iywuHn0ciHZNYh4q3uI=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d h1:U692VbDl6ww5GQsNFClJVFJDaPeuqtDt1Mwqf21KYek=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221209165735-a2eed407e08d/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419 h1:yl6f//YTaTTGKJwyOpRe7v1DDPrzP+NErwgnef6qx7A=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20221213220056-b0613b59f419/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94 h1:Rx4Q2/mOPqJuanzwZYttDkWjdibPv3UpvsvKmOkl6h4=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230103211812-c28545e74f94/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb h1:PgXcBszV61BvxD0wZzm4QCz9btgTWX74NO4be6S2afU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230105183308-048241517ffb/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051 h1:cMQoRbIUMhbM0NsmP6hH3S3ZmAPVgic3g3L8Z55rXCI=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106184443-96cfe11e7051/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342 h1:9cMwZnaAV/lKs8EZsvBF00wPt350wD3sg/xqWGeN4gM=
github.com/hashicorp/vault/vault/hcp_link/proto v0.0.0-20230106203127-9eaf26716342/go.mod h1:a2crHoMWwY6aiL8GWT8hYj7vKD64uX0EdRPbnsHF5wU=
github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443 h1:O/pT5C1Q3mVXMyuqg7yuAWUg/jMZR1/0QTzTRdNR6Uw=
github.com/hashicorp/vic v1.5.1-0.20190403131502-bbfe86ec9443/go.mod h1:bEpDU35nTu0ey1EXjwNwPjI9xErAsoOCmcMb9GKvyxo=
github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM=
Expand Down
4 changes: 4 additions & 0 deletions vault/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,7 @@ func (c *Core) SetClusterListenerAddrs(addrs []*net.TCPAddr) {
func (c *Core) SetClusterHandler(handler http.Handler) {
c.clusterHandler = handler
}

// ClusterID returns this Vault cluster's unique identifier, read from
// the core's atomically-stored clusterID value.
func (c *Core) ClusterID() string {
	id := c.clusterID.Load()
	return id
}
30 changes: 30 additions & 0 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,13 @@ func (c *Core) HAState() consts.HAState {
}
}

// HAStateWithLock returns the HA state of this node while holding the
// state lock for reading, so the state cannot change mid-read. Callers
// that already hold the lock should use HAState directly.
func (c *Core) HAStateWithLock() consts.HAState {
	c.stateLock.RLock()
	// defer keeps the read lock held across the HAState call; releasing
	// it immediately (as before) left the read unprotected.
	defer c.stateLock.RUnlock()

	return c.HAState()
}

// CoreConfig is used to parameterize a core
type CoreConfig struct {
entCoreConfig
Expand Down Expand Up @@ -3699,3 +3706,26 @@ func (c *Core) GetHCPLinkStatus() (string, string) {

return status, resourceID
}

// HAEnabled reports whether this core has an HA backend configured and
// that backend reports high availability as enabled.
func (c *Core) HAEnabled() bool {
	if c.ha == nil {
		return false
	}
	return c.ha.HAEnabled()
}

// GetRaftConfiguration returns the current Raft server configuration.
// It returns (nil, nil) when the cluster is not backed by Raft storage.
func (c *Core) GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error) {
	if backend := c.getRaftBackend(); backend != nil {
		return backend.GetConfiguration(ctx)
	}

	// No Raft backend configured; nothing to report.
	return nil, nil
}

// GetRaftAutopilotState returns the Raft autopilot server state.
// It returns (nil, nil) when the cluster is not backed by Raft storage.
func (c *Core) GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error) {
	if backend := c.getRaftBackend(); backend != nil {
		return backend.GetAutopilotServerState(ctx)
	}

	// No Raft backend configured; autopilot state does not apply.
	return nil, nil
}
122 changes: 117 additions & 5 deletions vault/hcp_link/capabilities/meta/meta.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ import (
"context"
"fmt"
"math"
"os"
"sync"
"time"

"github.com/hashicorp/go-hclog"
scada "github.com/hashicorp/hcp-scada-provider"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/vault"
"github.com/hashicorp/vault/vault/cluster"
"github.com/hashicorp/vault/vault/hcp_link/capabilities"
Expand All @@ -23,7 +25,7 @@ import (
type hcpLinkMetaHandler struct {
meta.UnimplementedHCPLinkMetaServer

wrappedCore internal.WrappedCoreListNamespacesMounts
wrappedCore internal.WrappedCoreMeta
scadaProvider scada.SCADAProvider
logger hclog.Logger

Expand Down Expand Up @@ -129,7 +131,7 @@ func (h *hcpLinkMetaHandler) ListNamespaces(ctx context.Context, req *meta.ListN
func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMountsRequest) (*meta.ListMountsResponse, error) {
mountEntries, err := h.wrappedCore.ListMounts()
if err != nil {
return nil, err
return nil, fmt.Errorf("unable to list secret mounts: %w", err)
}

var mounts []*meta.Mount
Expand All @@ -140,7 +142,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount
if nsID != namespace.RootNamespaceID {
ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID)
if err != nil {
return nil, err
return nil, fmt.Errorf("unable to get namespace associated with secret mount: %w", err)
}

path = ns.Path + path
Expand All @@ -161,7 +163,7 @@ func (h *hcpLinkMetaHandler) ListMounts(ctx context.Context, req *meta.ListMount
func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsRequest) (*meta.ListAuthResponse, error) {
authEntries, err := h.wrappedCore.ListAuths()
if err != nil {
return nil, err
return nil, fmt.Errorf("unable to list auth mounts: %w", err)
}

var auths []*meta.Auth
Expand All @@ -172,7 +174,7 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR
if nsID != namespace.RootNamespaceID {
ns, err := h.wrappedCore.NamespaceByID(ctx, entry.NamespaceID)
if err != nil {
return nil, err
return nil, fmt.Errorf("unable to get namespace associated with auth mount: %w", err)
}

path = ns.Path + path
Expand All @@ -189,3 +191,113 @@ func (h *hcpLinkMetaHandler) ListAuths(ctx context.Context, req *meta.ListAuthsR
Auths: auths,
}, nil
}

// GetClusterStatus gathers cluster-level status for the HCP link meta
// capability: HA topology, Raft configuration with quorum guidance,
// Raft autopilot health, storage type, and cluster ID. It may only be
// served by the active node.
func (h *hcpLinkMetaHandler) GetClusterStatus(ctx context.Context, req *meta.GetClusterStatusRequest) (*meta.GetClusterStatusResponse, error) {
	// Only the active node has an authoritative view of the cluster.
	if h.wrappedCore.HAStateWithLock() != consts.Active {
		return nil, fmt.Errorf("node not active")
	}

	hostname, err := os.Hostname()
	if err != nil {
		return nil, fmt.Errorf("unable to fetch hostname: %w", err)
	}

	haEnabled := h.wrappedCore.HAEnabled()
	haStatus := &meta.HAStatus{
		Enabled: haEnabled,
	}

	if haEnabled {
		// This node is active (checked above), so it is the leader; the
		// cached HA peers are the other nodes.
		leader := &meta.HANode{
			Hostname: hostname,
		}

		peers := h.wrappedCore.GetHAPeerNodesCached()

		haNodes := make([]*meta.HANode, len(peers)+1)
		haNodes[0] = leader

		for i, peerNode := range peers {
			haNodes[i+1] = &meta.HANode{
				Hostname: peerNode.Hostname,
			}
		}

		haStatus.Nodes = haNodes
	}

	raftStatus := &meta.RaftStatus{}
	raftConfig, err := h.wrappedCore.GetRaftConfiguration(ctx)
	if err != nil {
		return nil, fmt.Errorf("unable to get Raft configuration: %w", err)
	}

	// raftConfig is nil when the cluster is not backed by Raft storage.
	if raftConfig != nil {
		raftServers := make([]*meta.RaftServer, len(raftConfig.Servers))

		var voterCount uint32
		for i, srv := range raftConfig.Servers {
			raftServers[i] = &meta.RaftServer{
				NodeID:          srv.NodeID,
				Address:         srv.Address,
				Voter:           srv.Voter,
				Leader:          srv.Leader,
				ProtocolVersion: srv.ProtocolVersion,
			}

			if srv.Voter {
				voterCount++
			}
		}

		raftStatus.RaftConfiguration = &meta.RaftConfiguration{
			Servers: raftServers,
		}

		evenVoterMessage := "Vault should have access to an odd number of voter nodes."
		largeClusterMessage := "Very large cluster detected."
		var quorumWarning string

		// Quorum guidance: a single node has no redundancy; an even voter
		// count adds no fault tolerance; more than 7 voters is flagged as
		// a very large cluster.
		if voterCount == 1 {
			quorumWarning = "Only one server node found. Vault is not running in high availability mode."
		} else if voterCount%2 == 0 && voterCount > 7 {
			quorumWarning = evenVoterMessage + " " + largeClusterMessage
		} else if voterCount%2 == 0 {
			quorumWarning = evenVoterMessage
		} else if voterCount > 7 {
			quorumWarning = largeClusterMessage
		}

		raftStatus.QuorumWarning = quorumWarning
	}

	raftAutopilotState, err := h.wrappedCore.GetRaftAutopilotState(ctx)
	if err != nil {
		return nil, fmt.Errorf("unable to get Raft Autopilot state: %w", err)
	}

	// Autopilot state is nil when Raft autopilot is not in use.
	if raftAutopilotState != nil {
		autopilotStatus := &meta.AutopilotStatus{
			Healthy: raftAutopilotState.Healthy,
		}

		autopilotServers := make([]*meta.AutopilotServer, 0, len(raftAutopilotState.Servers))
		for _, srv := range raftAutopilotState.Servers {
			autopilotServers = append(autopilotServers, &meta.AutopilotServer{
				ID:      srv.ID,
				Healthy: srv.Healthy,
			})
		}

		// BUG FIX: the per-server list was built but never attached to the
		// status, so it was silently dropped from the response.
		autopilotStatus.Servers = autopilotServers

		raftStatus.AutopilotStatus = autopilotStatus
	}

	resp := &meta.GetClusterStatusResponse{
		ClusterID:   h.wrappedCore.ClusterID(),
		HAStatus:    haStatus,
		RaftStatus:  raftStatus,
		StorageType: h.wrappedCore.StorageType(),
	}

	return resp, nil
}
12 changes: 10 additions & 2 deletions vault/hcp_link/internal/wrapped_hcpLink.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"

"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/sdk/helper/consts"
"github.com/hashicorp/vault/sdk/logical"
"github.com/hashicorp/vault/vault"
Expand All @@ -30,14 +31,21 @@ type WrappedCoreHCPToken interface {

var _ WrappedCoreHCPToken = &vault.Core{}

type WrappedCoreListNamespacesMounts interface {
// WrappedCoreMeta is the consumer-side view of vault.Core used by the
// HCP link meta capability: namespace/mount listing plus the
// cluster-status accessors (HA state, Raft configuration/autopilot,
// storage type, and cluster ID).
type WrappedCoreMeta interface {
// NamespaceByID resolves a namespace from its ID.
NamespaceByID(ctx context.Context, nsID string) (*namespace.Namespace, error)
// ListNamespaces returns all namespaces, optionally including paths.
ListNamespaces(includePath bool) []*namespace.Namespace
// ListMounts returns the secret mount table entries.
ListMounts() ([]*vault.MountEntry, error)
// ListAuths returns the auth mount table entries.
ListAuths() ([]*vault.MountEntry, error)
// HAEnabled reports whether high availability is configured.
HAEnabled() bool
// HAStateWithLock returns the node's HA state under the state lock.
HAStateWithLock() consts.HAState
// GetHAPeerNodesCached returns the cached list of HA peer nodes.
GetHAPeerNodesCached() []vault.PeerNode
// GetRaftConfiguration returns the Raft server configuration, or nil
// when storage is not Raft.
GetRaftConfiguration(ctx context.Context) (*raft.RaftConfigurationResponse, error)
// GetRaftAutopilotState returns Raft autopilot state, or nil when
// autopilot is not in use.
GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState, error)
// StorageType returns the configured storage backend type.
StorageType() string
// ClusterID returns the cluster's unique identifier.
ClusterID() string
}

var _ WrappedCoreListNamespacesMounts = &vault.Core{}
var _ WrappedCoreMeta = &vault.Core{}

type WrappedCoreHCPLinkStatus interface {
WrappedCoreStandbyStates
Expand Down
4 changes: 2 additions & 2 deletions vault/request_handling.go
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,7 @@ func (c *Core) handleCancelableRequest(ctx context.Context, req *logical.Request
}

if walState.LocalIndex != 0 || walState.ReplicatedIndex != 0 {
walState.ClusterID = c.clusterID.Load()
walState.ClusterID = c.ClusterID()
if walState.LocalIndex == 0 {
if c.perfStandby {
walState.LocalIndex = LastRemoteWAL(c)
Expand Down Expand Up @@ -2343,7 +2343,7 @@ func (c *Core) checkSSCTokenInternal(ctx context.Context, token string, isPerfSt
return plainToken.Random, nil
}

requiredWalState := &logical.WALState{ClusterID: c.clusterID.Load(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0}
requiredWalState := &logical.WALState{ClusterID: c.ClusterID(), LocalIndex: plainToken.LocalIndex, ReplicatedIndex: 0}
if c.HasWALState(requiredWalState, isPerfStandby) {
return plainToken.Random, nil
}
Expand Down

0 comments on commit b6bf22f

Please sign in to comment.