Skip to content

Commit

Permalink
Add replication state to EchoReply (#3810)
Browse files Browse the repository at this point in the history
  • Loading branch information
jefferai authored Jan 18, 2018
1 parent 98e8884 commit ba219f4
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 39 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@

DEPRECATIONS/CHANGES:

* `sys/health` DR Secondary Reporting: The `replication_dr_secondary` bool
returned by `sys/health` could be misleading since it would be `false` both
when a cluster was not a DR secondary and when the node was a standby in
the cluster that had not yet fully received state from the active node. This
could cause health checks on LBs to decide that the node was acceptable for
traffic even though DR secondaries cannot handle normal Vault traffic. (In
other words, the bool could only convey "yes" or "no" but not "not sure
yet".) This has been replaced by `replication_dr_state` and
`replication_performance_state` which are string values that convey the
current state of the node; a value of `disabled` indicates that replication
is disabled or the state is still being discovered. As a result, an LB check
can positively verify that the node is both not `disabled` and is not a DR
secondary, and avoid sending traffic to it if either check fails.
* PKI Secret Backend Roles parameter types: For `ou` and `organization`
in role definitions in the PKI secret backend, input can now be a
comma-separated string or an array of strings. Reading a role will
Expand Down
39 changes: 21 additions & 18 deletions http/sys_health.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strconv"
"time"

"github.com/hashicorp/vault/helper/consts"
"github.com/hashicorp/vault/vault"
"github.com/hashicorp/vault/version"
)
Expand Down Expand Up @@ -111,7 +112,7 @@ func getSysHealth(core *vault.Core, r *http.Request) (int, *HealthResponse, erro
// Check system status
sealed, _ := core.Sealed()
standby, _ := core.Standby()
drSecondary := core.IsDRSecondary()
replicationState := core.ReplicationState()
init, err := core.Initialized()
if err != nil {
return http.StatusInternalServerError, nil, err
Expand All @@ -124,7 +125,7 @@ func getSysHealth(core *vault.Core, r *http.Request) (int, *HealthResponse, erro
code = uninitCode
case sealed:
code = sealedCode
case drSecondary:
case replicationState.HasState(consts.ReplicationDRSecondary):
code = drSecondaryCode
case !standbyOK && standby:
code = standbyCode
Expand All @@ -146,25 +147,27 @@ func getSysHealth(core *vault.Core, r *http.Request) (int, *HealthResponse, erro

// Format the body
body := &HealthResponse{
Initialized: init,
Sealed: sealed,
Standby: standby,
ReplicationDRSecondary: drSecondary,
ServerTimeUTC: time.Now().UTC().Unix(),
Version: version.GetVersion().VersionNumber(),
ClusterName: clusterName,
ClusterID: clusterID,
Initialized: init,
Sealed: sealed,
Standby: standby,
ReplicationPerformanceState: replicationState.GetPerformanceString(),
ReplicationDRState: replicationState.GetDRString(),
ServerTimeUTC: time.Now().UTC().Unix(),
Version: version.GetVersion().VersionNumber(),
ClusterName: clusterName,
ClusterID: clusterID,
}
return code, body, nil
}

// HealthResponse is the JSON body that getSysHealth returns alongside the
// HTTP status code.
//
// NOTE(review): this listing comes from a rendered diff without +/- markers,
// so it appears to show both the pre-change field set (with
// ReplicationDRSecondary) and the post-change field set (with the two
// replication state strings) — confirm against the actual file.
type HealthResponse struct {
Initialized bool `json:"initialized"`
Sealed bool `json:"sealed"`
Standby bool `json:"standby"`
ReplicationDRSecondary bool `json:"replication_dr_secondary"`
ServerTimeUTC int64 `json:"server_time_utc"`
Version string `json:"version"`
ClusterName string `json:"cluster_name,omitempty"`
ClusterID string `json:"cluster_id,omitempty"`
Initialized bool `json:"initialized"`
Sealed bool `json:"sealed"`
Standby bool `json:"standby"`
// Filled from replicationState.GetPerformanceString() in getSysHealth.
ReplicationPerformanceState string `json:"replication_performance_state"`
// Filled from replicationState.GetDRString() in getSysHealth.
ReplicationDRState string `json:"replication_dr_state"`
// Unix timestamp taken from time.Now().UTC() when the response is built.
ServerTimeUTC int64 `json:"server_time_utc"`
Version string `json:"version"`
ClusterName string `json:"cluster_name,omitempty"`
ClusterID string `json:"cluster_id,omitempty"`
}
6 changes: 5 additions & 1 deletion vault/request_forwarding.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,8 @@ func (s *forwardedRequestRPCServer) Echo(ctx context.Context, in *EchoRequest) (
s.core.clusterPeerClusterAddrsCache.Set(in.ClusterAddr, nil, 0)
}
return &EchoReply{
Message: "pong",
Message: "pong",
ReplicationState: uint32(s.core.ReplicationState()),
}, nil
}

Expand Down Expand Up @@ -461,6 +462,9 @@ func (c *forwardingClient) startHeartbeat() {
c.core.logger.Debug("forwarding: unexpected echo response from active node", "message", resp.Message)
return
}
// Store the active node's replication state to display in
// sys/health calls
atomic.StoreUint32(c.core.replicationState, resp.ReplicationState)
c.core.logger.Trace("forwarding: successful heartbeat")
}

Expand Down
49 changes: 29 additions & 20 deletions vault/request_forwarding_service.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vault/request_forwarding_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ message EchoRequest {
// EchoReply is the response message returned by the Echo RPC.
message EchoReply {
// The Echo handler sets this to "pong".
string message = 1;
repeated string cluster_addrs = 2;
// Replication state of the replying node; the Echo handler sends
// uint32(core.ReplicationState()), and the heartbeat client stores the
// received value for use in sys/health responses.
uint32 replication_state = 3;
}

service RequestForwarding {
Expand Down

0 comments on commit ba219f4

Please sign in to comment.