Skip to content

Commit

Permalink
Merge pull request #79516 from cameronnunez/backport22.1-76516-79157
Browse files Browse the repository at this point in the history
release-22.1: server: improve visibility of ranges that fail to move during decommissioning
  • Loading branch information
cameronnunez authored May 27, 2022
2 parents 0695669 + 7dad71f commit dc03928
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 7 deletions.
32 changes: 32 additions & 0 deletions docs/generated/http/full.md
Original file line number Diff line number Diff line change
Expand Up @@ -6193,6 +6193,7 @@ If no NodeIDs are given, it targets the recipient node.
| ----- | ---- | ----- | ----------- | -------------- |
| node_ids | [int32](#cockroach.server.serverpb.DecommissionRequest-int32) | repeated | | [reserved](#support-status) |
| target_membership | [cockroach.kv.kvserver.liveness.livenesspb.MembershipStatus](#cockroach.server.serverpb.DecommissionRequest-cockroach.kv.kvserver.liveness.livenesspb.MembershipStatus) | | | [reserved](#support-status) |
| num_replica_report | [int32](#cockroach.server.serverpb.DecommissionRequest-int32) | | The number of decommissioning replicas to be reported. | [reserved](#support-status) |



Expand Down Expand Up @@ -6229,6 +6230,21 @@ DecommissionStatusResponse lists decommissioning statuses for a number of NodeID
| replica_count | [int64](#cockroach.server.serverpb.DecommissionStatusResponse-int64) | | The number of replicas on the node, computed by scanning meta2 ranges. | [reserved](#support-status) |
| membership | [cockroach.kv.kvserver.liveness.livenesspb.MembershipStatus](#cockroach.server.serverpb.DecommissionStatusResponse-cockroach.kv.kvserver.liveness.livenesspb.MembershipStatus) | | The membership status of the given node. | [reserved](#support-status) |
| draining | [bool](#cockroach.server.serverpb.DecommissionStatusResponse-bool) | | | [reserved](#support-status) |
| reported_replicas | [DecommissionStatusResponse.Replica](#cockroach.server.serverpb.DecommissionStatusResponse-cockroach.server.serverpb.DecommissionStatusResponse.Replica) | repeated | Decommissioning replicas on the given node to be reported. How many replicas are reported is determined by what was specified in the request. | [reserved](#support-status) |





<a name="cockroach.server.serverpb.DecommissionStatusResponse-cockroach.server.serverpb.DecommissionStatusResponse.Replica"></a>
#### DecommissionStatusResponse.Replica



| Field | Type | Label | Description | Support status |
| ----- | ---- | ----- | ----------- | -------------- |
| replica_id | [int32](#cockroach.server.serverpb.DecommissionStatusResponse-int32) | | | [reserved](#support-status) |
| range_id | [int32](#cockroach.server.serverpb.DecommissionStatusResponse-int32) | | | [reserved](#support-status) |



Expand Down Expand Up @@ -6257,6 +6273,7 @@ specified or, if none are specified, all nodes.
| Field | Type | Label | Description | Support status |
| ----- | ---- | ----- | ----------- | -------------- |
| node_ids | [int32](#cockroach.server.serverpb.DecommissionStatusRequest-int32) | repeated | | [reserved](#support-status) |
| num_replica_report | [int32](#cockroach.server.serverpb.DecommissionStatusRequest-int32) | | The number of decommissioning replicas to be reported. | [reserved](#support-status) |



Expand Down Expand Up @@ -6293,6 +6310,21 @@ DecommissionStatusResponse lists decommissioning statuses for a number of NodeID
| replica_count | [int64](#cockroach.server.serverpb.DecommissionStatusResponse-int64) | | The number of replicas on the node, computed by scanning meta2 ranges. | [reserved](#support-status) |
| membership | [cockroach.kv.kvserver.liveness.livenesspb.MembershipStatus](#cockroach.server.serverpb.DecommissionStatusResponse-cockroach.kv.kvserver.liveness.livenesspb.MembershipStatus) | | The membership status of the given node. | [reserved](#support-status) |
| draining | [bool](#cockroach.server.serverpb.DecommissionStatusResponse-bool) | | | [reserved](#support-status) |
| reported_replicas | [DecommissionStatusResponse.Replica](#cockroach.server.serverpb.DecommissionStatusResponse-cockroach.server.serverpb.DecommissionStatusResponse.Replica) | repeated | Decommissioning replicas on the given node to be reported. How many replicas are reported is determined by what was specified in the request. | [reserved](#support-status) |





<a name="cockroach.server.serverpb.DecommissionStatusResponse-cockroach.server.serverpb.DecommissionStatusResponse.Replica"></a>
#### DecommissionStatusResponse.Replica



| Field | Type | Label | Description | Support status |
| ----- | ---- | ----- | ----------- | -------------- |
| replica_id | [int32](#cockroach.server.serverpb.DecommissionStatusResponse-int32) | | | [reserved](#support-status) |
| range_id | [int32](#cockroach.server.serverpb.DecommissionStatusResponse-int32) | | | [reserved](#support-status) |



Expand Down
48 changes: 47 additions & 1 deletion pkg/cli/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,17 @@ func runDecommissionNodeImpl(
MaxBackoff: 20 * time.Second,
}

// Log verbosity is increased when there is possibly a decommission stall.
// If the number of decommissioning replicas does not decrease after some time
// (i.e. the decommission status has not changed after
// sameStatusThreshold iterations), verbosity is automatically set.
// Some decommissioning replicas will be reported to the operator.
const sameStatusThreshold = 15
var (
numReplicaReport = 0
sameStatusCount = 0
)

// Decommissioning a node is driven by a three-step process.
// 1) Mark each node as 'decommissioning'. In doing so, all replicas are
// slowly moved off of these nodes.
Expand All @@ -472,21 +483,41 @@ func runDecommissionNodeImpl(
req := &serverpb.DecommissionRequest{
NodeIDs: nodeIDs,
TargetMembership: livenesspb.MembershipStatus_DECOMMISSIONING,
NumReplicaReport: int32(numReplicaReport),
}
resp, err := c.Decommission(ctx, req)
if err != nil {
fmt.Fprintln(stderr)
return errors.Wrap(err, "while trying to mark as decommissioning")
}

if numReplicaReport > 0 {
printDecommissionReplicas(*resp)
}

if !reflect.DeepEqual(&prevResponse, resp) {
fmt.Fprintln(stderr)
if err := printDecommissionStatus(*resp); err != nil {
if err = printDecommissionStatus(*resp); err != nil {
return err
}
prevResponse = *resp

// The decommissioning status changed. Set `sameStatusCount` back to zero.
sameStatusCount = 0
numReplicaReport = 0
} else {
// Print a marker indicating that there has been no progress,
// instead of printing the same status.
fmt.Fprintf(stderr, ".")

// Report decommissioning replicas if there's been significant time of
// no progress.
if sameStatusCount >= sameStatusThreshold && numReplicaReport == 0 {
// Configure a number of replicas to report.
numReplicaReport = 5
} else {
sameStatusCount++
}
}

anyActive := false
Expand Down Expand Up @@ -591,6 +622,21 @@ func printDecommissionStatus(resp serverpb.DecommissionStatusResponse) error {
clisqlexec.NewRowSliceIter(decommissionResponseValueToRows(resp.Status), decommissionResponseAlignment()))
}

func printDecommissionReplicas(resp serverpb.DecommissionStatusResponse) {
fmt.Fprintln(stderr, "\npossible decommission stall detected")

for _, nodeStatus := range resp.Status {
for _, replica := range nodeStatus.ReportedReplicas {
fmt.Fprintf(stderr,
"n%d still has replica id %d for range r%d\n",
nodeStatus.NodeID,
replica.ReplicaID,
replica.RangeID,
)
}
}
}

func runRecommissionNode(cmd *cobra.Command, args []string) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
Expand Down
91 changes: 91 additions & 0 deletions pkg/cmd/roachtest/tests/decommission.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"fmt"
"math/rand"
"reflect"
"regexp"
"strconv"
"strings"
"time"
Expand All @@ -26,6 +27,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/util/retry"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
Expand Down Expand Up @@ -99,6 +101,17 @@ func registerDecommission(r registry.Registry) {
},
})
}
{
numNodes := 6
r.Add(registry.TestSpec{
Name: "decommission/slow",
Owner: registry.OwnerServer,
Cluster: r.MakeClusterSpec(numNodes),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runDecommissionSlow(ctx, t, c)
},
})
}
}

// runDrainAndDecommission marks 3 nodes in the test cluster as "draining" and
Expand Down Expand Up @@ -1091,6 +1104,84 @@ func runDecommissionDrains(ctx context.Context, t test.Test, c cluster.Cluster)
require.NoError(t, cli.MatchCSV(o, expDecommissioned))
}

// runDecommissionSlow decommissions 5 nodes in a test cluster of 6
// (with a replication factor of 5), which will guarantee a replica transfer
// stall. This test is meant to ensure that decommissioning replicas are
// reported when replica transfers stall.
func runDecommissionSlow(ctx context.Context, t test.Test, c cluster.Cluster) {
const (
numNodes = 6
pinnedNodeID = 1
replicationFactor = 5
)

var verboseStoreLogRe = regexp.MustCompile("possible decommission stall detected")

err := c.PutE(ctx, t.L(), t.Cockroach(), "./cockroach", c.All())
require.NoError(t, err)

c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All())

run := func(db *gosql.DB, query string) {
_, err = db.ExecContext(ctx, query)
require.NoError(t, err)
t.L().Printf("run: %s\n", query)
}

{
db := c.Conn(ctx, t.L(), pinnedNodeID)
defer db.Close()

// Set the replication factor to 5.
run(db, fmt.Sprintf(`ALTER RANGE default CONFIGURE ZONE USING num_replicas=%d`, replicationFactor))
run(db, fmt.Sprintf(`ALTER DATABASE system CONFIGURE ZONE USING num_replicas=%d`, replicationFactor))

// Increase the speed of decommissioning.
run(db, `SET CLUSTER SETTING kv.snapshot_rebalance.max_rate='2GiB'`)
run(db, `SET CLUSTER SETTING kv.snapshot_recovery.max_rate='2GiB'`)

// Wait for initial up-replication.
err := WaitForReplication(ctx, t, db, replicationFactor)
require.NoError(t, err)
}

// Decommission 5 nodes from the cluster, resulting in immovable replicas.
// Be prepared to cancel the context for the processes running decommissions
// since the decommissions will stall.
decomCtx, cancel := context.WithTimeout(ctx, 10*time.Minute)
defer cancel()
m := c.NewMonitor(decomCtx)
for nodeID := 2; nodeID <= numNodes; nodeID++ {
id := nodeID
m.Go(func(ctx context.Context) error {
decom := func(id int) error {
t.Status(fmt.Sprintf("decommissioning node %d", id))
return c.RunE(ctx,
c.Node(id),
fmt.Sprintf("./cockroach node decommission %d --insecure", id),
)
}
return decom(id)
})
}

// Check for reported decommissioning replicas.
t.Status("checking for decommissioning replicas report...")
testutils.SucceedsWithin(t, func() error {
for nodeID := 1; nodeID <= numNodes; nodeID++ {
if err = c.RunE(ctx,
c.Node(nodeID),
fmt.Sprintf("grep -q '%s' logs/cockroach.log", verboseStoreLogRe),
); err == nil {
return nil
}
}
return errors.New("still waiting for decommissioning replicas report")
},
3*time.Minute,
)
}

// Header from the output of `cockroach node decommission`.
var decommissionHeader = []string{
"id", "is_live", "replicas", "is_decommissioning", "membership", "is_draining",
Expand Down
49 changes: 43 additions & 6 deletions pkg/server/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -2307,6 +2307,7 @@ func (s *adminServer) decommissionStatusHelper(
// Get the number of replicas on each node. We *may* not need all of them,
// but that would be more complicated than seems worth it right now.
nodeIDs := req.NodeIDs
numReplicaReport := req.NumReplicaReport

// If no nodeIDs given, use all nodes.
if len(nodeIDs) == 0 {
Expand All @@ -2319,6 +2320,24 @@ func (s *adminServer) decommissionStatusHelper(
}
}

// If the client specified a number of decommissioning replicas to report,
// prepare to get decommissioning replicas to report to the operator.
// numReplicaReport is the number of replicas reported for each node.
var replicasToReport map[roachpb.NodeID][]*serverpb.DecommissionStatusResponse_Replica
if numReplicaReport > 0 {
log.Ops.Warning(ctx, "possible decommission stall detected")
replicasToReport = make(map[roachpb.NodeID][]*serverpb.DecommissionStatusResponse_Replica)
}

isDecommissioningNode := func(n roachpb.NodeID) bool {
for _, nodeID := range nodeIDs {
if n == nodeID {
return true
}
}
return false
}

// Compute the replica counts for the target nodes only. This map doubles as
// a lookup table to check whether we care about a given node.
var replicaCounts map[roachpb.NodeID]int64
Expand All @@ -2336,6 +2355,24 @@ func (s *adminServer) decommissionStatusHelper(
return errors.Wrapf(err, "%s: unable to unmarshal range descriptor", row.Key)
}
for _, r := range rangeDesc.Replicas().Descriptors() {
if numReplicaReport > 0 {
if len(replicasToReport[r.NodeID]) < int(numReplicaReport) {
if isDecommissioningNode(r.NodeID) {
replicasToReport[r.NodeID] = append(replicasToReport[r.NodeID],
&serverpb.DecommissionStatusResponse_Replica{
ReplicaID: r.ReplicaID,
RangeID: rangeDesc.RangeID,
},
)
log.Ops.Warningf(ctx,
"n%d still has replica id %d for range r%d",
r.NodeID,
r.ReplicaID,
rangeDesc.RangeID,
)
}
}
}
if _, ok := replicaCounts[r.NodeID]; ok {
replicaCounts[r.NodeID]++
}
Expand Down Expand Up @@ -2373,15 +2410,15 @@ func (s *adminServer) decommissionStatusHelper(
return nil, errors.Newf("unable to get liveness for %d", nodeID)
}
nodeResp := serverpb.DecommissionStatusResponse_Status{
NodeID: l.NodeID,
ReplicaCount: replicaCounts[l.NodeID],
Membership: l.Membership,
Draining: l.Draining,
NodeID: l.NodeID,
ReplicaCount: replicaCounts[l.NodeID],
Membership: l.Membership,
Draining: l.Draining,
ReportedReplicas: replicasToReport[l.NodeID],
}
if l.IsLive(s.server.clock.Now().GoTime()) {
nodeResp.IsLive = true
}

res.Status = append(res.Status, nodeResp)
}

Expand Down Expand Up @@ -2420,7 +2457,7 @@ func (s *adminServer) Decommission(
return &serverpb.DecommissionStatusResponse{}, nil
}

return s.DecommissionStatus(ctx, &serverpb.DecommissionStatusRequest{NodeIDs: nodeIDs})
return s.DecommissionStatus(ctx, &serverpb.DecommissionStatusRequest{NodeIDs: nodeIDs, NumReplicaReport: req.NumReplicaReport})
}

// DataDistribution returns a count of replicas on each node for each table.
Expand Down
14 changes: 14 additions & 0 deletions pkg/server/serverpb/admin.proto
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,8 @@ message DrainResponse {
message DecommissionStatusRequest {
repeated int32 node_ids = 1 [(gogoproto.customname) = "NodeIDs",
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID"];
// The number of decommissioning replicas to be reported.
int32 num_replica_report = 2;
}

// DecommissionRequest requests the server to set the membership status on
Expand All @@ -486,10 +488,18 @@ message DecommissionRequest {
repeated int32 node_ids = 1 [(gogoproto.customname) = "NodeIDs",
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID"];
kv.kvserver.liveness.livenesspb.MembershipStatus target_membership = 2;
// The number of decommissioning replicas to be reported.
int32 num_replica_report = 3;
}

// DecommissionStatusResponse lists decommissioning statuses for a number of NodeIDs.
message DecommissionStatusResponse {
message Replica {
int32 replica_id = 1 [ (gogoproto.customname) = "ReplicaID",
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.ReplicaID"];
int32 range_id = 2 [ (gogoproto.customname) = "RangeID",
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.RangeID"];
}
message Status {
int32 node_id = 1 [ (gogoproto.customname) = "NodeID",
(gogoproto.casttype) = "github.com/cockroachdb/cockroach/pkg/roachpb.NodeID"];
Expand All @@ -499,6 +509,10 @@ message DecommissionStatusResponse {
// The membership status of the given node.
kv.kvserver.liveness.livenesspb.MembershipStatus membership = 4;
bool draining = 5;
// Decommissioning replicas on the given node to be reported.
// How many replicas are reported is determined by what was specified in the
// request.
repeated Replica reported_replicas = 6;
}
// Status of all affected nodes.
repeated Status status = 2 [(gogoproto.nullable) = false];
Expand Down

0 comments on commit dc03928

Please sign in to comment.