Skip to content

Commit

Permalink
Merge pull request cockroachdb#20308 from tschottdorf/cli/grpc-cleanup
Browse files Browse the repository at this point in the history
cli: add timeouts to `node status` and `node ls`
  • Loading branch information
tbg authored Nov 30, 2017
2 parents 25ad9d4 + da5dca5 commit 602df28
Show file tree
Hide file tree
Showing 18 changed files with 295 additions and 119 deletions.
30 changes: 30 additions & 0 deletions pkg/cli/cli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
Expand Down Expand Up @@ -1476,6 +1477,35 @@ func Example_node() {
// Error: node 10000 doesn't exist
}

func TestCLITimeout(t *testing.T) {
defer leaktest.AfterTest(t)()

c := newCLITest(cliTestParams{})
defer c.cleanup()

// Wrap the meat of the test in a retry loop. Setting a timeout like this is
// racy as the operation may have succeeded by the time the scheduler gives
// the timeout a chance to have an effect.
testutils.SucceedsSoon(t, func() error {
out, err := c.RunWithCapture("node status 1 --timeout 1ns")
if err != nil {
t.Fatal(err)
}

const exp = `node status 1 --timeout 1ns
operation timed out.
rpc error: code = DeadlineExceeded desc = context deadline exceeded
`
if out != exp {
err := errors.Errorf("unexpected output:\n%q\nwanted:\n%q", out, exp)
t.Log(err)
return err
}
return nil
})
}

func TestNodeStatus(t *testing.T) {
defer leaktest.AfterTest(t)()

Expand Down
7 changes: 7 additions & 0 deletions pkg/cli/cliflags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,13 @@ Takes any of the following values:
</PRE>`,
}

Timeout = FlagInfo{
Name: "timeout",
Description: `
If nonzero, return with an error if the operation does not conclude within the specified timeout.
The timeout is specified with a suffix of 's' for seconds, 'm' for minutes, and 'h' for hours.`,
}

NodeRanges = FlagInfo{
Name: "ranges",
Description: `Show node details for ranges and replicas.`,
Expand Down
23 changes: 12 additions & 11 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func printKeyValue(kv engine.MVCCKeyValue) (bool, error) {

func runDebugKeys(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(stopperContext(stopper))
defer stopper.Stop(context.Background())

if len(args) != 1 {
return errors.New("one argument required: dir")
Expand Down Expand Up @@ -163,7 +163,7 @@ state like the raft HardState. With --replicated, only includes data covered by

func runDebugRangeData(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(stopperContext(stopper))
defer stopper.Stop(context.Background())

if len(args) != 2 {
return errors.New("two arguments required: dir range_id")
Expand Down Expand Up @@ -401,7 +401,7 @@ func loadRangeDescriptor(

func runDebugRangeDescriptors(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(stopperContext(stopper))
defer stopper.Stop(context.Background())

if len(args) != 1 {
return errors.New("one argument required: dir")
Expand Down Expand Up @@ -473,7 +473,7 @@ func printRaftLogEntry(kv engine.MVCCKeyValue) (bool, error) {

func runDebugRaftLog(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(stopperContext(stopper))
defer stopper.Stop(context.Background())

if len(args) != 2 {
return errors.New("two arguments required: dir range_id")
Expand Down Expand Up @@ -512,7 +512,7 @@ Uses a hard-coded GC policy with a 24 hour TTL for old versions.

func runDebugGCCmd(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(stopperContext(stopper))
defer stopper.Stop(context.Background())

var rangeID roachpb.RangeID
switch len(args) {
Expand Down Expand Up @@ -601,7 +601,7 @@ type replicaCheckInfo struct {

func runDebugCheckStoreCmd(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(stopperContext(stopper))
defer stopper.Stop(context.Background())

if len(args) != 1 {
return errors.New("one required argument: dir")
Expand Down Expand Up @@ -723,7 +723,7 @@ Compact the sstables in a store.

func runDebugCompact(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(stopperContext(stopper))
defer stopper.Stop(context.Background())

if len(args) != 1 {
return errors.New("one argument is required")
Expand Down Expand Up @@ -766,7 +766,7 @@ and TiB.

func runDebugSSTables(cmd *cobra.Command, args []string) error {
stopper := stop.NewStopper()
defer stopper.Stop(stopperContext(stopper))
defer stopper.Stop(context.Background())

if len(args) != 1 {
return errors.New("one argument is required")
Expand Down Expand Up @@ -794,6 +794,8 @@ a JSON file captured from a node's /_status/gossip/ debug endpoint.
}

func runDebugGossipValues(cmd *cobra.Command, args []string) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// If a file is provided, use it. Otherwise, try talking to the running node.
var gossipInfo *gossip.InfoStatus
if debugCtx.inputFile != "" {
Expand All @@ -807,12 +809,11 @@ func runDebugGossipValues(cmd *cobra.Command, args []string) error {
return errors.Wrap(err, "failed to parse provided file as gossip.InfoStatus")
}
} else {
conn, _, stopper, err := getClientGRPCConn()
conn, _, finish, err := getClientGRPCConn(ctx)
if err != nil {
return err
}
ctx := stopperContext(stopper)
defer stopper.Stop(ctx)
defer finish()

status := serverpb.NewStatusClient(conn)
gossipInfo, err = status.Gossip(ctx, &serverpb.GossipRequest{})
Expand Down
56 changes: 43 additions & 13 deletions pkg/cli/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ package cli
import (
"net"

"google.golang.org/grpc"
"google.golang.org/grpc/codes"

"golang.org/x/net/context"

"github.com/cockroachdb/cockroach/pkg/roachpb"
Expand All @@ -34,27 +37,54 @@ func MaybeDecorateGRPCError(
return func(cmd *cobra.Command, args []string) error {
err := wrapped(cmd, args)

{
unwrappedErr := errors.Cause(err)
if unwrappedErr == nil {
return err
}
_, isSendError := unwrappedErr.(*roachpb.SendError)
isGRPCError := grpcutil.IsClosedConnection(unwrappedErr)
_, isNetError := unwrappedErr.(*net.OpError)
if !(isSendError || isGRPCError || isNetError) {
return err // intentionally return original to keep wrapping
}
if err == nil {
return nil
}

format := `unable to connect or connection lost.
connDropped := func() error {
const format = `unable to connect or connection lost.
Please check the address and credentials such as certificates (if attempting to
communicate with a secure cluster).
%s`
return errors.Errorf(format, err)
}
opTimeout := func() error {
const format = `operation timed out.
%s`
return errors.Errorf(format, err)
}

return errors.Errorf(format, err)
// Is this an "unable to connect" type of error?
unwrappedErr := errors.Cause(err)
switch unwrappedErr.(type) {
case *roachpb.SendError:
return connDropped()
case *net.OpError:
return connDropped()
}

// No, it's not. Is it a plain context cancellation (i.e. timeout)?
switch unwrappedErr {
case context.DeadlineExceeded:
return opTimeout()
case context.Canceled:
return opTimeout()
}

// Is it a GRPC-observed context cancellation (i.e. timeout) or a GRPC
// connection error?
switch {
case grpc.Code(err) == codes.DeadlineExceeded:
return opTimeout()
case grpcutil.IsClosedConnection(unwrappedErr):
return connDropped()
}

// Nothing we can special case, just return what we have.
return err
}
}

Expand Down
24 changes: 24 additions & 0 deletions pkg/cli/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"strings"
"time"

"golang.org/x/net/context"

"github.com/kr/text"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
Expand All @@ -43,6 +45,16 @@ var clientConnHost, clientConnPort string
var tempDir string
var externalIODir string

const defaultCmdTimeout = 0 // no timeout
var cmdTimeout time.Duration

func cmdTimeoutContext(ctx context.Context) (context.Context, func()) {
if cmdTimeout != 0 {
return context.WithTimeout(ctx, cmdTimeout)
}
return context.WithCancel(ctx)
}

const usageIndentation = 8
const wrapWidth = 79 - usageIndentation

Expand Down Expand Up @@ -304,6 +316,18 @@ func init() {
stringFlag(f, &baseCfg.SSLCertsDir, cliflags.CertsDir, base.DefaultCertsDirectory)
}

timeoutCmds := []*cobra.Command{
statusNodeCmd,
lsNodesCmd,
// If you add something here, make sure the actual implementation
// of the command uses `cmdTimeoutContext(.)` or it will ignore
// the timeout.
}

for _, cmd := range timeoutCmds {
durationFlag(cmd.Flags(), &cmdTimeout, cliflags.Timeout, defaultCmdTimeout)
}

// Node Status command.
{
f := statusNodeCmd.Flags()
Expand Down
12 changes: 9 additions & 3 deletions pkg/cli/haproxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"io"
"os"

"golang.org/x/net/context"

"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/spf13/cobra"
)
Expand All @@ -44,18 +46,22 @@ func runGenHAProxyCmd(cmd *cobra.Command, args []string) error {
return usageAndError(cmd)
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

configTemplate, err := template.New("haproxy template").Parse(haProxyTemplate)
if err != nil {
return err
}

c, stopper, err := getStatusClient()
conn, _, finish, err := getClientGRPCConn(ctx)
if err != nil {
return err
}
defer stopper.Stop(stopperContext(stopper))
defer finish()
c := serverpb.NewStatusClient(conn)

nodeStatuses, err := c.Nodes(stopperContext(stopper), &serverpb.NodesRequest{})
nodeStatuses, err := c.Nodes(ctx, &serverpb.NodesRequest{})
if err != nil {
return err
}
Expand Down
22 changes: 9 additions & 13 deletions pkg/cli/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ import (
"fmt"
"os"

"golang.org/x/net/context"

"github.com/spf13/cobra"

"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/util/stop"
)

var initCmd = &cobra.Command{
Expand All @@ -42,12 +43,16 @@ single-node cluster, so the init command is not used in that case.
}

func runInit(cmd *cobra.Command, args []string) error {
c, stopper, err := getInitClient()
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

conn, _, finish, err := getClientGRPCConn(ctx)
if err != nil {
return err
}
ctx := stopperContext(stopper)
defer stopper.Stop(ctx)
defer finish()

c := serverpb.NewInitClient(conn)

if _, err = c.Bootstrap(ctx, &serverpb.BootstrapRequest{}); err != nil {
return err
Expand All @@ -56,12 +61,3 @@ func runInit(cmd *cobra.Command, args []string) error {
fmt.Fprintln(os.Stdout, "Cluster successfully initialized")
return nil
}

func getInitClient() (serverpb.InitClient, *stop.Stopper, error) {
// TODO(adam): This depends on servercfg which is a bit weird..
conn, _, stopper, err := getClientGRPCConn()
if err != nil {
return nil, nil, err
}
return serverpb.NewInitClient(conn), stopper, nil
}
Loading

0 comments on commit 602df28

Please sign in to comment.