Skip to content

Commit

Permalink
Merge #39492
Browse files Browse the repository at this point in the history
39492: cli: use zone configs to disable replication r=knz a=knz

Fixes #39379.

As established in #39382 it is not safe to modify the global
default zone config objects to non-standard values.

This commit changes `start-single-node` and `demo` to use
zone configs via SQL instead.

Also as requested by @piyush-singh it now informs the user
the replication was disabled, for example:

```
*
* INFO: Replication was disabled for this cluster.
* When/if adding nodes in the future, update zone configurations to increase the replication factor.
*
```

Release note: None

Co-authored-by: Raphael 'kena' Poss <[email protected]>
  • Loading branch information
craig[bot] and knz committed Aug 13, 2019
2 parents 14f9c37 + b233dba commit b056645
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 42 deletions.
28 changes: 9 additions & 19 deletions pkg/cli/demo.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/cli/cliflags"
"github.com/cockroachdb/cockroach/pkg/config"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server"
Expand All @@ -28,7 +27,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log/logflags"
"github.com/cockroachdb/cockroach/pkg/workload"
"github.com/cockroachdb/cockroach/pkg/workload/workloadsql"
"github.com/gogo/protobuf/proto"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
Expand Down Expand Up @@ -123,27 +121,11 @@ func setupTransientServers(
}
cleanup = func() { stopper.Stop(ctx) }

// Set up the default zone configuration. We are using an in-memory store
// so we really want to disable replication.
cfg := config.DefaultZoneConfig()
sysCfg := config.DefaultSystemZoneConfig()

if demoCtx.nodes < 3 {
cfg.NumReplicas = proto.Int32(1)
sysCfg.NumReplicas = proto.Int32(1)
}

// Create the first transient server. The others will join this one.
args := base.TestServerArgs{
PartOfCluster: true,
Insecure: true,
Knobs: base.TestingKnobs{
Server: &server.TestingKnobs{
DefaultZoneConfigOverride: &cfg,
DefaultSystemZoneConfigOverride: &sysCfg,
},
},
Stopper: stopper,
Stopper: stopper,
}

serverFactory := server.TestServerFactory
Expand All @@ -166,6 +148,14 @@ func setupTransientServers(
}
}

if demoCtx.nodes < 3 {
// Set up the default zone configuration. We are using an in-memory store
// so we really want to disable replication.
if err := cliDisableReplication(ctx, s.Server); err != nil {
return ``, ``, cleanup, err
}
}

// Prepare the URL for use by the SQL shell.
options := url.Values{}
options.Add("sslmode", "disable")
Expand Down
48 changes: 48 additions & 0 deletions pkg/cli/disable_replication.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import (
"context"
"fmt"

"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
)

// cliDisableReplication changes the replication factor on
// all defined zones to become 1. This is used by start-single-node
// and demo to define single-node clusters, so as to avoid
// churn in the log files.
//
// The change is effected using the internal SQL interface of the
// given server object.
func cliDisableReplication(ctx context.Context, s *server.Server) error {
return s.RunLocalSQL(ctx,
func(ctx context.Context, ie *sql.InternalExecutor) error {
rows, err := ie.Query(ctx, "get-zones", nil,
"SELECT target FROM crdb_internal.zones")
if err != nil {
return err
}

for _, row := range rows {
zone := string(*row[0].(*tree.DString))
if _, err := ie.Exec(ctx, "set-zone", nil,
fmt.Sprintf("ALTER %s CONFIGURE ZONE USING num_replicas = 1", zone)); err != nil {
return err
}
}

return nil
})
}
8 changes: 0 additions & 8 deletions pkg/cli/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log/logflags"
"github.com/cockroachdb/cockroach/pkg/util/netutil"
"github.com/cockroachdb/errors"
"github.com/gogo/protobuf/proto"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
)
Expand Down Expand Up @@ -250,13 +249,6 @@ func init() {
})
}

// start-single-node starts with default replication of 1.
AddPersistentPreRunE(startSingleNodeCmd, func(cmd *cobra.Command, _ []string) error {
serverCfg.DefaultSystemZoneConfig.NumReplicas = proto.Int32(1)
serverCfg.DefaultZoneConfig.NumReplicas = proto.Int32(1)
return nil
})

// Map any flags registered in the standard "flag" package into the
// top-level cockroach command.
pf := cockroachCmd.PersistentFlags()
Expand Down
51 changes: 51 additions & 0 deletions pkg/cli/interactive_tests/test_disable_replication.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#! /usr/bin/env expect -f

source [file join [file dirname $argv0] common.tcl]

spawn /bin/bash
send "PS1=':''/# '\r"
eexpect ":/# "

start_test "Check that demo disables replication properly"
send "$argv demo -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 1"
eexpect ":/# "
end_test

start_test "Check that start-single-node disables replication properly"
system "rm -rf logs/db"
start_server $argv
send "$argv sql -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 1"
eexpect ":/# "
end_test

start_test "Check that it remains possible to reset the replication factor"
send "$argv sql -e 'alter range default configure zone using num_replicas = 3'\r"
eexpect "CONFIGURE ZONE"
eexpect ":/# "
stop_server $argv
start_server $argv
send "$argv sql -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 3"
eexpect ":/# "
end_test

stop_server $argv

start_test "Check that start-single-node on a regular cluster does not reset the replication factor"
# make a fresh server but using the regular 'start'
system "rm -rf logs/db"
system "$argv start --insecure --pid-file=server_pid --background -s=path=logs/db >>logs/expect-cmd.log 2>&1;
$argv sql -e 'select 1'"
# restart with start-single-node
stop_server $argv
start_server $argv
# check that the replication factor was unchanged
send "$argv sql -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 3"
eexpect ":/# "
end_test

send "exit 0\r"
eexpect eof
9 changes: 0 additions & 9 deletions pkg/cli/interactive_tests/test_flags.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,5 @@ interrupt
eexpect ":/# "
end_test

start_test "Check that start-single-node disables replication properly"
system "rm -rf logs/db"
start_server $argv
send "$argv sql -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 1"
eexpect ":/# "
stop_server $argv
end_test

send "exit 0\r"
eexpect eof
27 changes: 27 additions & 0 deletions pkg/cli/interactive_tests/test_multiple_nodes.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#! /usr/bin/env expect -f

source [file join [file dirname $argv0] common.tcl]

start_server $argv

start_test "Check that it is possible to add nodes to a server started with start-single-node"

system "$argv start --insecure --port=26258 --http-port=8083 --pid-file=server_pid2 --background -s=path=logs/db2 --join=:26257 >>logs/expect-cmd.log 2>&1;
$argv sql -e 'select 1' --port=26258"

system "$argv start --insecure --port=26259 --http-port=8084 --pid-file=server_pid3 --background -s=path=logs/db3 --join=:26257 >>logs/expect-cmd.log 2>&1;
$argv sql -e 'select 1' --port=26259"

# Check the number of nodes
spawn $argv node ls
eexpect id
eexpect "3 rows"
eexpect eof

# Remove the additional nodes.
system "$argv quit --port=26258"
system "$argv quit --port=26259"

end_test

stop_server $argv
30 changes: 24 additions & 6 deletions pkg/cli/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,18 +416,22 @@ func runStartSingleNode(cmd *cobra.Command, args []string) error {
// Now actually set the flag as changed so that the start code
// doesn't warn that it was not set.
joinFlag.Changed = true
return runStart(cmd, args)
return runStart(cmd, args, true /*disableReplication*/)
}

func runStartJoin(cmd *cobra.Command, args []string) error {
return runStart(cmd, args)
return runStart(cmd, args, false /*disableReplication*/)
}

// runStart starts the cockroach node using --store as the list of
// storage devices ("stores") on this machine and --join as the list
// of other active nodes used to join this node to the cockroach
// cluster, if this is its first time connecting.
func runStart(cmd *cobra.Command, args []string) error {
//
// If the argument disableReplication is true and we are starting
// a fresh cluster, the replication factor will be disabled in
// all zone configs.
func runStart(cmd *cobra.Command, args []string, disableReplication bool) error {
tBegin := timeutil.Now()

// First things first: if the user wants background processing,
Expand Down Expand Up @@ -710,6 +714,21 @@ If problems persist, please see ` + base.DocsURL("cluster-setup-troubleshooting.
s.PeriodicallyCheckForUpdates(ctx)
}

initialBoot := s.InitialBoot()

if disableReplication && initialBoot {
// For start-single-node, set the default replication factor to
// 1 so as to avoid warning message and unnecessary rebalance
// churn.
if err := cliDisableReplication(ctx, s); err != nil {
log.Errorf(ctx, "could not disable replication: %v", err)
return err
}
log.Shout(ctx, log.Severity_INFO,
"Replication was disabled for this cluster.\n"+
"When/if adding nodes in the future, update zone configurations to increase the replication factor.")
}

// Now inform the user that the server is running and tell the
// user about its run-time derived parameters.
var buf bytes.Buffer
Expand All @@ -725,9 +744,9 @@ If problems persist, please see ` + base.DocsURL("cluster-setup-troubleshooting.
pgURL, err := serverCfg.PGURL(url.User(security.RootUser))
if err != nil {
log.Errorf(ctx, "failed computing the URL: %v", err)
} else {
fmt.Fprintf(tw, "sql:\t%s\n", pgURL)
return err
}
fmt.Fprintf(tw, "sql:\t%s\n", pgURL)

fmt.Fprintf(tw, "RPC client flags:\t%s\n", clientFlagsRPC())
if len(serverCfg.SocketFile) != 0 {
Expand All @@ -754,7 +773,6 @@ If problems persist, please see ` + base.DocsURL("cluster-setup-troubleshooting.
for i, spec := range serverCfg.Stores.Specs {
fmt.Fprintf(tw, "store[%d]:\t%s\n", i, spec)
}
initialBoot := s.InitialBoot()
nodeID := s.NodeID()
if initialBoot {
if nodeID == server.FirstNodeID {
Expand Down
16 changes: 16 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2227,3 +2227,19 @@ func listen(
}
return ln, nil
}

// RunLocalSQL calls fn on a SQL internal executor on this server.
// This is meant for use for SQL initialization during bootstrapping.
//
// The internal SQL interface should be used instead of a regular SQL
// network connection for SQL initializations when setting up a new
// server, because it is possible for the server to listen on a
// network interface that is not reachable from loopback. It is also
// possible for the TLS certificates to be invalid when used locally
// (e.g. if the hostname in the cert is an advertised address that's
// only reachable externally).
func (s *Server) RunLocalSQL(
ctx context.Context, fn func(ctx context.Context, sqlExec *sql.InternalExecutor) error,
) error {
return fn(ctx, s.internalExecutor)
}

0 comments on commit b056645

Please sign in to comment.