Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cli: use zone configs to disable replication #39492

Merged
merged 1 commit into from
Aug 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 9 additions & 19 deletions pkg/cli/demo.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/cli/cliflags"
"github.com/cockroachdb/cockroach/pkg/config"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server"
Expand All @@ -28,7 +27,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log/logflags"
"github.com/cockroachdb/cockroach/pkg/workload"
"github.com/cockroachdb/cockroach/pkg/workload/workloadsql"
"github.com/gogo/protobuf/proto"
"github.com/pkg/errors"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
Expand Down Expand Up @@ -123,27 +121,11 @@ func setupTransientServers(
}
cleanup = func() { stopper.Stop(ctx) }

// Set up the default zone configuration. We are using an in-memory store
// so we really want to disable replication.
cfg := config.DefaultZoneConfig()
sysCfg := config.DefaultSystemZoneConfig()

if demoCtx.nodes < 3 {
cfg.NumReplicas = proto.Int32(1)
sysCfg.NumReplicas = proto.Int32(1)
}

// Create the first transient server. The others will join this one.
args := base.TestServerArgs{
PartOfCluster: true,
Insecure: true,
Knobs: base.TestingKnobs{
Server: &server.TestingKnobs{
DefaultZoneConfigOverride: &cfg,
DefaultSystemZoneConfigOverride: &sysCfg,
},
},
Stopper: stopper,
Stopper: stopper,
}

serverFactory := server.TestServerFactory
Expand All @@ -166,6 +148,14 @@ func setupTransientServers(
}
}

if demoCtx.nodes < 3 {
// Set up the default zone configuration. We are using an in-memory store
// so we really want to disable replication.
if err := cliDisableReplication(ctx, s.Server); err != nil {
return ``, ``, cleanup, err
}
}

// Prepare the URL for use by the SQL shell.
options := url.Values{}
options.Add("sslmode", "disable")
Expand Down
48 changes: 48 additions & 0 deletions pkg/cli/disable_replication.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import (
"context"
"fmt"

"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/sql"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
)

// cliDisableReplication changes the replication factor on
// all defined zones to become 1. This is used by start-single-node
// and demo to define single-node clusters, so as to avoid
// churn in the log files.
//
// The change is effected using the internal SQL interface of the
// given server object.
func cliDisableReplication(ctx context.Context, s *server.Server) error {
return s.RunLocalSQL(ctx,
func(ctx context.Context, ie *sql.InternalExecutor) error {
rows, err := ie.Query(ctx, "get-zones", nil,
"SELECT target FROM crdb_internal.zones")
if err != nil {
return err
}

for _, row := range rows {
zone := string(*row[0].(*tree.DString))
if _, err := ie.Exec(ctx, "set-zone", nil,
fmt.Sprintf("ALTER %s CONFIGURE ZONE USING num_replicas = 1", zone)); err != nil {
return err
}
}

return nil
})
}
8 changes: 0 additions & 8 deletions pkg/cli/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log/logflags"
"github.com/cockroachdb/cockroach/pkg/util/netutil"
"github.com/cockroachdb/errors"
"github.com/gogo/protobuf/proto"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
)
Expand Down Expand Up @@ -250,13 +249,6 @@ func init() {
})
}

// start-single-node starts with default replication of 1.
AddPersistentPreRunE(startSingleNodeCmd, func(cmd *cobra.Command, _ []string) error {
serverCfg.DefaultSystemZoneConfig.NumReplicas = proto.Int32(1)
serverCfg.DefaultZoneConfig.NumReplicas = proto.Int32(1)
return nil
})

// Map any flags registered in the standard "flag" package into the
// top-level cockroach command.
pf := cockroachCmd.PersistentFlags()
Expand Down
51 changes: 51 additions & 0 deletions pkg/cli/interactive_tests/test_disable_replication.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#! /usr/bin/env expect -f

source [file join [file dirname $argv0] common.tcl]

spawn /bin/bash
send "PS1=':''/# '\r"
eexpect ":/# "

start_test "Check that demo disables replication properly"
send "$argv demo -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 1"
eexpect ":/# "
end_test

start_test "Check that start-single-node disables replication properly"
system "rm -rf logs/db"
start_server $argv
send "$argv sql -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 1"
eexpect ":/# "
end_test

start_test "Check that it remains possible to reset the replication factor"
send "$argv sql -e 'alter range default configure zone using num_replicas = 3'\r"
eexpect "CONFIGURE ZONE"
eexpect ":/# "
stop_server $argv
start_server $argv
send "$argv sql -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 3"
eexpect ":/# "
end_test

stop_server $argv

start_test "Check that start-single-node on a regular cluster does not reset the replication factor"
# make a fresh server but using the regular 'start'
system "rm -rf logs/db"
system "$argv start --insecure --pid-file=server_pid --background -s=path=logs/db >>logs/expect-cmd.log 2>&1;
$argv sql -e 'select 1'"
# restart with start-single-node
stop_server $argv
start_server $argv
# check that the replication factor was unchanged
send "$argv sql -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 3"
eexpect ":/# "
end_test

send "exit 0\r"
eexpect eof
9 changes: 0 additions & 9 deletions pkg/cli/interactive_tests/test_flags.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,5 @@ interrupt
eexpect ":/# "
end_test

start_test "Check that start-single-node disables replication properly"
system "rm -rf logs/db"
start_server $argv
send "$argv sql -e 'show zone configuration for range default'\r"
eexpect "num_replicas = 1"
eexpect ":/# "
stop_server $argv
end_test

send "exit 0\r"
eexpect eof
27 changes: 27 additions & 0 deletions pkg/cli/interactive_tests/test_multiple_nodes.tcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#! /usr/bin/env expect -f

source [file join [file dirname $argv0] common.tcl]

start_server $argv

start_test "Check that it is possible to add nodes to a server started with start-single-node"

system "$argv start --insecure --port=26258 --http-port=8083 --pid-file=server_pid2 --background -s=path=logs/db2 --join=:26257 >>logs/expect-cmd.log 2>&1;
$argv sql -e 'select 1' --port=26258"

system "$argv start --insecure --port=26259 --http-port=8084 --pid-file=server_pid3 --background -s=path=logs/db3 --join=:26257 >>logs/expect-cmd.log 2>&1;
$argv sql -e 'select 1' --port=26259"

# Check the number of nodes
spawn $argv node ls
eexpect id
eexpect "3 rows"
eexpect eof

# Remove the additional nodes.
system "$argv quit --port=26258"
system "$argv quit --port=26259"

end_test

stop_server $argv
30 changes: 24 additions & 6 deletions pkg/cli/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,18 +416,22 @@ func runStartSingleNode(cmd *cobra.Command, args []string) error {
// Now actually set the flag as changed so that the start code
// doesn't warn that it was not set.
joinFlag.Changed = true
return runStart(cmd, args)
return runStart(cmd, args, true /*disableReplication*/)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if I start a non-single node three node cluster with nodes 2 and 3 pointing at node 1 (which itself joins to nobody), and then I restart node one as a single-node instance? Will it just work, error out, or erroneously overwrite my zone configs?

}

func runStartJoin(cmd *cobra.Command, args []string) error {
return runStart(cmd, args)
return runStart(cmd, args, false /*disableReplication*/)
}

// runStart starts the cockroach node using --store as the list of
// storage devices ("stores") on this machine and --join as the list
// of other active nodes used to join this node to the cockroach
// cluster, if this is its first time connecting.
func runStart(cmd *cobra.Command, args []string) error {
//
// If the argument disableReplication is true and we are starting
// a fresh cluster, the replication factor will be disabled in
// all zone configs.
func runStart(cmd *cobra.Command, args []string, disableReplication bool) error {
tBegin := timeutil.Now()

// First things first: if the user wants background processing,
Expand Down Expand Up @@ -710,6 +714,21 @@ If problems persist, please see ` + base.DocsURL("cluster-setup-troubleshooting.
s.PeriodicallyCheckForUpdates(ctx)
}

initialBoot := s.InitialBoot()

if disableReplication && initialBoot {
// For start-single-node, set the default replication factor to
// 1 so as to avoid warning message and unnecessary rebalance
// churn.
if err := cliDisableReplication(ctx, s); err != nil {
log.Errorf(ctx, "could not disable replication: %v", err)
return err
}
log.Shout(ctx, log.Severity_INFO,
"Replication was disabled for this cluster.\n"+
"When/if adding nodes in the future, update zone configurations to increase the replication factor.")
}

// Now inform the user that the server is running and tell the
// user about its run-time derived parameters.
var buf bytes.Buffer
Expand All @@ -725,9 +744,9 @@ If problems persist, please see ` + base.DocsURL("cluster-setup-troubleshooting.
pgURL, err := serverCfg.PGURL(url.User(security.RootUser))
if err != nil {
log.Errorf(ctx, "failed computing the URL: %v", err)
} else {
fmt.Fprintf(tw, "sql:\t%s\n", pgURL)
return err
}
fmt.Fprintf(tw, "sql:\t%s\n", pgURL)

fmt.Fprintf(tw, "RPC client flags:\t%s\n", clientFlagsRPC())
if len(serverCfg.SocketFile) != 0 {
Expand All @@ -754,7 +773,6 @@ If problems persist, please see ` + base.DocsURL("cluster-setup-troubleshooting.
for i, spec := range serverCfg.Stores.Specs {
fmt.Fprintf(tw, "store[%d]:\t%s\n", i, spec)
}
initialBoot := s.InitialBoot()
nodeID := s.NodeID()
if initialBoot {
if nodeID == server.FirstNodeID {
Expand Down
16 changes: 16 additions & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2227,3 +2227,19 @@ func listen(
}
return ln, nil
}

// RunLocalSQL calls fn on a SQL internal executor on this server.
// This is meant for use for SQL initialization during bootstrapping.
//
// The internal SQL interface should be used instead of a regular SQL
// network connection for SQL initializations when setting up a new
// server, because it is possible for the server to listen on a
// network interface that is not reachable from loopback. It is also
// possible for the TLS certificates to be invalid when used locally
// (e.g. if the hostname in the cert is an advertised address that's
// only reachable externally).
func (s *Server) RunLocalSQL(
ctx context.Context, fn func(ctx context.Context, sqlExec *sql.InternalExecutor) error,
) error {
return fn(ctx, s.internalExecutor)
}