Skip to content

Commit

Permalink
server: remove initState.joined
Browse files Browse the repository at this point in the history
This simplifies initState a bit further, and is a stop along the way in
trying to improve our handling of init{,Disk}State.

Release note: None
  • Loading branch information
irfansharif committed Aug 24, 2020
1 parent 81f0865 commit 4beff80
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 51 deletions.
6 changes: 3 additions & 3 deletions pkg/cli/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -586,9 +586,9 @@ If problems persist, please see %s.`
s.PeriodicallyCheckForUpdates(ctx)
}

initialBoot := s.InitialBoot()
initialStart := s.InitialStart()

if disableReplication && initialBoot {
if disableReplication && initialStart {
// For start-single-node, set the default replication factor to
// 1 so as to avoid warning message and unnecessary rebalance
// churn.
Expand Down Expand Up @@ -648,7 +648,7 @@ If problems persist, please see %s.`
}
fmt.Fprintf(tw, "storage engine: \t%s\n", serverCfg.StorageEngine.String())
nodeID := s.NodeID()
if initialBoot {
if initialStart {
if nodeID == server.FirstNodeID {
fmt.Fprintf(tw, "status:\tinitialized new cluster\n")
} else {
Expand Down
37 changes: 11 additions & 26 deletions pkg/server/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,19 +120,6 @@ type initDiskState struct {
// a CockroachDB server can be started up after ServeAndWait returns.
type initState struct {
initDiskState
// joined is true if this is a new node. Note that the initDiskState may
// reflect the result of bootstrapping a new cluster, i.e. it is not true
// that joined==true implies that the initDiskState shows no initialized
// engines.
//
// This flag should only be used for logging and reporting. A newly
// bootstrapped single-node cluster is functionally equivalent to one that
// restarted; any decisions should be made on persisted data instead of
// this flag.
//
// TODO(tbg): remove this bool. The Node can find out another way whether
// it just joined or restarted.
joined bool
}

// NeedsInit returns true if (and only if) none if the engines are initialized.
Expand All @@ -149,28 +136,29 @@ func (s *initServer) NeedsInit() bool {
//
// The returned initState may not reflect a bootstrapped cluster yet, but it
// is guaranteed to have a ClusterID set.
// initialStart is true if this is a new node. This flag should only be used for
// logging and reporting. A newly bootstrapped single-node cluster is, for e.g.,
// functionally equivalent to one that restarted; any decisions should be made
// on persisted data instead of this flag.
//
// This method must be called only once.
//
// TODO(tbg): give this a KV client and thus initialize at least one store in
// all cases.
func (s *initServer) ServeAndWait(
ctx context.Context, stopper *stop.Stopper, sv *settings.Values, g *gossip.Gossip,
) (*initState, error) {
) (state *initState, initialStart bool, err error) {
if !s.NeedsInit() {
// If already bootstrapped, return early.
return &initState{
initDiskState: *s.inspectState,
joined: false,
}, nil
return &initState{initDiskState: *s.inspectState}, false, nil
}

log.Info(ctx, "no stores bootstrapped and --join flag specified, awaiting "+
"init command or join with an already initialized node.")

select {
case <-stopper.ShouldQuiesce():
return nil, stop.ErrUnavailable
return nil, false, stop.ErrUnavailable
case state := <-s.bootstrapReqCh:
// Bootstrap() did its job. At this point, we know that the cluster
// version will be bootstrapVersion (=state.clusterVersion.Version), but
Expand All @@ -182,11 +170,11 @@ func (s *initServer) ServeAndWait(
// having every freshly bootstrapped cluster spend time at an old
// cluster version.
if err := clusterversion.Initialize(ctx, state.clusterVersion.Version, sv); err != nil {
return nil, err
return nil, false, err
}

log.Infof(ctx, "**** cluster %s has been created", state.clusterID)
return state, nil
return state, true, nil
case <-g.Connected:
// Gossip connected, that is, we know a ClusterID. Due to the early
// return above, we know that all of our engines are empty, i.e. we
Expand All @@ -212,13 +200,10 @@ func (s *initServer) ServeAndWait(
// easy.
clusterID, err := g.GetClusterID()
if err != nil {
return nil, err
return nil, false, err
}
s.inspectState.clusterID = clusterID
return &initState{
initDiskState: *s.inspectState,
joined: true,
}, nil
return &initState{initDiskState: *s.inspectState}, true, nil
}
}

Expand Down
32 changes: 16 additions & 16 deletions pkg/server/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,18 +147,18 @@ func (nm nodeMetrics) callComplete(d time.Duration, pErr *roachpb.Error) {
// IDs for bootstrapping the node itself or new stores as they're added
// on subsequent instantiations.
type Node struct {
stopper *stop.Stopper
clusterID *base.ClusterIDContainer // UUID for Cockroach cluster
Descriptor roachpb.NodeDescriptor // Node ID, network/physical topology
storeCfg kvserver.StoreConfig // Config to use and pass to stores
eventLogger sql.EventLogger
stores *kvserver.Stores // Access to node-local stores
metrics nodeMetrics
recorder *status.MetricsRecorder
startedAt int64
lastUp int64
initialBoot bool // True if this is the first time this node has started.
txnMetrics kvcoord.TxnMetrics
stopper *stop.Stopper
clusterID *base.ClusterIDContainer // UUID for Cockroach cluster
Descriptor roachpb.NodeDescriptor // Node ID, network/physical topology
storeCfg kvserver.StoreConfig // Config to use and pass to stores
eventLogger sql.EventLogger
stores *kvserver.Stores // Access to node-local stores
metrics nodeMetrics
recorder *status.MetricsRecorder
startedAt int64
lastUp int64
initialStart bool // True if this is the first time this node has started.
txnMetrics kvcoord.TxnMetrics

perReplicaServer kvserver.Server
}
Expand Down Expand Up @@ -268,7 +268,6 @@ func bootstrapCluster(
initializedEngines: engines,
newEngines: nil,
},
joined: true,
}
return state, nil
}
Expand Down Expand Up @@ -336,6 +335,7 @@ func (n *Node) start(
ctx context.Context,
addr, sqlAddr net.Addr,
state initState,
initialStart bool,
clusterName string,
attrs roachpb.Attributes,
locality roachpb.Locality,
Expand All @@ -345,10 +345,10 @@ func (n *Node) start(
// Obtaining the NodeID requires a dance of sorts. If the node has initialized
// stores, the NodeID is persisted in each of them. If not, then we'll need to
// use the KV store to get a NodeID assigned.
n.initialBoot = state.joined
n.initialStart = initialStart
nodeID := state.nodeID
if nodeID == 0 {
if !state.joined {
if !initialStart {
log.Fatalf(ctx, "node has no NodeID, but claims to not be joining cluster")
}
// Allocate NodeID. Note that Gossip is already connected because if there's
Expand Down Expand Up @@ -775,7 +775,7 @@ func (n *Node) recordJoinEvent() {

logEventType := sql.EventLogNodeRestart
lastUp := n.lastUp
if n.initialBoot {
if n.initialStart {
logEventType = sql.EventLogNodeJoin
lastUp = n.startedAt
}
Expand Down
14 changes: 8 additions & 6 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,10 +676,11 @@ func (s *Server) NodeID() roachpb.NodeID {
return s.node.Descriptor.NodeID
}

// InitialBoot returns whether this is the first time the node has booted.
// Only intended to help print debugging info during server startup.
func (s *Server) InitialBoot() bool {
return s.node.initialBoot
// InitialStart returns whether this is the first time the node has started (as
// opposed to being restarted). Only intended to help print debugging info
// during server startup.
func (s *Server) InitialStart() bool {
return s.node.initialStart
}

// grpcGatewayServer represents a grpc service with HTTP endpoints through GRPC
Expand Down Expand Up @@ -1316,7 +1317,7 @@ func (s *Server) Start(ctx context.Context) error {
// connections.
startRPCServer(workersCtx)
onInitServerReady()
state, err := initServer.ServeAndWait(ctx, s.stopper, &s.cfg.Settings.SV, s.gossip)
state, initialStart, err := initServer.ServeAndWait(ctx, s.stopper, &s.cfg.Settings.SV, s.gossip)
if err != nil {
return errors.Wrap(err, "during init")
}
Expand Down Expand Up @@ -1415,6 +1416,7 @@ func (s *Server) Start(ctx context.Context) error {
advAddrU,
advSQLAddrU,
*state,
initialStart,
s.cfg.ClusterName,
s.cfg.NodeAttributes,
s.cfg.Locality,
Expand Down Expand Up @@ -1603,7 +1605,7 @@ func (s *Server) Start(ctx context.Context) error {
case enginepb.EngineTypeTeePebbleRocksDB:
nodeStartCounter += "pebble+rocksdb."
}
if s.InitialBoot() {
if s.InitialStart() {
nodeStartCounter += "initial-boot"
} else {
nodeStartCounter += "restart"
Expand Down

0 comments on commit 4beff80

Please sign in to comment.