From 7350cd2e3e78bf109001807cbb50346530b81f13 Mon Sep 17 00:00:00 2001 From: Bin Shi <39923490+binshi-bing@users.noreply.github.com> Date: Sun, 23 Apr 2023 19:48:44 -0700 Subject: [PATCH] mcs: fix duplicate start of RaftCluster. (#6358) * Using double-checked locking to avoid duplicate start of RaftCluster. Signed-off-by: Bin Shi * Handle feedback Signed-off-by: Bin Shi * improve locking Signed-off-by: Bin Shi * handle feedback Signed-off-by: Bin Shi --------- Signed-off-by: Bin Shi Co-authored-by: Ryan Leung --- server/cluster/cluster.go | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index c2bc7a908b0..69710be506c 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -22,7 +22,6 @@ import ( "strconv" "strings" "sync" - "sync/atomic" "time" "github.com/coreos/go-semver/semver" @@ -134,7 +133,7 @@ type RaftCluster struct { etcdClient *clientv3.Client httpClient *http.Client - running atomic.Bool + running bool meta *metapb.Cluster storeConfigManager *config.StoreConfigManager storage storage.Storage @@ -258,14 +257,14 @@ func (c *RaftCluster) InitCluster( // Start starts a cluster. func (c *RaftCluster) Start(s Server) error { - if c.IsRunning() { + c.Lock() + defer c.Unlock() + + if c.running { log.Warn("raft cluster has already been started") return nil } - c.Lock() - defer c.Unlock() - c.InitCluster(s.GetAllocator(), s.GetPersistOptions(), s.GetStorage(), s.GetBasicCluster(), s.GetKeyspaceGroupManager()) cluster, err := c.LoadClusterInfo() if err != nil { @@ -317,7 +316,7 @@ func (c *RaftCluster) Start(s Server) error { go c.runUpdateStoreStats() go c.startGCTuner() - c.running.Store(true) + c.running = true return nil } @@ -605,26 +604,31 @@ func (c *RaftCluster) runReplicationMode() { // Stop stops the cluster. func (c *RaftCluster) Stop() { c.Lock() - if !c.running.CompareAndSwap(true, false) { + if !c.running { c.Unlock() return } - + c.running = false c.coordinator.stop() c.cancel() c.Unlock() + c.wg.Wait() log.Info("raftcluster is stopped") } // IsRunning return if the cluster is running. func (c *RaftCluster) IsRunning() bool { - return c.running.Load() + c.RLock() + defer c.RUnlock() + return c.running } // Context returns the context of RaftCluster. func (c *RaftCluster) Context() context.Context { - if c.running.Load() { + c.RLock() + defer c.RUnlock() + if c.running { return c.ctx } return nil