From 025fc6e9f975ef155002a5badf35ead10364159c Mon Sep 17 00:00:00 2001 From: ptrus Date: Thu, 14 Apr 2022 15:06:51 +0200 Subject: [PATCH] go/control/status: Add fields for quick overview of node status --- go/consensus/api/api.go | 3 ++ go/consensus/tendermint/full/full.go | 4 ++ go/consensus/tendermint/seed/seed.go | 1 + .../scenario/e2e/early_query.go | 3 ++ go/oasis-test-runner/scenario/e2e/seed_api.go | 3 ++ go/worker/common/api/api.go | 3 ++ go/worker/common/committee/node.go | 41 +++++++++++++++++++ 7 files changed, 58 insertions(+) diff --git a/go/consensus/api/api.go b/go/consensus/api/api.go index b67594fb20c..81bfc6fa1c6 100644 --- a/go/consensus/api/api.go +++ b/go/consensus/api/api.go @@ -204,6 +204,9 @@ type Vote struct { // Status is the current status overview. type Status struct { // nolint: maligned + // Status is an concise status of the consensus backend. + Status string `json:"string"` + // Version is the version of the consensus protocol that the node is using. Version version.Version `json:"version"` // Backend is the consensus backend identifier. diff --git a/go/consensus/tendermint/full/full.go b/go/consensus/tendermint/full/full.go index 0aa109968a4..3df35615ccf 100644 --- a/go/consensus/tendermint/full/full.go +++ b/go/consensus/tendermint/full/full.go @@ -774,6 +774,7 @@ func (t *fullService) GetStatus(ctx context.Context) (*consensusAPI.Status, erro status.ChainContext = t.genesis.ChainContext() status.GenesisHeight = t.genesis.Height if t.started() { + status.Status = "Ready" // Only attempt to fetch blocks in case the consensus service has started as otherwise // requests will block. genBlk, err := t.GetBlock(ctx, t.genesis.Height) @@ -845,6 +846,9 @@ func (t *fullService) GetStatus(ctx context.Context) (*consensusAPI.Status, erro consensusAddr := []byte(crypto.PublicKeyToTendermint(&consensusPk).Address()) status.IsValidator = vals.HasAddress(consensusAddr) } + } else { + // If node is not started it is still syncing. + status.Status = "Syncing" } return status, nil diff --git a/go/consensus/tendermint/seed/seed.go b/go/consensus/tendermint/seed/seed.go index 529cbe7e679..9c283223c98 100644 --- a/go/consensus/tendermint/seed/seed.go +++ b/go/consensus/tendermint/seed/seed.go @@ -131,6 +131,7 @@ func (srv *seedService) SupportedFeatures() consensus.FeatureMask { // Implements Backend. func (srv *seedService) GetStatus(ctx context.Context) (*consensus.Status, error) { status := &consensus.Status{ + Status: "Ready", Version: version.ConsensusProtocol, Backend: api.BackendName, Features: srv.SupportedFeatures(), diff --git a/go/oasis-test-runner/scenario/e2e/early_query.go b/go/oasis-test-runner/scenario/e2e/early_query.go index d7dde3b4c12..ac97d06fa8a 100644 --- a/go/oasis-test-runner/scenario/e2e/early_query.go +++ b/go/oasis-test-runner/scenario/e2e/early_query.go @@ -95,6 +95,9 @@ func (sc *earlyQueryImpl) Run(childEnv *env.Env) error { if err != nil { return fmt.Errorf("failed to get status for node: %w", err) } + if status.Consensus.Status != "Syncing" { + return fmt.Errorf("node doesn't report as 'Syncing', got: %s", status.Consensus.Status) + } if status.Consensus.LatestHeight != 0 { return fmt.Errorf("node reports non-zero latest height before chain is initialized") } diff --git a/go/oasis-test-runner/scenario/e2e/seed_api.go b/go/oasis-test-runner/scenario/e2e/seed_api.go index 8ee956962fc..a98d5192e6f 100644 --- a/go/oasis-test-runner/scenario/e2e/seed_api.go +++ b/go/oasis-test-runner/scenario/e2e/seed_api.go @@ -82,6 +82,9 @@ func (sc *seedAPI) Run(childEnv *env.Env) error { // nolint: gocyclo if err != nil { return fmt.Errorf("failed to get status for node: %w", err) } + if status.Consensus.Status != "Ready" { + return fmt.Errorf("seed node consensus status should be 'Ready', got: %s", status.Consensus.Status) + } if status.Consensus.LatestHeight != int64(0) { return fmt.Errorf("seed node latest height should be 0, got: %d", status.Consensus.LatestHeight) } diff --git a/go/worker/common/api/api.go b/go/worker/common/api/api.go index 7979e294382..65ee655a14e 100644 --- a/go/worker/common/api/api.go +++ b/go/worker/common/api/api.go @@ -7,6 +7,9 @@ import ( // Status is the common runtime worker status. type Status struct { + // Status is an concise status of the committee node. + Status string `json:"status"` + // ActiveVersion is the currently active version. ActiveVersion *version.Version `json:"active_version"` diff --git a/go/worker/common/committee/node.go b/go/worker/common/committee/node.go index 75020193622..4a11b363b91 100644 --- a/go/worker/common/committee/node.go +++ b/go/worker/common/committee/node.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "sync" + "sync/atomic" "time" "github.com/prometheus/client_golang/prometheus" @@ -174,6 +175,14 @@ type Node struct { hooks []NodeHooks + // Initialization steps. + consensusSynced uint32 + runtimeRegistryDescriptor uint32 + keymanagerAvailable uint32 + hostedRuntimeProvisioned uint32 + historyReindexingDone uint32 + workersInitialized uint32 + // Mutable and shared between nodes' workers. // Guarded by .CrossNode. CrossNode sync.Mutex @@ -187,6 +196,28 @@ type Node struct { logger *logging.Logger } +func (n *Node) getInitStatus() string { + if atomic.LoadUint32(&n.consensusSynced) == 0 { + return "Waiting for consensus sync" + } + if atomic.LoadUint32(&n.runtimeRegistryDescriptor) == 0 { + return "Waiting for runtime registry descriptor" + } + if atomic.LoadUint32(&n.keymanagerAvailable) == 0 { + return "Waiting for available key manager" + } + if atomic.LoadUint32(&n.hostedRuntimeProvisioned) == 0 { + return "Waiting for hosted runtime provision" + } + if atomic.LoadUint32(&n.historyReindexingDone) == 0 { + return "Waiting for history reindex" + } + if atomic.LoadUint32(&n.workersInitialized) == 0 { + return "Waiting for workers to initialize" + } + return "Ready" +} + // Name returns the service name. func (n *Node) Name() string { return "committee node" @@ -250,6 +281,8 @@ func (n *Node) GetStatus(ctx context.Context) (*api.Status, error) { defer n.CrossNode.Unlock() var status api.Status + status.Status = n.getInitStatus() + if n.CurrentBlock != nil { status.LatestRound = n.CurrentBlock.Header.Round status.LatestHeight = n.CurrentBlockHeight @@ -539,6 +572,7 @@ func (n *Node) worker() { case <-n.Consensus.Synced(): } n.logger.Info("consensus has finished initial synchronization") + atomic.StoreUint32(&n.consensusSynced, 1) // Wait for the runtime. rt, err := n.Runtime.ActiveDescriptor(n.ctx) @@ -548,6 +582,7 @@ func (n *Node) worker() { ) return } + atomic.StoreUint32(&n.runtimeRegistryDescriptor, 1) n.CurrentEpoch, err = n.Consensus.Beacon().GetEpoch(n.ctx, consensus.HeightLatest) if err != nil { @@ -582,6 +617,7 @@ func (n *Node) worker() { n.logger.Info("runtime has a key manager available") } + atomic.StoreUint32(&n.keymanagerAvailable, 1) // Start watching consensus blocks. consensusBlocks, consensusBlocksSub, err := n.Consensus.WatchBlocks(n.ctx) @@ -621,6 +657,7 @@ func (n *Node) worker() { ) return } + atomic.StoreUint32(&n.hostedRuntimeProvisioned, 1) hrtEventCh, hrtSub, err := hrt.WatchEvents(n.ctx) if err != nil { @@ -649,7 +686,9 @@ func (n *Node) worker() { // Perform initial hosted runtime version update to ensure we have something even in cases where // initial block processing fails for any reason. + n.CrossNode.Lock() n.updateHostedRuntimeVersionLocked() + n.CrossNode.Unlock() initialized := false for { @@ -671,6 +710,7 @@ func (n *Node) worker() { // history reindexing has been completed. if !initialized { n.logger.Debug("common worker is initialized") + atomic.StoreUint32(&n.historyReindexingDone, 1) close(n.initCh) initialized = true @@ -686,6 +726,7 @@ func (n *Node) worker() { } } n.logger.Debug("all child workers are initialized") + atomic.StoreUint32(&n.workersInitialized, 1) } // Received a block (annotated).