From 19db51aa21b0201537db8fa4e1b8a38109b12b6f Mon Sep 17 00:00:00 2001 From: otherview Date: Wed, 4 Sep 2024 16:29:27 +0100 Subject: [PATCH 01/17] Adding Health endpoint --- api/api.go | 6 ++++ api/health/health.go | 41 +++++++++++++++++++++++ api/health/types.go | 10 ++++++ api/node/node_test.go | 3 +- cmd/thor/main.go | 13 ++++++-- cmd/thor/node/node.go | 6 ++++ cmd/thor/solo/solo.go | 6 ++++ cmd/thor/solo/solo_test.go | 3 +- cmd/thor/utils.go | 5 +-- comm/communicator.go | 6 +++- health/health.go | 66 ++++++++++++++++++++++++++++++++++++++ 11 files changed, 158 insertions(+), 7 deletions(-) create mode 100644 api/health/health.go create mode 100644 api/health/types.go create mode 100644 health/health.go diff --git a/api/api.go b/api/api.go index ea33a0c16..d524f4b57 100644 --- a/api/api.go +++ b/api/api.go @@ -17,6 +17,7 @@ import ( "github.com/vechain/thor/v2/api/debug" "github.com/vechain/thor/v2/api/doc" "github.com/vechain/thor/v2/api/events" + "github.com/vechain/thor/v2/api/health" "github.com/vechain/thor/v2/api/node" "github.com/vechain/thor/v2/api/subscriptions" "github.com/vechain/thor/v2/api/transactions" @@ -28,6 +29,8 @@ import ( "github.com/vechain/thor/v2/state" "github.com/vechain/thor/v2/thor" "github.com/vechain/thor/v2/txpool" + + healthstatus "github.com/vechain/thor/v2/health" ) var logger = log.WithContext("pkg", "api") @@ -40,6 +43,7 @@ func New( logDB *logdb.LogDB, bft bft.Committer, nw node.Network, + healthStatus *healthstatus.Health, forkConfig thor.ForkConfig, allowedOrigins string, backtraceLimit uint32, @@ -74,6 +78,8 @@ func New( accounts.New(repo, stater, callGasLimit, forkConfig, bft). Mount(router, "/accounts") + health.New(healthStatus).Mount(router, "/health") + if !skipLogs { events.New(repo, logDB, logsLimit). Mount(router, "/logs/event") diff --git a/api/health/health.go b/api/health/health.go new file mode 100644 index 000000000..e0b047bbe --- /dev/null +++ b/api/health/health.go @@ -0,0 +1,41 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package health + +import ( + "net/http" + + "github.com/gorilla/mux" + "github.com/vechain/thor/v2/api/utils" + "github.com/vechain/thor/v2/health" +) + +type Health struct { + healthStatus *health.Health +} + +func New(healthStatus *health.Health) *Health { + return &Health{ + healthStatus: healthStatus, + } +} + +func (h *Health) handleGetHealth(w http.ResponseWriter, req *http.Request) error { + acc, err := h.healthStatus.Status() + if err != nil { + return err + } + return utils.WriteJSON(w, acc) +} + +func (h *Health) Mount(root *mux.Router, pathPrefix string) { + sub := root.PathPrefix(pathPrefix).Subrouter() + + sub.Path("/"). + Methods(http.MethodGet). + Name("health"). + HandlerFunc(utils.WrapHandlerFunc(h.handleGetHealth)) +} diff --git a/api/health/types.go b/api/health/types.go new file mode 100644 index 000000000..6646ecad9 --- /dev/null +++ b/api/health/types.go @@ -0,0 +1,10 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package health + +type Response struct { + Healthy bool `json:"healthy"` +} diff --git a/api/node/node_test.go b/api/node/node_test.go index 90857d0d0..520633322 100644 --- a/api/node/node_test.go +++ b/api/node/node_test.go @@ -18,6 +18,7 @@ import ( "github.com/vechain/thor/v2/chain" "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/genesis" + "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/muxdb" "github.com/vechain/thor/v2/state" "github.com/vechain/thor/v2/txpool" @@ -49,7 +50,7 @@ func initCommServer(t *testing.T) { Limit: 10000, LimitPerAccount: 16, MaxLifetime: 10 * time.Minute, - })) + }), &health.Health{}) router := mux.NewRouter() node.New(comm).Mount(router, "/node") ts = httptest.NewServer(router) diff --git a/cmd/thor/main.go b/cmd/thor/main.go index a165b9d06..95d662dc9 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -25,6 +25,7 @@ import ( "github.com/vechain/thor/v2/cmd/thor/optimizer" "github.com/vechain/thor/v2/cmd/thor/solo" "github.com/vechain/thor/v2/genesis" + "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/metrics" @@ -222,6 +223,7 @@ func defaultAction(ctx *cli.Context) error { return err } + healthStatus := &health.Health{} printStartupMessage1(gene, repo, master, instanceDir, forkConfig) if !skipLogs { @@ -238,7 +240,7 @@ func defaultAction(ctx *cli.Context) error { txPool := txpool.New(repo, state.NewStater(mainDB), txpoolOpt) defer func() { log.Info("closing tx pool..."); txPool.Close() }() - p2pCommunicator, err := newP2PCommunicator(ctx, repo, txPool, instanceDir) + p2pCommunicator, err := newP2PCommunicator(ctx, repo, txPool, instanceDir, healthStatus) if err != nil { return err } @@ -255,6 +257,7 @@ func defaultAction(ctx *cli.Context) error { logDB, bftEngine, p2pCommunicator.Communicator(), + healthStatus, forkConfig, ctx.String(apiCorsFlag.Name), uint32(ctx.Uint64(apiBacktraceLimitFlag.Name)), @@ -297,7 +300,9 @@ func defaultAction(ctx *cli.Context) error { p2pCommunicator.Communicator(), ctx.Uint64(targetGasLimitFlag.Name), skipLogs, - forkConfig).Run(exitSignal) + forkConfig, + healthStatus, + ).Run(exitSignal) } func soloAction(ctx *cli.Context) error { @@ -399,6 +404,8 @@ func soloAction(ctx *cli.Context) error { defer func() { log.Info("closing tx pool..."); txPool.Close() }() bftEngine := solo.NewBFTEngine(repo) + healthStatus := &health.Health{} + apiHandler, apiCloser := api.New( repo, state.NewStater(mainDB), @@ -406,6 +413,7 @@ func soloAction(ctx *cli.Context) error { logDB, bftEngine, &solo.Communicator{}, + healthStatus, forkConfig, ctx.String(apiCorsFlag.Name), uint32(ctx.Uint64(apiBacktraceLimitFlag.Name)), @@ -443,6 +451,7 @@ func soloAction(ctx *cli.Context) error { return solo.New(repo, state.NewStater(mainDB), logDB, + healthStatus, txPool, ctx.Uint64(gasLimitFlag.Name), ctx.Bool(onDemandFlag.Name), diff --git a/cmd/thor/node/node.go b/cmd/thor/node/node.go index 663ff185f..7bbe2669e 100644 --- a/cmd/thor/node/node.go +++ b/cmd/thor/node/node.go @@ -26,6 +26,7 @@ import ( "github.com/vechain/thor/v2/co" "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/consensus" + "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/packer" @@ -64,6 +65,8 @@ type Node struct { maxBlockNum uint32 processLock sync.Mutex logWorker *worker + + health *health.Health } func New( @@ -78,6 +81,7 @@ func New( targetGasLimit uint64, skipLogs bool, forkConfig thor.ForkConfig, + health *health.Health, ) *Node { return &Node{ packer: packer.New(repo, stater, master.Address(), master.Beneficiary, forkConfig), @@ -92,6 +96,7 @@ func New( targetGasLimit: targetGasLimit, skipLogs: skipLogs, forkConfig: forkConfig, + health: health, } } @@ -387,6 +392,7 @@ func (n *Node) processBlock(newBlock *block.Block, stats *blockStats) (bool, err return err } n.processFork(newBlock, oldBest.Header.ID()) + n.health.NewBestBlock(newBlock.Header().ID()) } commitElapsed := mclock.Now() - startTime - execElapsed diff --git a/cmd/thor/solo/solo.go b/cmd/thor/solo/solo.go index 43dd644a1..6a20579a4 100644 --- a/cmd/thor/solo/solo.go +++ b/cmd/thor/solo/solo.go @@ -23,6 +23,7 @@ import ( "github.com/vechain/thor/v2/cmd/thor/bandwidth" "github.com/vechain/thor/v2/co" "github.com/vechain/thor/v2/genesis" + "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/packer" @@ -44,6 +45,7 @@ type Solo struct { txPool *txpool.TxPool packer *packer.Packer logDB *logdb.LogDB + health *health.Health gasLimit uint64 bandwidth bandwidth.Bandwidth blockInterval uint64 @@ -56,6 +58,7 @@ func New( repo *chain.Repository, stater *state.Stater, logDB *logdb.LogDB, + health *health.Health, txPool *txpool.TxPool, gasLimit uint64, onDemand bool, @@ -74,6 +77,7 @@ func New( &genesis.DevAccounts()[0].Address, forkConfig), logDB: logDB, + health: health, gasLimit: gasLimit, blockInterval: blockInterval, skipLogs: skipLogs, @@ -211,6 +215,8 @@ func (s *Solo) packing(pendingTxs tx.Transactions, onDemand bool) error { ) logger.Debug(b.String()) + s.health.NewBestBlock(b.Header().ID()) + return nil } diff --git a/cmd/thor/solo/solo_test.go b/cmd/thor/solo/solo_test.go index a4df3f35d..ac30f4980 100644 --- a/cmd/thor/solo/solo_test.go +++ b/cmd/thor/solo/solo_test.go @@ -14,6 +14,7 @@ import ( "github.com/vechain/thor/v2/builtin" "github.com/vechain/thor/v2/chain" "github.com/vechain/thor/v2/genesis" + "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/muxdb" "github.com/vechain/thor/v2/state" @@ -30,7 +31,7 @@ func newSolo() *Solo { repo, _ := chain.NewRepository(db, b) mempool := txpool.New(repo, stater, txpool.Options{Limit: 10000, LimitPerAccount: 16, MaxLifetime: 10 * time.Minute}) - return New(repo, stater, logDb, mempool, 0, true, false, thor.BlockInterval, thor.ForkConfig{}) + return New(repo, stater, logDb, &health.Health{}, mempool, 0, true, false, thor.BlockInterval, thor.ForkConfig{}) } func TestInitSolo(t *testing.T) { diff --git a/cmd/thor/utils.go b/cmd/thor/utils.go index fbbecfc90..763adcae4 100644 --- a/cmd/thor/utils.go +++ b/cmd/thor/utils.go @@ -44,6 +44,7 @@ import ( "github.com/vechain/thor/v2/co" "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/genesis" + "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/muxdb" @@ -489,7 +490,7 @@ func loadNodeMaster(ctx *cli.Context) (*node.Master, error) { return master, nil } -func newP2PCommunicator(ctx *cli.Context, repo *chain.Repository, txPool *txpool.TxPool, instanceDir string) (*p2p.P2P, error) { +func newP2PCommunicator(ctx *cli.Context, repo *chain.Repository, txPool *txpool.TxPool, instanceDir string, health *health.Health) (*p2p.P2P, error) { // known peers will be loaded/stored from/in this file peersCachePath := filepath.Join(instanceDir, "peers.cache") @@ -529,7 +530,7 @@ func newP2PCommunicator(ctx *cli.Context, repo *chain.Repository, txPool *txpool } return p2p.New( - comm.New(repo, txPool), + comm.New(repo, txPool, health), key, instanceDir, userNAT, diff --git a/comm/communicator.go b/comm/communicator.go index 4bdd1e2b2..6ec125533 100644 --- a/comm/communicator.go +++ b/comm/communicator.go @@ -20,6 +20,7 @@ import ( "github.com/vechain/thor/v2/chain" "github.com/vechain/thor/v2/co" "github.com/vechain/thor/v2/comm/proto" + "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/thor" "github.com/vechain/thor/v2/tx" @@ -39,12 +40,13 @@ type Communicator struct { newBlockFeed event.Feed announcementCh chan *announcement feedScope event.SubscriptionScope + health *health.Health goes co.Goes onceSynced sync.Once } // New create a new Communicator instance. -func New(repo *chain.Repository, txPool *txpool.TxPool) *Communicator { +func New(repo *chain.Repository, txPool *txpool.TxPool, health *health.Health) *Communicator { ctx, cancel := context.WithCancel(context.Background()) return &Communicator{ repo: repo, @@ -52,6 +54,7 @@ func New(repo *chain.Repository, txPool *txpool.TxPool) *Communicator { ctx: ctx, cancel: cancel, peerSet: newPeerSet(), + health: health, syncedCh: make(chan struct{}), announcementCh: make(chan *announcement), } @@ -118,6 +121,7 @@ func (c *Communicator) Sync(ctx context.Context, handler HandleBlockStream) { if shouldSynced() { delay = syncInterval c.onceSynced.Do(func() { + c.health.ChainSynced() close(c.syncedCh) }) } diff --git a/health/health.go b/health/health.go new file mode 100644 index 000000000..56252ae01 --- /dev/null +++ b/health/health.go @@ -0,0 +1,66 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package health + +import ( + "sync" + "time" + + "github.com/vechain/thor/v2/thor" +) + +type BlockIngestion struct { + BestBlock *thor.Bytes32 `json:"bestBlock"` + BestBlockTimestamp *time.Time `json:"bestBlockTimestamp"` +} + +type Status struct { + Healthy bool `json:"healthy"` + BlockIngestion *BlockIngestion `json:"blockIngestion"` + ChainSync bool `json:"chainSync"` +} + +type Health struct { + lock sync.RWMutex + newBestBlock time.Time + bestBlockID *thor.Bytes32 + chainSynced bool +} + +func (h *Health) NewBestBlock(ID thor.Bytes32) { + h.lock.Lock() + defer h.lock.Unlock() + + h.newBestBlock = time.Now() + h.bestBlockID = &ID +} + +func (h *Health) Status() (*Status, error) { + h.lock.RLock() + defer h.lock.RUnlock() + + blockIngest := &BlockIngestion{ + BestBlock: h.bestBlockID, + BestBlockTimestamp: &h.newBestBlock, + } + + // todo review time slots + healthy := time.Since(h.newBestBlock) >= 10*time.Second && + h.chainSynced + + return &Status{ + Healthy: healthy, + BlockIngestion: blockIngest, + ChainSync: h.chainSynced, + }, nil +} + +func (h *Health) ChainSynced() { + h.lock.Lock() + defer h.lock.Unlock() + + h.chainSynced = true +} From df30451451194df1a6f8814aa1579efbb5f4dace Mon Sep 17 00:00:00 2001 From: otherview Date: Tue, 29 Oct 2024 10:48:43 +0000 Subject: [PATCH 02/17] pr comments + 503 if not healthy --- api/health/health.go | 8 +++++++- comm/communicator.go | 3 ++- health/health.go | 12 ++++++------ 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/api/health/health.go b/api/health/health.go index e0b047bbe..f7d0b0c73 100644 --- a/api/health/health.go +++ b/api/health/health.go @@ -23,11 +23,17 @@ func New(healthStatus *health.Health) *Health { } } -func (h *Health) handleGetHealth(w http.ResponseWriter, req *http.Request) error { +func (h *Health) handleGetHealth(w http.ResponseWriter, _ *http.Request) error { acc, err := h.healthStatus.Status() if err != nil { return err } + + if !acc.Healthy { + w.WriteHeader(http.StatusServiceUnavailable) // Set the status to 503 + } else { + w.WriteHeader(http.StatusOK) // Set the status to 200 + } return utils.WriteJSON(w, acc) } diff --git a/comm/communicator.go b/comm/communicator.go index 8a2656363..7fbef2e22 100644 --- a/comm/communicator.go +++ b/comm/communicator.go @@ -119,9 +119,10 @@ func (c *Communicator) Sync(ctx context.Context, handler HandleBlockStream) { syncCount++ if shouldSynced() { + c.health.ChainSyncStatus(false) delay = syncInterval c.onceSynced.Do(func() { - c.health.ChainSynced() + c.health.ChainSyncStatus(true) close(c.syncedCh) }) } diff --git a/health/health.go b/health/health.go index 56252ae01..7d39a94ce 100644 --- a/health/health.go +++ b/health/health.go @@ -13,8 +13,8 @@ import ( ) type BlockIngestion struct { - BestBlock *thor.Bytes32 `json:"bestBlock"` - BestBlockTimestamp *time.Time `json:"bestBlockTimestamp"` + BestBlock *thor.Bytes32 `json:"bestBlock"` + BestBlockIngestionTimestamp *time.Time `json:"bestBlockIngestionTimestamp"` } type Status struct { @@ -43,8 +43,8 @@ func (h *Health) Status() (*Status, error) { defer h.lock.RUnlock() blockIngest := &BlockIngestion{ - BestBlock: h.bestBlockID, - BestBlockTimestamp: &h.newBestBlock, + BestBlock: h.bestBlockID, + BestBlockIngestionTimestamp: &h.newBestBlock, } // todo review time slots @@ -58,9 +58,9 @@ func (h *Health) Status() (*Status, error) { }, nil } -func (h *Health) ChainSynced() { +func (h *Health) ChainSyncStatus(syncStatus bool) { h.lock.Lock() defer h.lock.Unlock() - h.chainSynced = true + h.chainSynced = syncStatus } From ed0097c3cc024ccfe7d17676f2779b29a5288efb Mon Sep 17 00:00:00 2001 From: otherview Date: Wed, 30 Oct 2024 13:05:02 +0000 Subject: [PATCH 03/17] refactored admin server and api + health endpoint tests --- api/accounts/accounts_test.go | 44 ---------------- api/admin/admin.go | 30 +++++++++++ api/{ => admin}/health/health.go | 2 +- api/admin/health/health_test.go | 52 +++++++++++++++++++ api/{ => admin}/health/types.go | 0 api/{admin.go => admin/loglevel/log_level.go} | 32 +++++++++--- .../loglevel/log_level_test.go} | 11 ++-- api/admin/loglevel/types.go | 14 +++++ api/admin_server.go | 29 ++--------- api/api.go | 6 --- api/blocks/blocks_test.go | 14 ----- api/events/events_test.go | 19 ------- api/node/node_test.go | 15 ------ cmd/thor/main.go | 10 ++-- 14 files changed, 137 insertions(+), 141 deletions(-) create mode 100644 api/admin/admin.go rename api/{ => admin}/health/health.go (98%) create mode 100644 api/admin/health/health_test.go rename api/{ => admin}/health/types.go (100%) rename api/{admin.go => admin/loglevel/log_level.go} (65%) rename api/{admin_test.go => admin/loglevel/log_level_test.go} (93%) create mode 100644 api/admin/loglevel/types.go diff --git a/api/accounts/accounts_test.go b/api/accounts/accounts_test.go index cd4aac3cb..e126960b2 100644 --- a/api/accounts/accounts_test.go +++ b/api/accounts/accounts_test.go @@ -6,10 +6,8 @@ package accounts_test import ( - "bytes" "encoding/json" "fmt" - "io" "math/big" "net/http" "net/http/httptest" @@ -578,45 +576,3 @@ func batchCallWithNonExistingRevision(t *testing.T) { assert.Equal(t, http.StatusBadRequest, statusCode, "bad revision") assert.Equal(t, "revision: leveldb: not found\n", string(res), "revision not found") } - -func httpPost(t *testing.T, url string, body interface{}) ([]byte, int) { - data, err := json.Marshal(body) - if err != nil { - t.Fatal(err) - } - res, err := http.Post(url, "application/x-www-form-urlencoded", bytes.NewReader(data)) //#nosec G107 - if err != nil { - t.Fatal(err) - } - r, err := io.ReadAll(res.Body) - res.Body.Close() - if err != nil { - t.Fatal(err) - } - return r, res.StatusCode -} - -func httpGet(t *testing.T, url string) ([]byte, int) { - res, err := http.Get(url) //#nosec G107 - if err != nil { - t.Fatal(err) - } - r, err := io.ReadAll(res.Body) - res.Body.Close() - if err != nil { - t.Fatal(err) - } - return r, res.StatusCode -} - -func httpGetAccount(t *testing.T, path string) *accounts.Account { - res, statusCode := httpGet(t, ts.URL+"/accounts/"+path) - var acc accounts.Account - if err := json.Unmarshal(res, &acc); err != nil { - t.Fatal(err) - } - - assert.Equal(t, http.StatusOK, statusCode, "get account failed") - - return &acc -} diff --git a/api/admin/admin.go b/api/admin/admin.go new file mode 100644 index 000000000..95673aa45 --- /dev/null +++ b/api/admin/admin.go @@ -0,0 +1,30 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package admin + +import ( + "log/slog" + "net/http" + + "github.com/gorilla/handlers" + "github.com/gorilla/mux" + "github.com/vechain/thor/v2/api/admin/loglevel" + "github.com/vechain/thor/v2/health" + + healthAPI "github.com/vechain/thor/v2/api/admin/health" +) + +func New(logLevel *slog.LevelVar, health *health.Health) http.HandlerFunc { + router := mux.NewRouter() + router.PathPrefix("/admin") + + loglevel.New(logLevel).Mount(router, "/loglevel") + healthAPI.New(health).Mount(router, "/health") + + handler := handlers.CompressHandler(router) + + return handler.ServeHTTP +} diff --git a/api/health/health.go b/api/admin/health/health.go similarity index 98% rename from api/health/health.go rename to api/admin/health/health.go index f7d0b0c73..44de46095 100644 --- a/api/health/health.go +++ b/api/admin/health/health.go @@ -40,7 +40,7 @@ func (h *Health) handleGetHealth(w http.ResponseWriter, _ *http.Request) error { func (h *Health) Mount(root *mux.Router, pathPrefix string) { sub := root.PathPrefix(pathPrefix).Subrouter() - sub.Path("/"). + sub.Path(""). Methods(http.MethodGet). Name("health"). HandlerFunc(utils.WrapHandlerFunc(h.handleGetHealth)) diff --git a/api/admin/health/health_test.go b/api/admin/health/health_test.go new file mode 100644 index 000000000..193fe688b --- /dev/null +++ b/api/admin/health/health_test.go @@ -0,0 +1,52 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package health + +import ( + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/gorilla/mux" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vechain/thor/v2/health" +) + +var ts *httptest.Server + +func TestHealth(t *testing.T) { + initAPIServer(t) + + var healthStatus health.Status + respBody, statusCode := httpGet(t, ts.URL+"/health") + require.NoError(t, json.Unmarshal(respBody, &healthStatus)) + assert.False(t, healthStatus.Healthy) + assert.Equal(t, http.StatusServiceUnavailable, statusCode) +} + +func initAPIServer(_ *testing.T) { + router := mux.NewRouter() + New(&health.Health{}).Mount(router, "/health") + + ts = httptest.NewServer(router) +} + +func httpGet(t *testing.T, url string) ([]byte, int) { + res, err := http.Get(url) //#nosec G107 + if err != nil { + t.Fatal(err) + } + defer res.Body.Close() + + r, err := io.ReadAll(res.Body) + if err != nil { + t.Fatal(err) + } + return r, res.StatusCode +} diff --git a/api/health/types.go b/api/admin/health/types.go similarity index 100% rename from api/health/types.go rename to api/admin/health/types.go diff --git a/api/admin.go b/api/admin/loglevel/log_level.go similarity index 65% rename from api/admin.go rename to api/admin/loglevel/log_level.go index afd299cfa..27b9a04fa 100644 --- a/api/admin.go +++ b/api/admin/loglevel/log_level.go @@ -3,28 +3,44 @@ // Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying // file LICENSE or -package api +package loglevel import ( "log/slog" "net/http" + "github.com/gorilla/mux" "github.com/pkg/errors" "github.com/vechain/thor/v2/api/utils" "github.com/vechain/thor/v2/log" ) -type logLevelRequest struct { - Level string `json:"level"` +type LogLevel struct { + logLevel *slog.LevelVar } -type logLevelResponse struct { - CurrentLevel string `json:"currentLevel"` +func New(logLevel *slog.LevelVar) *LogLevel { + return &LogLevel{ + logLevel: logLevel, + } +} + +func (l *LogLevel) Mount(root *mux.Router, pathPrefix string) { + sub := root.PathPrefix(pathPrefix).Subrouter() + sub.Path(""). + Methods(http.MethodGet). + Name("get-log-level"). + HandlerFunc(utils.WrapHandlerFunc(getLogLevelHandler(l.logLevel))) + + sub.Path(""). + Methods(http.MethodPost). + Name("post-log-level"). + HandlerFunc(utils.WrapHandlerFunc(postLogLevelHandler(l.logLevel))) } func getLogLevelHandler(logLevel *slog.LevelVar) utils.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) error { - return utils.WriteJSON(w, logLevelResponse{ + return utils.WriteJSON(w, Response{ CurrentLevel: logLevel.Level().String(), }) } @@ -32,7 +48,7 @@ func getLogLevelHandler(logLevel *slog.LevelVar) utils.HandlerFunc { func postLogLevelHandler(logLevel *slog.LevelVar) utils.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) error { - var req logLevelRequest + var req Request if err := utils.ParseJSON(r.Body, &req); err != nil { return utils.BadRequest(errors.WithMessage(err, "Invalid request body")) @@ -55,7 +71,7 @@ func postLogLevelHandler(logLevel *slog.LevelVar) utils.HandlerFunc { return utils.BadRequest(errors.New("Invalid verbosity level")) } - return utils.WriteJSON(w, logLevelResponse{ + return utils.WriteJSON(w, Response{ CurrentLevel: logLevel.Level().String(), }) } diff --git a/api/admin_test.go b/api/admin/loglevel/log_level_test.go similarity index 93% rename from api/admin_test.go rename to api/admin/loglevel/log_level_test.go index be2847cbf..d04320ce3 100644 --- a/api/admin_test.go +++ b/api/admin/loglevel/log_level_test.go @@ -3,7 +3,7 @@ // Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying // file LICENSE or -package api +package loglevel import ( "bytes" @@ -14,6 +14,8 @@ import ( "strings" "testing" + "github.com/gorilla/mux" + "github.com/stretchr/testify/assert" ) @@ -76,15 +78,16 @@ func TestLogLevelHandler(t *testing.T) { } rr := httptest.NewRecorder() - handler := http.HandlerFunc(HTTPHandler(&logLevel).ServeHTTP) - handler.ServeHTTP(rr, req) + router := mux.NewRouter() + New(&logLevel).Mount(router, "/admin/loglevel") + router.ServeHTTP(rr, req) if status := rr.Code; status != tt.expectedStatus { t.Errorf("handler returned wrong status code: got %v want %v", status, tt.expectedStatus) } if tt.expectedLevel != "" { - var response logLevelResponse + var response Response if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { t.Fatalf("could not decode response: %v", err) } diff --git a/api/admin/loglevel/types.go b/api/admin/loglevel/types.go new file mode 100644 index 000000000..ce57187b1 --- /dev/null +++ b/api/admin/loglevel/types.go @@ -0,0 +1,14 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package loglevel + +type Request struct { + Level string `json:"level"` +} + +type Response struct { + CurrentLevel string `json:"currentLevel"` +} diff --git a/api/admin_server.go b/api/admin_server.go index 26054e908..95ff613ab 100644 --- a/api/admin_server.go +++ b/api/admin_server.go @@ -11,40 +11,21 @@ import ( "net/http" "time" - "github.com/gorilla/handlers" - "github.com/gorilla/mux" "github.com/pkg/errors" - "github.com/vechain/thor/v2/api/utils" + "github.com/vechain/thor/v2/api/admin" "github.com/vechain/thor/v2/co" + "github.com/vechain/thor/v2/health" ) -func HTTPHandler(logLevel *slog.LevelVar) http.Handler { - router := mux.NewRouter() - sub := router.PathPrefix("/admin").Subrouter() - sub.Path("/loglevel"). - Methods(http.MethodGet). - Name("get-log-level"). - HandlerFunc(utils.WrapHandlerFunc(getLogLevelHandler(logLevel))) - - sub.Path("/loglevel"). - Methods(http.MethodPost). - Name("post-log-level"). - HandlerFunc(utils.WrapHandlerFunc(postLogLevelHandler(logLevel))) - - return handlers.CompressHandler(router) -} - -func StartAdminServer(addr string, logLevel *slog.LevelVar) (string, func(), error) { +func StartAdminServer(addr string, logLevel *slog.LevelVar, health *health.Health) (string, func(), error) { listener, err := net.Listen("tcp", addr) if err != nil { return "", nil, errors.Wrapf(err, "listen admin API addr [%v]", addr) } - router := mux.NewRouter() - router.PathPrefix("/admin").Handler(HTTPHandler(logLevel)) - handler := handlers.CompressHandler(router) + adminHandler := admin.New(logLevel, health) - srv := &http.Server{Handler: handler, ReadHeaderTimeout: time.Second, ReadTimeout: 5 * time.Second} + srv := &http.Server{Handler: adminHandler, ReadHeaderTimeout: time.Second, ReadTimeout: 5 * time.Second} var goes co.Goes goes.Go(func() { srv.Serve(listener) diff --git a/api/api.go b/api/api.go index 4d02c152d..38b412a97 100644 --- a/api/api.go +++ b/api/api.go @@ -17,7 +17,6 @@ import ( "github.com/vechain/thor/v2/api/debug" "github.com/vechain/thor/v2/api/doc" "github.com/vechain/thor/v2/api/events" - "github.com/vechain/thor/v2/api/health" "github.com/vechain/thor/v2/api/node" "github.com/vechain/thor/v2/api/subscriptions" "github.com/vechain/thor/v2/api/transactions" @@ -29,8 +28,6 @@ import ( "github.com/vechain/thor/v2/state" "github.com/vechain/thor/v2/thor" "github.com/vechain/thor/v2/txpool" - - healthstatus "github.com/vechain/thor/v2/health" ) var logger = log.WithContext("pkg", "api") @@ -43,7 +40,6 @@ func New( logDB *logdb.LogDB, bft bft.Committer, nw node.Network, - healthStatus *healthstatus.Health, forkConfig thor.ForkConfig, allowedOrigins string, backtraceLimit uint32, @@ -78,8 +74,6 @@ func New( accounts.New(repo, stater, callGasLimit, forkConfig, bft). Mount(router, "/accounts") - health.New(healthStatus).Mount(router, "/health") - if !skipLogs { events.New(repo, logDB, logsLimit). Mount(router, "/logs/event") diff --git a/api/blocks/blocks_test.go b/api/blocks/blocks_test.go index 60b96b299..a203cadd6 100644 --- a/api/blocks/blocks_test.go +++ b/api/blocks/blocks_test.go @@ -7,7 +7,6 @@ package blocks_test import ( "encoding/json" - "io" "math" "math/big" "net/http" @@ -267,16 +266,3 @@ func checkExpandedBlock(t *testing.T, expBl *block.Block, actBl *blocks.JSONExpa assert.Equal(t, tx.ID(), actBl.Transactions[i].ID, "txid should be equal") } } - -func httpGet(t *testing.T, url string) ([]byte, int) { - res, err := http.Get(url) //#nosec G107 - if err != nil { - t.Fatal(err) - } - r, err := io.ReadAll(res.Body) - res.Body.Close() - if err != nil { - t.Fatal(err) - } - return r, res.StatusCode -} diff --git a/api/events/events_test.go b/api/events/events_test.go index 9f859780b..3d28e41dc 100644 --- a/api/events/events_test.go +++ b/api/events/events_test.go @@ -6,9 +6,7 @@ package events_test import ( - "bytes" "encoding/json" - "io" "net/http" "net/http/httptest" "strings" @@ -212,23 +210,6 @@ func createDb(t *testing.T) *logdb.LogDB { } // Utilities functions -func httpPost(t *testing.T, url string, body interface{}) ([]byte, int) { - data, err := json.Marshal(body) - if err != nil { - t.Fatal(err) - } - res, err := http.Post(url, "application/x-www-form-urlencoded", bytes.NewReader(data)) //#nosec G107 - if err != nil { - t.Fatal(err) - } - r, err := io.ReadAll(res.Body) - res.Body.Close() - if err != nil { - t.Fatal(err) - } - return r, res.StatusCode -} - func insertBlocks(t *testing.T, db *logdb.LogDB, n int) { b := new(block.Builder).Build() for i := 0; i < n; i++ { diff --git a/api/node/node_test.go b/api/node/node_test.go index 6e324efcb..58b1c86a5 100644 --- a/api/node/node_test.go +++ b/api/node/node_test.go @@ -5,8 +5,6 @@ package node_test import ( - "io" - "net/http" "net/http/httptest" "testing" "time" @@ -55,16 +53,3 @@ func initCommServer(t *testing.T) { node.New(comm).Mount(router, "/node") ts = httptest.NewServer(router) } - -func httpGet(t *testing.T, url string) []byte { - res, err := http.Get(url) //#nosec G107 - if err != nil { - t.Fatal(err) - } - r, err := io.ReadAll(res.Body) - res.Body.Close() - if err != nil { - t.Fatal(err) - } - return r -} diff --git a/cmd/thor/main.go b/cmd/thor/main.go index 2997e8ea7..ba34f5543 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -167,6 +167,7 @@ func defaultAction(ctx *cli.Context) error { return errors.Wrap(err, "parse verbosity flag") } logLevel := initLogger(lvl, ctx.Bool(jsonLogsFlag.Name)) + healthStatus := &health.Health{} // enable metrics as soon as possible metricsURL := "" @@ -182,7 +183,7 @@ func defaultAction(ctx *cli.Context) error { adminURL := "" if ctx.Bool(enableAdminFlag.Name) { - url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel) + url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, healthStatus) if err != nil { return fmt.Errorf("unable to start admin server - %w", err) } @@ -221,7 +222,6 @@ func defaultAction(ctx *cli.Context) error { return err } - healthStatus := &health.Health{} printStartupMessage1(gene, repo, master, instanceDir, forkConfig) skipLogs := ctx.Bool(skipLogsFlag.Name) @@ -256,7 +256,6 @@ func defaultAction(ctx *cli.Context) error { logDB, bftEngine, p2pCommunicator.Communicator(), - healthStatus, forkConfig, ctx.String(apiCorsFlag.Name), uint32(ctx.Uint64(apiBacktraceLimitFlag.Name)), @@ -314,6 +313,7 @@ func soloAction(ctx *cli.Context) error { } logLevel := initLogger(lvl, ctx.Bool(jsonLogsFlag.Name)) + healthStatus := &health.Health{} // enable metrics as soon as possible metricsURL := "" @@ -329,7 +329,7 @@ func soloAction(ctx *cli.Context) error { adminURL := "" if ctx.Bool(enableAdminFlag.Name) { - url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel) + url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, healthStatus) if err != nil { return fmt.Errorf("unable to start admin server - %w", err) } @@ -404,7 +404,6 @@ func soloAction(ctx *cli.Context) error { defer func() { log.Info("closing tx pool..."); txPool.Close() }() bftEngine := solo.NewBFTEngine(repo) - healthStatus := &health.Health{} apiHandler, apiCloser := api.New( repo, @@ -413,7 +412,6 @@ func soloAction(ctx *cli.Context) error { logDB, bftEngine, &solo.Communicator{}, - healthStatus, forkConfig, ctx.String(apiCorsFlag.Name), uint32(ctx.Uint64(apiBacktraceLimitFlag.Name)), From 552629c009e286dfb22abd427e09ee42e70a6cd0 Mon Sep 17 00:00:00 2001 From: otherview Date: Wed, 30 Oct 2024 14:55:53 +0000 Subject: [PATCH 04/17] fix health condition --- health/health.go | 3 +- health/health_test.go | 85 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 2 deletions(-) create mode 100644 health/health_test.go diff --git a/health/health.go b/health/health.go index 7d39a94ce..edfec270d 100644 --- a/health/health.go +++ b/health/health.go @@ -47,8 +47,7 @@ func (h *Health) Status() (*Status, error) { BestBlockIngestionTimestamp: &h.newBestBlock, } - // todo review time slots - healthy := time.Since(h.newBestBlock) >= 10*time.Second && + healthy := time.Since(h.newBestBlock) <= 10*time.Second && // less than 10 secs have passed since a new block was received h.chainSynced return &Status{ diff --git a/health/health_test.go b/health/health_test.go new file mode 100644 index 000000000..30fd266c7 --- /dev/null +++ b/health/health_test.go @@ -0,0 +1,85 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package health + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vechain/thor/v2/thor" +) + +func TestHealth_NewBestBlock(t *testing.T) { + h := &Health{} + blockID := thor.Bytes32{0x01, 0x02, 0x03} + + h.NewBestBlock(blockID) + + if h.bestBlockID == nil || *h.bestBlockID != blockID { + t.Errorf("expected bestBlockID to be %v, got %v", blockID, h.bestBlockID) + } + + if time.Since(h.newBestBlock) > time.Second { + t.Errorf("newBestBlock timestamp is not recent") + } + + h.ChainSyncStatus(true) + + status, err := h.Status() + require.NoError(t, err) + + assert.True(t, status.Healthy) +} + +func TestHealth_ChainSyncStatus(t *testing.T) { + h := &Health{} + + h.ChainSyncStatus(true) + if !h.chainSynced { + t.Errorf("expected chainSynced to be true, got false") + } + + h.ChainSyncStatus(false) + if h.chainSynced { + t.Errorf("expected chainSynced to be false, got true") + } + + status, err := h.Status() + require.NoError(t, err) + + assert.False(t, status.Healthy) +} + +func TestHealth_Status(t *testing.T) { + h := &Health{} + blockID := thor.Bytes32{0x01, 0x02, 0x03} + + h.NewBestBlock(blockID) + h.ChainSyncStatus(true) + + status, err := h.Status() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !status.Healthy { + t.Errorf("expected healthy to be true, got false") + } + + if status.BlockIngestion.BestBlock == nil || *status.BlockIngestion.BestBlock != blockID { + t.Errorf("expected bestBlock to be %v, got %v", blockID, status.BlockIngestion.BestBlock) + } + + if status.BlockIngestion.BestBlockIngestionTimestamp == nil || time.Since(*status.BlockIngestion.BestBlockIngestionTimestamp) > time.Second { + t.Errorf("bestBlockIngestionTimestamp is not recent") + } + + if !status.ChainSync { + t.Errorf("expected chainSync to be true, got false") + } +} From 91ceb58d0328e2e00ae3cce18c144043badb885a Mon Sep 17 00:00:00 2001 From: otherview Date: Wed, 30 Oct 2024 17:58:50 +0000 Subject: [PATCH 05/17] fix admin routing --- api/admin/admin.go | 6 +++--- comm/communicator.go | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/api/admin/admin.go b/api/admin/admin.go index 95673aa45..49ceb5157 100644 --- a/api/admin/admin.go +++ b/api/admin/admin.go @@ -19,10 +19,10 @@ import ( func New(logLevel *slog.LevelVar, health *health.Health) http.HandlerFunc { router := mux.NewRouter() - router.PathPrefix("/admin") + subRouter := router.PathPrefix("/admin").Subrouter() - loglevel.New(logLevel).Mount(router, "/loglevel") - healthAPI.New(health).Mount(router, "/health") + loglevel.New(logLevel).Mount(subRouter, "/loglevel") + healthAPI.New(health).Mount(subRouter, "/health") handler := handlers.CompressHandler(router) diff --git a/comm/communicator.go b/comm/communicator.go index 7fbef2e22..7639c17d3 100644 --- a/comm/communicator.go +++ b/comm/communicator.go @@ -119,7 +119,6 @@ func (c *Communicator) Sync(ctx context.Context, handler HandleBlockStream) { syncCount++ if shouldSynced() { - c.health.ChainSyncStatus(false) delay = syncInterval c.onceSynced.Do(func() { c.health.ChainSyncStatus(true) From 0206c1febc245e6496b293deb20af90af5efd148 Mon Sep 17 00:00:00 2001 From: otherview Date: Mon, 4 Nov 2024 17:01:31 +0000 Subject: [PATCH 06/17] added comments + changed from ChainSync to ChainBootstrapStatus --- comm/communicator.go | 7 ++++--- health/health.go | 26 +++++++++++++------------- health/health_test.go | 18 +++++++++--------- 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/comm/communicator.go b/comm/communicator.go index 7639c17d3..35fbcc650 100644 --- a/comm/communicator.go +++ b/comm/communicator.go @@ -75,7 +75,7 @@ func (c *Communicator) Sync(ctx context.Context, handler HandleBlockStream) { delay := initSyncInterval syncCount := 0 - shouldSynced := func() bool { + isSynced := func() bool { bestBlockTime := c.repo.BestBlockSummary().Header.Timestamp() now := uint64(time.Now().Unix()) if bestBlockTime+thor.BlockInterval >= now { @@ -118,10 +118,11 @@ func (c *Communicator) Sync(ctx context.Context, handler HandleBlockStream) { } syncCount++ - if shouldSynced() { + if isSynced() { delay = syncInterval c.onceSynced.Do(func() { - c.health.ChainSyncStatus(true) + // once off - after a bootstrap the syncedCh trigger the peers.syncTxs + c.health.BootstrapStatus(true) close(c.syncedCh) }) } diff --git a/health/health.go b/health/health.go index edfec270d..dbd9aefbe 100644 --- a/health/health.go +++ b/health/health.go @@ -18,16 +18,16 @@ type BlockIngestion struct { } type Status struct { - Healthy bool `json:"healthy"` - BlockIngestion *BlockIngestion `json:"blockIngestion"` - ChainSync bool `json:"chainSync"` + Healthy bool `json:"healthy"` + BlockIngestion *BlockIngestion `json:"blockIngestion"` + ChainBootstrapped bool `json:"chainBootstrapped"` } type Health struct { - lock sync.RWMutex - newBestBlock time.Time - bestBlockID *thor.Bytes32 - chainSynced bool + lock sync.RWMutex + newBestBlock time.Time + bestBlockID *thor.Bytes32 + bootstrapStatus bool } func (h *Health) NewBestBlock(ID thor.Bytes32) { @@ -48,18 +48,18 @@ func (h *Health) Status() (*Status, error) { } healthy := time.Since(h.newBestBlock) <= 10*time.Second && // less than 10 secs have passed since a new block was received - h.chainSynced + h.bootstrapStatus return &Status{ - Healthy: healthy, - BlockIngestion: blockIngest, - ChainSync: h.chainSynced, + Healthy: healthy, + BlockIngestion: blockIngest, + ChainBootstrapped: h.bootstrapStatus, }, nil } -func (h *Health) ChainSyncStatus(syncStatus bool) { +func (h *Health) BootstrapStatus(bootstrapStatus bool) { h.lock.Lock() defer h.lock.Unlock() - h.chainSynced = syncStatus + h.bootstrapStatus = bootstrapStatus } diff --git a/health/health_test.go b/health/health_test.go index 30fd266c7..d7eec9c1c 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -28,7 +28,7 @@ func TestHealth_NewBestBlock(t *testing.T) { t.Errorf("newBestBlock timestamp is not recent") } - h.ChainSyncStatus(true) + h.BootstrapStatus(true) status, err := h.Status() require.NoError(t, err) @@ -39,14 +39,14 @@ func TestHealth_NewBestBlock(t *testing.T) { func TestHealth_ChainSyncStatus(t *testing.T) { h := &Health{} - h.ChainSyncStatus(true) - if !h.chainSynced { - t.Errorf("expected chainSynced to be true, got false") + h.BootstrapStatus(true) + if !h.bootstrapStatus { + t.Errorf("expected bootstrapStatus to be true, got false") } - h.ChainSyncStatus(false) - if h.chainSynced { - t.Errorf("expected chainSynced to be false, got true") + h.BootstrapStatus(false) + if h.bootstrapStatus { + t.Errorf("expected bootstrapStatus to be false, got true") } status, err := h.Status() @@ -60,7 +60,7 @@ func TestHealth_Status(t *testing.T) { blockID := thor.Bytes32{0x01, 0x02, 0x03} h.NewBestBlock(blockID) - h.ChainSyncStatus(true) + h.BootstrapStatus(true) status, err := h.Status() if err != nil { @@ -79,7 +79,7 @@ func TestHealth_Status(t *testing.T) { t.Errorf("bestBlockIngestionTimestamp is not recent") } - if !status.ChainSync { + if !status.ChainBootstrapped { t.Errorf("expected chainSync to be true, got false") } } From 17c2ce98f78d9f6a2699c8cbedf08d2fd942c020 Mon Sep 17 00:00:00 2001 From: otherview Date: Mon, 4 Nov 2024 18:14:38 +0000 Subject: [PATCH 07/17] Adding healthcheck for solo mode --- cmd/thor/main.go | 25 ++++++++++++++++--------- health/health.go | 39 +++++++++++++++++++++++++++++---------- health/health_test.go | 2 +- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/cmd/thor/main.go b/cmd/thor/main.go index ba34f5543..1271cfdaa 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -9,6 +9,7 @@ import ( "encoding/json" "fmt" "io" + "math" "os" "path/filepath" "strings" @@ -167,7 +168,7 @@ func defaultAction(ctx *cli.Context) error { return errors.Wrap(err, "parse verbosity flag") } logLevel := initLogger(lvl, ctx.Bool(jsonLogsFlag.Name)) - healthStatus := &health.Health{} + healthStatus := health.New(time.Duration(thor.BlockInterval)) // enable metrics as soon as possible metricsURL := "" @@ -313,7 +314,18 @@ func soloAction(ctx *cli.Context) error { } logLevel := initLogger(lvl, ctx.Bool(jsonLogsFlag.Name)) - healthStatus := &health.Health{} + + onDemandBlockProduction := ctx.Bool(onDemandFlag.Name) + blockProductionInterval := ctx.Uint64(blockInterval.Name) + if blockProductionInterval == 0 { + return errors.New("block-interval cannot be zero") + } + + blockProductionHealthCheck := time.Duration(blockProductionInterval) * time.Second + if onDemandBlockProduction { + blockProductionHealthCheck = math.MaxUint16 * time.Second + } + healthStatus := health.NewSolo(blockProductionHealthCheck) // enable metrics as soon as possible metricsURL := "" @@ -436,11 +448,6 @@ func soloAction(ctx *cli.Context) error { srvCloser() }() - blockInterval := ctx.Uint64(blockInterval.Name) - if blockInterval == 0 { - return errors.New("block-interval cannot be zero") - } - printStartupMessage2(gene, apiURL, "", metricsURL, adminURL) optimizer := optimizer.New(mainDB, repo, !ctx.Bool(disablePrunerFlag.Name)) @@ -452,9 +459,9 @@ func soloAction(ctx *cli.Context) error { healthStatus, txPool, ctx.Uint64(gasLimitFlag.Name), - ctx.Bool(onDemandFlag.Name), + onDemandBlockProduction, skipLogs, - blockInterval, + blockProductionInterval, forkConfig).Run(exitSignal) } diff --git a/health/health.go b/health/health.go index dbd9aefbe..44337a3fd 100644 --- a/health/health.go +++ b/health/health.go @@ -6,6 +6,7 @@ package health import ( + "fmt" "sync" "time" @@ -24,18 +25,26 @@ type Status struct { } type Health struct { - lock sync.RWMutex - newBestBlock time.Time - bestBlockID *thor.Bytes32 - bootstrapStatus bool + lock sync.RWMutex + newBestBlock time.Time + bestBlockID *thor.Bytes32 + bootstrapStatus bool + timeBetweenBlocks time.Duration } -func (h *Health) NewBestBlock(ID thor.Bytes32) { - h.lock.Lock() - defer h.lock.Unlock() +const delayBuffer = 5 * time.Second - h.newBestBlock = time.Now() - h.bestBlockID = &ID +func NewSolo(timeBetweenBlocks time.Duration) *Health { + return &Health{ + timeBetweenBlocks: timeBetweenBlocks + delayBuffer, + // there is no bootstrap in solo mode + bootstrapStatus: true, + } +} +func New(timeBetweenBlocks time.Duration) *Health { + return &Health{ + timeBetweenBlocks: timeBetweenBlocks + delayBuffer, + } } func (h *Health) Status() (*Status, error) { @@ -47,9 +56,11 @@ func (h *Health) Status() (*Status, error) { BestBlockIngestionTimestamp: &h.newBestBlock, } - healthy := time.Since(h.newBestBlock) <= 10*time.Second && // less than 10 secs have passed since a new block was received + healthy := time.Since(h.newBestBlock) <= h.timeBetweenBlocks && // less than 10 secs have passed since a new block was received h.bootstrapStatus + fmt.Println("time between blocks", time.Since(h.newBestBlock).Seconds(), "of max", h.timeBetweenBlocks.Seconds()) + return &Status{ Healthy: healthy, BlockIngestion: blockIngest, @@ -57,6 +68,14 @@ func (h *Health) Status() (*Status, error) { }, nil } +func (h *Health) NewBestBlock(ID thor.Bytes32) { + h.lock.Lock() + defer h.lock.Unlock() + + h.newBestBlock = time.Now() + h.bestBlockID = &ID +} + func (h *Health) BootstrapStatus(bootstrapStatus bool) { h.lock.Lock() defer h.lock.Unlock() diff --git a/health/health_test.go b/health/health_test.go index d7eec9c1c..11f76dcec 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -15,7 +15,7 @@ import ( ) func TestHealth_NewBestBlock(t *testing.T) { - h := &Health{} + h := New(time.Duration(thor.BlockInterval) * time.Second) blockID := thor.Bytes32{0x01, 0x02, 0x03} h.NewBestBlock(blockID) From 1797c228120439d7b84a44c3581cff2bc3ee47dd Mon Sep 17 00:00:00 2001 From: otherview Date: Tue, 5 Nov 2024 15:28:19 +0000 Subject: [PATCH 08/17] adding solo + tests --- health/health.go | 11 ++++------- health/health_test.go | 19 +++++++++++++++---- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/health/health.go b/health/health.go index 44337a3fd..ffb3ad521 100644 --- a/health/health.go +++ b/health/health.go @@ -6,7 +6,6 @@ package health import ( - "fmt" "sync" "time" @@ -14,8 +13,8 @@ import ( ) type BlockIngestion struct { - BestBlock *thor.Bytes32 `json:"bestBlock"` - BestBlockIngestionTimestamp *time.Time `json:"bestBlockIngestionTimestamp"` + ID *thor.Bytes32 `json:"id"` + Timestamp *time.Time `json:"timestamp"` } type Status struct { @@ -52,15 +51,13 @@ func (h *Health) Status() (*Status, error) { defer h.lock.RUnlock() blockIngest := &BlockIngestion{ - BestBlock: h.bestBlockID, - BestBlockIngestionTimestamp: &h.newBestBlock, + ID: h.bestBlockID, + Timestamp: &h.newBestBlock, } healthy := time.Since(h.newBestBlock) <= h.timeBetweenBlocks && // less than 10 secs have passed since a new block was received h.bootstrapStatus - fmt.Println("time between blocks", time.Since(h.newBestBlock).Seconds(), "of max", h.timeBetweenBlocks.Seconds()) - return &Status{ Healthy: healthy, BlockIngestion: blockIngest, diff --git a/health/health_test.go b/health/health_test.go index 11f76dcec..3fc7f147a 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -56,7 +56,7 @@ func TestHealth_ChainSyncStatus(t *testing.T) { } func TestHealth_Status(t *testing.T) { - h := &Health{} + h := New(time.Second) blockID := thor.Bytes32{0x01, 0x02, 0x03} h.NewBestBlock(blockID) @@ -71,11 +71,11 @@ func TestHealth_Status(t *testing.T) { t.Errorf("expected healthy to be true, got false") } - if status.BlockIngestion.BestBlock == nil || *status.BlockIngestion.BestBlock != blockID { - t.Errorf("expected bestBlock to be %v, got %v", blockID, status.BlockIngestion.BestBlock) + if status.BlockIngestion.ID == nil || *status.BlockIngestion.ID != blockID { + t.Errorf("expected bestBlock to be %v, got %v", blockID, status.BlockIngestion.ID) } - if status.BlockIngestion.BestBlockIngestionTimestamp == nil || time.Since(*status.BlockIngestion.BestBlockIngestionTimestamp) > time.Second { + if status.BlockIngestion.Timestamp == nil || time.Since(*status.BlockIngestion.Timestamp) > time.Second { t.Errorf("bestBlockIngestionTimestamp is not recent") } @@ -83,3 +83,14 @@ func TestHealth_Status(t *testing.T) { t.Errorf("expected chainSync to be true, got false") } } + +func TestHealth_Solo(t *testing.T) { + h := NewSolo(time.Duration(thor.BlockInterval) * time.Second) + h.NewBestBlock(thor.Bytes32{0x01, 0x02, 0x03}) + + status, err := h.Status() + require.NoError(t, err) + + require.Equal(t, status.ChainBootstrapped, true) + require.Equal(t, status.Healthy, true) +} From 15a18fc6f1a80a0553409e2c4bea475686590899 Mon Sep 17 00:00:00 2001 From: otherview Date: Wed, 6 Nov 2024 16:49:30 +0000 Subject: [PATCH 09/17] fix log_level handler funcs --- api/admin/loglevel/log_level.go | 64 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/api/admin/loglevel/log_level.go b/api/admin/loglevel/log_level.go index 27b9a04fa..d3c339ce2 100644 --- a/api/admin/loglevel/log_level.go +++ b/api/admin/loglevel/log_level.go @@ -30,49 +30,45 @@ func (l *LogLevel) Mount(root *mux.Router, pathPrefix string) { sub.Path(""). Methods(http.MethodGet). Name("get-log-level"). - HandlerFunc(utils.WrapHandlerFunc(getLogLevelHandler(l.logLevel))) + HandlerFunc(utils.WrapHandlerFunc(l.getLogLevelHandler)) sub.Path(""). Methods(http.MethodPost). Name("post-log-level"). - HandlerFunc(utils.WrapHandlerFunc(postLogLevelHandler(l.logLevel))) + HandlerFunc(utils.WrapHandlerFunc(l.postLogLevelHandler)) } -func getLogLevelHandler(logLevel *slog.LevelVar) utils.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) error { - return utils.WriteJSON(w, Response{ - CurrentLevel: logLevel.Level().String(), - }) - } +func (l *LogLevel) getLogLevelHandler(w http.ResponseWriter, _ *http.Request) error { + return utils.WriteJSON(w, Response{ + CurrentLevel: l.logLevel.Level().String(), + }) } -func postLogLevelHandler(logLevel *slog.LevelVar) utils.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) error { - var req Request - - if err := utils.ParseJSON(r.Body, &req); err != nil { - return utils.BadRequest(errors.WithMessage(err, "Invalid request body")) - } +func (l *LogLevel) postLogLevelHandler(w http.ResponseWriter, r *http.Request) error { + var req Request - switch req.Level { - case "debug": - logLevel.Set(log.LevelDebug) - case "info": - logLevel.Set(log.LevelInfo) - case "warn": - logLevel.Set(log.LevelWarn) - case "error": - logLevel.Set(log.LevelError) - case "trace": - logLevel.Set(log.LevelTrace) - case "crit": - logLevel.Set(log.LevelCrit) - default: - return utils.BadRequest(errors.New("Invalid verbosity level")) - } + if err := utils.ParseJSON(r.Body, &req); err != nil { + return utils.BadRequest(errors.WithMessage(err, "Invalid request body")) + } - return utils.WriteJSON(w, Response{ - CurrentLevel: logLevel.Level().String(), - }) + switch req.Level { + case "debug": + l.logLevel.Set(log.LevelDebug) + case "info": + l.logLevel.Set(log.LevelInfo) + case "warn": + l.logLevel.Set(log.LevelWarn) + case "error": + l.logLevel.Set(log.LevelError) + case "trace": + l.logLevel.Set(log.LevelTrace) + case "crit": + l.logLevel.Set(log.LevelCrit) + default: + return utils.BadRequest(errors.New("Invalid verbosity level")) } + + return utils.WriteJSON(w, Response{ + CurrentLevel: l.logLevel.Level().String(), + }) } From 6b22089bb4e2f64a6a364963abd074941374be6d Mon Sep 17 00:00:00 2001 From: otherview Date: Fri, 15 Nov 2024 15:46:58 +0000 Subject: [PATCH 10/17] refactor health package + add p2p count --- api/admin/health/health_test.go | 13 ++- api/node/node_test.go | 2 - cmd/thor/main.go | 26 +++--- cmd/thor/node/node.go | 6 -- cmd/thor/solo/solo.go | 6 -- cmd/thor/solo/solo_test.go | 3 +- cmd/thor/utils.go | 5 +- comm/communicator.go | 6 +- health/health.go | 77 +++++++++++------- health/health_test.go | 136 +++++++++++++++++--------------- p2psrv/server.go | 4 + p2psrv/server_test.go | 1 + thorclient/api_test.go | 2 - 13 files changed, 152 insertions(+), 135 deletions(-) diff --git a/api/admin/health/health_test.go b/api/admin/health/health_test.go index 193fe688b..d2b584e05 100644 --- a/api/admin/health/health_test.go +++ b/api/admin/health/health_test.go @@ -7,15 +7,19 @@ package health import ( "encoding/json" + "github.com/vechain/thor/v2/txpool" "io" "net/http" "net/http/httptest" "testing" + "time" "github.com/gorilla/mux" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/health" + "github.com/vechain/thor/v2/test/testchain" ) var ts *httptest.Server @@ -30,9 +34,14 @@ func TestHealth(t *testing.T) { assert.Equal(t, http.StatusServiceUnavailable, statusCode) } -func initAPIServer(_ *testing.T) { +func initAPIServer(t *testing.T) { + thorChain, err := testchain.NewIntegrationTestChain() + require.NoError(t, err) + router := mux.NewRouter() - New(&health.Health{}).Mount(router, "/health") + New( + health.New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{})), time.Second), + ).Mount(router, "/health") ts = httptest.NewServer(router) } diff --git a/api/node/node_test.go b/api/node/node_test.go index 28cdcacce..873ad29ad 100644 --- a/api/node/node_test.go +++ b/api/node/node_test.go @@ -14,7 +14,6 @@ import ( "github.com/stretchr/testify/require" "github.com/vechain/thor/v2/api/node" "github.com/vechain/thor/v2/comm" - "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/test/testchain" "github.com/vechain/thor/v2/thorclient" "github.com/vechain/thor/v2/txpool" @@ -42,7 +41,6 @@ func initCommServer(t *testing.T) { LimitPerAccount: 16, MaxLifetime: 10 * time.Minute, }), - &health.Health{}, ) router := mux.NewRouter() diff --git a/cmd/thor/main.go b/cmd/thor/main.go index 1271cfdaa..05e3472a7 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -168,7 +168,6 @@ func defaultAction(ctx *cli.Context) error { return errors.Wrap(err, "parse verbosity flag") } logLevel := initLogger(lvl, ctx.Bool(jsonLogsFlag.Name)) - healthStatus := health.New(time.Duration(thor.BlockInterval)) // enable metrics as soon as possible metricsURL := "" @@ -182,16 +181,6 @@ func defaultAction(ctx *cli.Context) error { defer func() { log.Info("stopping metrics server..."); closeFunc() }() } - adminURL := "" - if ctx.Bool(enableAdminFlag.Name) { - url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, healthStatus) - if err != nil { - return fmt.Errorf("unable to start admin server - %w", err) - } - adminURL = url - defer func() { log.Info("stopping admin server..."); closeFunc() }() - } - gene, forkConfig, err := selectGenesis(ctx) if err != nil { return err @@ -240,11 +229,22 @@ func defaultAction(ctx *cli.Context) error { txPool := txpool.New(repo, state.NewStater(mainDB), txpoolOpt) defer func() { log.Info("closing tx pool..."); txPool.Close() }() - p2pCommunicator, err := newP2PCommunicator(ctx, repo, txPool, instanceDir, healthStatus) + p2pCommunicator, err := newP2PCommunicator(ctx, repo, txPool, instanceDir) if err != nil { return err } + healthStatus := health.New(repo, p2pCommunicator.Communicator(), time.Duration(thor.BlockInterval)) + adminURL := "" + if ctx.Bool(enableAdminFlag.Name) { + url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, healthStatus) + if err != nil { + return fmt.Errorf("unable to start admin server - %w", err) + } + adminURL = url + defer func() { log.Info("stopping admin server..."); closeFunc() }() + } + bftEngine, err := bft.NewEngine(repo, mainDB, forkConfig, master.Address()) if err != nil { return errors.Wrap(err, "init bft engine") @@ -300,7 +300,6 @@ func defaultAction(ctx *cli.Context) error { ctx.Uint64(targetGasLimitFlag.Name), skipLogs, forkConfig, - healthStatus, ).Run(exitSignal) } @@ -456,7 +455,6 @@ func soloAction(ctx *cli.Context) error { return solo.New(repo, state.NewStater(mainDB), logDB, - healthStatus, txPool, ctx.Uint64(gasLimitFlag.Name), onDemandBlockProduction, diff --git a/cmd/thor/node/node.go b/cmd/thor/node/node.go index a3a37800a..d103f227a 100644 --- a/cmd/thor/node/node.go +++ b/cmd/thor/node/node.go @@ -26,7 +26,6 @@ import ( "github.com/vechain/thor/v2/co" "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/consensus" - "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/packer" @@ -65,8 +64,6 @@ type Node struct { maxBlockNum uint32 processLock sync.Mutex logWorker *worker - - health *health.Health } func New( @@ -81,7 +78,6 @@ func New( targetGasLimit uint64, skipLogs bool, forkConfig thor.ForkConfig, - health *health.Health, ) *Node { return &Node{ packer: packer.New(repo, stater, master.Address(), master.Beneficiary, forkConfig), @@ -96,7 +92,6 @@ func New( targetGasLimit: targetGasLimit, skipLogs: skipLogs, forkConfig: forkConfig, - health: health, } } @@ -392,7 +387,6 @@ func (n *Node) processBlock(newBlock *block.Block, stats *blockStats) (bool, err return err } n.processFork(newBlock, oldBest.Header.ID()) - n.health.NewBestBlock(newBlock.Header().ID()) } commitElapsed := mclock.Now() - startTime - execElapsed diff --git a/cmd/thor/solo/solo.go b/cmd/thor/solo/solo.go index 81cbe46ae..638aa74ff 100644 --- a/cmd/thor/solo/solo.go +++ b/cmd/thor/solo/solo.go @@ -22,7 +22,6 @@ import ( "github.com/vechain/thor/v2/cmd/thor/bandwidth" "github.com/vechain/thor/v2/co" "github.com/vechain/thor/v2/genesis" - "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/packer" @@ -44,7 +43,6 @@ type Solo struct { txPool *txpool.TxPool packer *packer.Packer logDB *logdb.LogDB - health *health.Health gasLimit uint64 bandwidth bandwidth.Bandwidth blockInterval uint64 @@ -57,7 +55,6 @@ func New( repo *chain.Repository, stater *state.Stater, logDB *logdb.LogDB, - health *health.Health, txPool *txpool.TxPool, gasLimit uint64, onDemand bool, @@ -76,7 +73,6 @@ func New( &genesis.DevAccounts()[0].Address, forkConfig), logDB: logDB, - health: health, gasLimit: gasLimit, blockInterval: blockInterval, skipLogs: skipLogs, @@ -214,8 +210,6 @@ func (s *Solo) packing(pendingTxs tx.Transactions, onDemand bool) error { ) logger.Debug(b.String()) - s.health.NewBestBlock(b.Header().ID()) - return nil } diff --git a/cmd/thor/solo/solo_test.go b/cmd/thor/solo/solo_test.go index ac30f4980..a4df3f35d 100644 --- a/cmd/thor/solo/solo_test.go +++ b/cmd/thor/solo/solo_test.go @@ -14,7 +14,6 @@ import ( "github.com/vechain/thor/v2/builtin" "github.com/vechain/thor/v2/chain" "github.com/vechain/thor/v2/genesis" - "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/muxdb" "github.com/vechain/thor/v2/state" @@ -31,7 +30,7 @@ func newSolo() *Solo { repo, _ := chain.NewRepository(db, b) mempool := txpool.New(repo, stater, txpool.Options{Limit: 10000, LimitPerAccount: 16, MaxLifetime: 10 * time.Minute}) - return New(repo, stater, logDb, &health.Health{}, mempool, 0, true, false, thor.BlockInterval, thor.ForkConfig{}) + return New(repo, stater, logDb, mempool, 0, true, false, thor.BlockInterval, thor.ForkConfig{}) } func TestInitSolo(t *testing.T) { diff --git a/cmd/thor/utils.go b/cmd/thor/utils.go index c20c7e55d..5c6799354 100644 --- a/cmd/thor/utils.go +++ b/cmd/thor/utils.go @@ -44,7 +44,6 @@ import ( "github.com/vechain/thor/v2/co" "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/genesis" - "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/muxdb" @@ -490,7 +489,7 @@ func loadNodeMaster(ctx *cli.Context) (*node.Master, error) { return master, nil } -func newP2PCommunicator(ctx *cli.Context, repo *chain.Repository, txPool *txpool.TxPool, instanceDir string, health *health.Health) (*p2p.P2P, error) { +func newP2PCommunicator(ctx *cli.Context, repo *chain.Repository, txPool *txpool.TxPool, instanceDir string) (*p2p.P2P, error) { // known peers will be loaded/stored from/in this file peersCachePath := filepath.Join(instanceDir, "peers.cache") @@ -530,7 +529,7 @@ func newP2PCommunicator(ctx *cli.Context, repo *chain.Repository, txPool *txpool } return p2p.New( - comm.New(repo, txPool, health), + comm.New(repo, txPool), key, instanceDir, userNAT, diff --git a/comm/communicator.go b/comm/communicator.go index 35fbcc650..48419779a 100644 --- a/comm/communicator.go +++ b/comm/communicator.go @@ -20,7 +20,6 @@ import ( "github.com/vechain/thor/v2/chain" "github.com/vechain/thor/v2/co" "github.com/vechain/thor/v2/comm/proto" - "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/thor" "github.com/vechain/thor/v2/tx" @@ -40,13 +39,12 @@ type Communicator struct { newBlockFeed event.Feed announcementCh chan *announcement feedScope event.SubscriptionScope - health *health.Health goes co.Goes onceSynced sync.Once } // New create a new Communicator instance. -func New(repo *chain.Repository, txPool *txpool.TxPool, health *health.Health) *Communicator { +func New(repo *chain.Repository, txPool *txpool.TxPool) *Communicator { ctx, cancel := context.WithCancel(context.Background()) return &Communicator{ repo: repo, @@ -54,7 +52,6 @@ func New(repo *chain.Repository, txPool *txpool.TxPool, health *health.Health) * ctx: ctx, cancel: cancel, peerSet: newPeerSet(), - health: health, syncedCh: make(chan struct{}), announcementCh: make(chan *announcement), } @@ -122,7 +119,6 @@ func (c *Communicator) Sync(ctx context.Context, handler HandleBlockStream) { delay = syncInterval c.onceSynced.Do(func() { // once off - after a bootstrap the syncedCh trigger the peers.syncTxs - c.health.BootstrapStatus(true) close(c.syncedCh) }) } diff --git a/health/health.go b/health/health.go index ffb3ad521..37f19b540 100644 --- a/health/health.go +++ b/health/health.go @@ -9,6 +9,8 @@ import ( "sync" "time" + "github.com/vechain/thor/v2/chain" + "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/thor" ) @@ -21,14 +23,14 @@ type Status struct { Healthy bool `json:"healthy"` BlockIngestion *BlockIngestion `json:"blockIngestion"` ChainBootstrapped bool `json:"chainBootstrapped"` + PeerCount int `json:"peerCount"` } type Health struct { lock sync.RWMutex - newBestBlock time.Time - bestBlockID *thor.Bytes32 - bootstrapStatus bool timeBetweenBlocks time.Duration + repo *chain.Repository + p2p *comm.Communicator } const delayBuffer = 5 * time.Second @@ -37,45 +39,62 @@ func NewSolo(timeBetweenBlocks time.Duration) *Health { return &Health{ timeBetweenBlocks: timeBetweenBlocks + delayBuffer, // there is no bootstrap in solo mode - bootstrapStatus: true, } } -func New(timeBetweenBlocks time.Duration) *Health { + +func New(repo *chain.Repository, p2p *comm.Communicator, timeBetweenBlocks time.Duration) *Health { return &Health{ + repo: repo, timeBetweenBlocks: timeBetweenBlocks + delayBuffer, + p2p: p2p, } } +// isNetworkProgressing checks if the network is producing new blocks within the allowed interval. +func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time) bool { + return now.Sub(bestBlockTimestamp) <= h.timeBetweenBlocks +} + +// hasNodeBootstrapped checks if the node has bootstrapped by comparing the block interval. +func (h *Health) hasNodeBootstrapped(now time.Time, bestBlockTimestamp time.Time) bool { + blockInterval := time.Duration(thor.BlockInterval) * time.Second + return bestBlockTimestamp.Add(blockInterval).After(now) +} + +// isNodeConnectedP2P checks if the node is connected to peers +func (h *Health) isNodeConnectedP2P(peerCount int) bool { + return peerCount > 1 +} + func (h *Health) Status() (*Status, error) { h.lock.RLock() defer h.lock.RUnlock() - blockIngest := &BlockIngestion{ - ID: h.bestBlockID, - Timestamp: &h.newBestBlock, - } - - healthy := time.Since(h.newBestBlock) <= h.timeBetweenBlocks && // less than 10 secs have passed since a new block was received - h.bootstrapStatus - - return &Status{ - Healthy: healthy, - BlockIngestion: blockIngest, - ChainBootstrapped: h.bootstrapStatus, - }, nil -} + // Fetch the best block details + bestBlock := h.repo.BestBlockSummary() + bestBlockID := bestBlock.Header.ID() + bestBlockTimestamp := time.Unix(int64(bestBlock.Header.Timestamp()), 0) -func (h *Health) NewBestBlock(ID thor.Bytes32) { - h.lock.Lock() - defer h.lock.Unlock() + // Fetch the current connected peers + connectedPeerCount := h.p2p.PeerCount() + now := time.Now() - h.newBestBlock = time.Now() - h.bestBlockID = &ID -} + // Perform the checks + networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp) + nodeBootstrapped := h.hasNodeBootstrapped(now, bestBlockTimestamp) + nodeConnected := h.isNodeConnectedP2P(connectedPeerCount) -func (h *Health) BootstrapStatus(bootstrapStatus bool) { - h.lock.Lock() - defer h.lock.Unlock() + // Calculate overall health status + healthy := networkProgressing && nodeBootstrapped && nodeConnected - h.bootstrapStatus = bootstrapStatus + // Return the current status + return &Status{ + Healthy: healthy, + BlockIngestion: &BlockIngestion{ + ID: &bestBlockID, + Timestamp: &bestBlockTimestamp, + }, + ChainBootstrapped: nodeBootstrapped, + PeerCount: connectedPeerCount, + }, nil } diff --git a/health/health_test.go b/health/health_test.go index 3fc7f147a..34fc145bf 100644 --- a/health/health_test.go +++ b/health/health_test.go @@ -10,87 +10,95 @@ import ( "time" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "github.com/vechain/thor/v2/thor" ) -func TestHealth_NewBestBlock(t *testing.T) { - h := New(time.Duration(thor.BlockInterval) * time.Second) - blockID := thor.Bytes32{0x01, 0x02, 0x03} - - h.NewBestBlock(blockID) - - if h.bestBlockID == nil || *h.bestBlockID != blockID { - t.Errorf("expected bestBlockID to be %v, got %v", blockID, h.bestBlockID) +func TestHealth_isNetworkProgressing(t *testing.T) { + h := &Health{ + timeBetweenBlocks: 10 * time.Second, } - if time.Since(h.newBestBlock) > time.Second { - t.Errorf("newBestBlock timestamp is not recent") + now := time.Now() + + tests := []struct { + name string + bestBlockTimestamp time.Time + expectedProgressing bool + }{ + { + name: "Progressing - block within timeBetweenBlocks", + bestBlockTimestamp: now.Add(-5 * time.Second), + expectedProgressing: true, + }, + { + name: "Not Progressing - block outside timeBetweenBlocks", + bestBlockTimestamp: now.Add(-15 * time.Second), + expectedProgressing: false, + }, } - h.BootstrapStatus(true) - - status, err := h.Status() - require.NoError(t, err) - - assert.True(t, status.Healthy) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp) + assert.Equal(t, tt.expectedProgressing, isProgressing, "isNetworkProgressing result mismatch") + }) + } } -func TestHealth_ChainSyncStatus(t *testing.T) { +func TestHealth_hasNodeBootstrapped(t *testing.T) { h := &Health{} - - h.BootstrapStatus(true) - if !h.bootstrapStatus { - t.Errorf("expected bootstrapStatus to be true, got false") + blockInterval := time.Duration(thor.BlockInterval) * time.Second + now := time.Now() + + tests := []struct { + name string + bestBlockTimestamp time.Time + expectedBootstrap bool + }{ + { + name: "Bootstrapped - block timestamp within interval", + bestBlockTimestamp: now.Add(-blockInterval + 1*time.Second), + expectedBootstrap: true, + }, + { + name: "Not Bootstrapped - block timestamp outside interval", + bestBlockTimestamp: now.Add(-blockInterval - 1*time.Second), + expectedBootstrap: false, + }, } - h.BootstrapStatus(false) - if h.bootstrapStatus { - t.Errorf("expected bootstrapStatus to be false, got true") + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + isBootstrapped := h.hasNodeBootstrapped(now, tt.bestBlockTimestamp) + assert.Equal(t, tt.expectedBootstrap, isBootstrapped, "hasNodeBootstrapped result mismatch") + }) } - - status, err := h.Status() - require.NoError(t, err) - - assert.False(t, status.Healthy) } -func TestHealth_Status(t *testing.T) { - h := New(time.Second) - blockID := thor.Bytes32{0x01, 0x02, 0x03} - - h.NewBestBlock(blockID) - h.BootstrapStatus(true) - - status, err := h.Status() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - if !status.Healthy { - t.Errorf("expected healthy to be true, got false") - } - - if status.BlockIngestion.ID == nil || *status.BlockIngestion.ID != blockID { - t.Errorf("expected bestBlock to be %v, got %v", blockID, status.BlockIngestion.ID) - } +func TestHealth_isNodeConnectedP2P(t *testing.T) { + h := &Health{} - if status.BlockIngestion.Timestamp == nil || time.Since(*status.BlockIngestion.Timestamp) > time.Second { - t.Errorf("bestBlockIngestionTimestamp is not recent") + tests := []struct { + name string + peerCount int + expectedConnected bool + }{ + { + name: "Connected - more than one peer", + peerCount: 2, + expectedConnected: true, + }, + { + name: "Not Connected - one or fewer peers", + peerCount: 1, + expectedConnected: false, + }, } - if !status.ChainBootstrapped { - t.Errorf("expected chainSync to be true, got false") + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + isConnected := h.isNodeConnectedP2P(tt.peerCount) + assert.Equal(t, tt.expectedConnected, isConnected, "isNodeConnectedP2P result mismatch") + }) } } - -func TestHealth_Solo(t *testing.T) { - h := NewSolo(time.Duration(thor.BlockInterval) * time.Second) - h.NewBestBlock(thor.Bytes32{0x01, 0x02, 0x03}) - - status, err := h.Status() - require.NoError(t, err) - - require.Equal(t, status.ChainBootstrapped, true) - require.Equal(t, status.Healthy, true) -} diff --git a/p2psrv/server.go b/p2psrv/server.go index 3bdf50ea6..3c821f902 100644 --- a/p2psrv/server.go +++ b/p2psrv/server.go @@ -373,3 +373,7 @@ func (s *Server) fetchBootstrap() { func (s *Server) Options() *Options { return s.opts } + +func (s *Server) PeerCount() int { + return s.srv.PeerCount() +} diff --git a/p2psrv/server_test.go b/p2psrv/server_test.go index 5cdc45d12..f98bc644c 100644 --- a/p2psrv/server_test.go +++ b/p2psrv/server_test.go @@ -32,6 +32,7 @@ func TestNewServer(t *testing.T) { } server := New(opts) + server.KnownNodes() assert.Equal(t, "testNode", server.opts.Name) assert.Equal(t, privateKey, server.opts.PrivateKey) diff --git a/thorclient/api_test.go b/thorclient/api_test.go index 55b5376da..e6a0e43be 100644 --- a/thorclient/api_test.go +++ b/thorclient/api_test.go @@ -24,7 +24,6 @@ import ( "github.com/vechain/thor/v2/api/node" "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/genesis" - "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/test/datagen" "github.com/vechain/thor/v2/test/testchain" @@ -70,7 +69,6 @@ func initAPIServer(t *testing.T) (*testchain.Chain, *httptest.Server) { LimitPerAccount: 16, MaxLifetime: 10 * time.Minute, }), - &health.Health{}, ) node.New(communicator).Mount(router, "/node") From 1fdc48b8b3e867f38af0d5905fd8cd88c0f22310 Mon Sep 17 00:00:00 2001 From: otherview Date: Fri, 15 Nov 2024 15:57:18 +0000 Subject: [PATCH 11/17] remove solo methods --- api/admin/health/health_test.go | 2 +- cmd/thor/main.go | 28 ++++++++++++---------------- health/health.go | 15 +++++++-------- p2psrv/server.go | 4 ---- p2psrv/server_test.go | 1 - 5 files changed, 20 insertions(+), 30 deletions(-) diff --git a/api/admin/health/health_test.go b/api/admin/health/health_test.go index d2b584e05..d91efd56a 100644 --- a/api/admin/health/health_test.go +++ b/api/admin/health/health_test.go @@ -7,7 +7,6 @@ package health import ( "encoding/json" - "github.com/vechain/thor/v2/txpool" "io" "net/http" "net/http/httptest" @@ -20,6 +19,7 @@ import ( "github.com/vechain/thor/v2/comm" "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/test/testchain" + "github.com/vechain/thor/v2/txpool" ) var ts *httptest.Server diff --git a/cmd/thor/main.go b/cmd/thor/main.go index 05e3472a7..467a7f681 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -9,7 +9,6 @@ import ( "encoding/json" "fmt" "io" - "math" "os" "path/filepath" "strings" @@ -319,12 +318,7 @@ func soloAction(ctx *cli.Context) error { if blockProductionInterval == 0 { return errors.New("block-interval cannot be zero") } - blockProductionHealthCheck := time.Duration(blockProductionInterval) * time.Second - if onDemandBlockProduction { - blockProductionHealthCheck = math.MaxUint16 * time.Second - } - healthStatus := health.NewSolo(blockProductionHealthCheck) // enable metrics as soon as possible metricsURL := "" @@ -338,16 +332,6 @@ func soloAction(ctx *cli.Context) error { defer func() { log.Info("stopping metrics server..."); closeFunc() }() } - adminURL := "" - if ctx.Bool(enableAdminFlag.Name) { - url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, healthStatus) - if err != nil { - return fmt.Errorf("unable to start admin server - %w", err) - } - adminURL = url - defer func() { log.Info("stopping admin server..."); closeFunc() }() - } - var ( gene *genesis.Genesis forkConfig thor.ForkConfig @@ -392,6 +376,18 @@ func soloAction(ctx *cli.Context) error { return err } + healthStatus := health.New(repo, nil, blockProductionHealthCheck) + + adminURL := "" + if ctx.Bool(enableAdminFlag.Name) { + url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, healthStatus) + if err != nil { + return fmt.Errorf("unable to start admin server - %w", err) + } + adminURL = url + defer func() { log.Info("stopping admin server..."); closeFunc() }() + } + printStartupMessage1(gene, repo, nil, instanceDir, forkConfig) skipLogs := ctx.Bool(skipLogsFlag.Name) diff --git a/health/health.go b/health/health.go index 37f19b540..a8dfdfcca 100644 --- a/health/health.go +++ b/health/health.go @@ -35,13 +35,6 @@ type Health struct { const delayBuffer = 5 * time.Second -func NewSolo(timeBetweenBlocks time.Duration) *Health { - return &Health{ - timeBetweenBlocks: timeBetweenBlocks + delayBuffer, - // there is no bootstrap in solo mode - } -} - func New(repo *chain.Repository, p2p *comm.Communicator, timeBetweenBlocks time.Duration) *Health { return &Health{ repo: repo, @@ -76,7 +69,13 @@ func (h *Health) Status() (*Status, error) { bestBlockTimestamp := time.Unix(int64(bestBlock.Header.Timestamp()), 0) // Fetch the current connected peers - connectedPeerCount := h.p2p.PeerCount() + var connectedPeerCount int + if h.p2p == nil { + connectedPeerCount = 5010 // ignore peers in solo mode + } else { + connectedPeerCount = h.p2p.PeerCount() + } + now := time.Now() // Perform the checks diff --git a/p2psrv/server.go b/p2psrv/server.go index 3c821f902..3bdf50ea6 100644 --- a/p2psrv/server.go +++ b/p2psrv/server.go @@ -373,7 +373,3 @@ func (s *Server) fetchBootstrap() { func (s *Server) Options() *Options { return s.opts } - -func (s *Server) PeerCount() int { - return s.srv.PeerCount() -} diff --git a/p2psrv/server_test.go b/p2psrv/server_test.go index f98bc644c..5cdc45d12 100644 --- a/p2psrv/server_test.go +++ b/p2psrv/server_test.go @@ -32,7 +32,6 @@ func TestNewServer(t *testing.T) { } server := New(opts) - server.KnownNodes() assert.Equal(t, "testNode", server.opts.Name) assert.Equal(t, privateKey, server.opts.PrivateKey) From 430c458013538a3e7c5a44a0d4c7047aa79a6570 Mon Sep 17 00:00:00 2001 From: otherview Date: Mon, 18 Nov 2024 14:28:55 +0000 Subject: [PATCH 12/17] moving health service to api pkg --- api/admin/admin.go | 8 +- api/admin/health/health.go | 98 ++++++++++++++++++------ api/admin/health/health_api.go | 46 +++++++++++ api/admin/health/health_api_test.go | 60 +++++++++++++++ api/admin/health/health_test.go | 113 +++++++++++++++++++--------- api/admin/health/types.go | 10 --- api/admin_server.go | 14 +++- cmd/thor/main.go | 14 ++-- health/health.go | 99 ------------------------ health/health_test.go | 104 ------------------------- 10 files changed, 281 insertions(+), 285 deletions(-) create mode 100644 api/admin/health/health_api.go create mode 100644 api/admin/health/health_api_test.go delete mode 100644 api/admin/health/types.go delete mode 100644 health/health.go delete mode 100644 health/health_test.go diff --git a/api/admin/admin.go b/api/admin/admin.go index 49ceb5157..1e16415f8 100644 --- a/api/admin/admin.go +++ b/api/admin/admin.go @@ -11,18 +11,16 @@ import ( "github.com/gorilla/handlers" "github.com/gorilla/mux" - "github.com/vechain/thor/v2/api/admin/loglevel" - "github.com/vechain/thor/v2/health" - healthAPI "github.com/vechain/thor/v2/api/admin/health" + "github.com/vechain/thor/v2/api/admin/loglevel" ) -func New(logLevel *slog.LevelVar, health *health.Health) http.HandlerFunc { +func New(logLevel *slog.LevelVar, health *healthAPI.Health) http.HandlerFunc { router := mux.NewRouter() subRouter := router.PathPrefix("/admin").Subrouter() loglevel.New(logLevel).Mount(subRouter, "/loglevel") - healthAPI.New(health).Mount(subRouter, "/health") + healthAPI.NewAPI(health).Mount(subRouter, "/health") handler := handlers.CompressHandler(router) diff --git a/api/admin/health/health.go b/api/admin/health/health.go index 44de46095..a8dfdfcca 100644 --- a/api/admin/health/health.go +++ b/api/admin/health/health.go @@ -6,42 +6,94 @@ package health import ( - "net/http" + "sync" + "time" - "github.com/gorilla/mux" - "github.com/vechain/thor/v2/api/utils" - "github.com/vechain/thor/v2/health" + "github.com/vechain/thor/v2/chain" + "github.com/vechain/thor/v2/comm" + "github.com/vechain/thor/v2/thor" ) +type BlockIngestion struct { + ID *thor.Bytes32 `json:"id"` + Timestamp *time.Time `json:"timestamp"` +} + +type Status struct { + Healthy bool `json:"healthy"` + BlockIngestion *BlockIngestion `json:"blockIngestion"` + ChainBootstrapped bool `json:"chainBootstrapped"` + PeerCount int `json:"peerCount"` +} + type Health struct { - healthStatus *health.Health + lock sync.RWMutex + timeBetweenBlocks time.Duration + repo *chain.Repository + p2p *comm.Communicator } -func New(healthStatus *health.Health) *Health { +const delayBuffer = 5 * time.Second + +func New(repo *chain.Repository, p2p *comm.Communicator, timeBetweenBlocks time.Duration) *Health { return &Health{ - healthStatus: healthStatus, + repo: repo, + timeBetweenBlocks: timeBetweenBlocks + delayBuffer, + p2p: p2p, } } -func (h *Health) handleGetHealth(w http.ResponseWriter, _ *http.Request) error { - acc, err := h.healthStatus.Status() - if err != nil { - return err - } +// isNetworkProgressing checks if the network is producing new blocks within the allowed interval. +func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time) bool { + return now.Sub(bestBlockTimestamp) <= h.timeBetweenBlocks +} + +// hasNodeBootstrapped checks if the node has bootstrapped by comparing the block interval. +func (h *Health) hasNodeBootstrapped(now time.Time, bestBlockTimestamp time.Time) bool { + blockInterval := time.Duration(thor.BlockInterval) * time.Second + return bestBlockTimestamp.Add(blockInterval).After(now) +} + +// isNodeConnectedP2P checks if the node is connected to peers +func (h *Health) isNodeConnectedP2P(peerCount int) bool { + return peerCount > 1 +} + +func (h *Health) Status() (*Status, error) { + h.lock.RLock() + defer h.lock.RUnlock() - if !acc.Healthy { - w.WriteHeader(http.StatusServiceUnavailable) // Set the status to 503 + // Fetch the best block details + bestBlock := h.repo.BestBlockSummary() + bestBlockID := bestBlock.Header.ID() + bestBlockTimestamp := time.Unix(int64(bestBlock.Header.Timestamp()), 0) + + // Fetch the current connected peers + var connectedPeerCount int + if h.p2p == nil { + connectedPeerCount = 5010 // ignore peers in solo mode } else { - w.WriteHeader(http.StatusOK) // Set the status to 200 + connectedPeerCount = h.p2p.PeerCount() } - return utils.WriteJSON(w, acc) -} -func (h *Health) Mount(root *mux.Router, pathPrefix string) { - sub := root.PathPrefix(pathPrefix).Subrouter() + now := time.Now() + + // Perform the checks + networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp) + nodeBootstrapped := h.hasNodeBootstrapped(now, bestBlockTimestamp) + nodeConnected := h.isNodeConnectedP2P(connectedPeerCount) + + // Calculate overall health status + healthy := networkProgressing && nodeBootstrapped && nodeConnected - sub.Path(""). - Methods(http.MethodGet). - Name("health"). - HandlerFunc(utils.WrapHandlerFunc(h.handleGetHealth)) + // Return the current status + return &Status{ + Healthy: healthy, + BlockIngestion: &BlockIngestion{ + ID: &bestBlockID, + Timestamp: &bestBlockTimestamp, + }, + ChainBootstrapped: nodeBootstrapped, + PeerCount: connectedPeerCount, + }, nil } diff --git a/api/admin/health/health_api.go b/api/admin/health/health_api.go new file mode 100644 index 000000000..d756ec6af --- /dev/null +++ b/api/admin/health/health_api.go @@ -0,0 +1,46 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package health + +import ( + "net/http" + + "github.com/gorilla/mux" + "github.com/vechain/thor/v2/api/utils" +) + +type API struct { + healthStatus *Health +} + +func NewAPI(healthStatus *Health) *API { + return &API{ + healthStatus: healthStatus, + } +} + +func (h *API) handleGetHealth(w http.ResponseWriter, _ *http.Request) error { + acc, err := h.healthStatus.Status() + if err != nil { + return err + } + + if !acc.Healthy { + w.WriteHeader(http.StatusServiceUnavailable) // Set the status to 503 + } else { + w.WriteHeader(http.StatusOK) // Set the status to 200 + } + return utils.WriteJSON(w, acc) +} + +func (h *API) Mount(root *mux.Router, pathPrefix string) { + sub := root.PathPrefix(pathPrefix).Subrouter() + + sub.Path(""). + Methods(http.MethodGet). + Name("health"). + HandlerFunc(utils.WrapHandlerFunc(h.handleGetHealth)) +} diff --git a/api/admin/health/health_api_test.go b/api/admin/health/health_api_test.go new file mode 100644 index 000000000..1c4339207 --- /dev/null +++ b/api/admin/health/health_api_test.go @@ -0,0 +1,60 @@ +// Copyright (c) 2024 The VeChainThor developers + +// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying +// file LICENSE or + +package health + +import ( + "encoding/json" + "io" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/gorilla/mux" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/vechain/thor/v2/comm" + "github.com/vechain/thor/v2/test/testchain" + "github.com/vechain/thor/v2/txpool" +) + +var ts *httptest.Server + +func TestHealth(t *testing.T) { + initAPIServer(t) + + var healthStatus Status + respBody, statusCode := httpGet(t, ts.URL+"/health") + require.NoError(t, json.Unmarshal(respBody, &healthStatus)) + assert.False(t, healthStatus.Healthy) + assert.Equal(t, http.StatusServiceUnavailable, statusCode) +} + +func initAPIServer(t *testing.T) { + thorChain, err := testchain.NewIntegrationTestChain() + require.NoError(t, err) + + router := mux.NewRouter() + NewAPI( + New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{})), time.Second), + ).Mount(router, "/health") + + ts = httptest.NewServer(router) +} + +func httpGet(t *testing.T, url string) ([]byte, int) { + res, err := http.Get(url) //#nosec G107 + if err != nil { + t.Fatal(err) + } + defer res.Body.Close() + + r, err := io.ReadAll(res.Body) + if err != nil { + t.Fatal(err) + } + return r, res.StatusCode +} diff --git a/api/admin/health/health_test.go b/api/admin/health/health_test.go index d91efd56a..34fc145bf 100644 --- a/api/admin/health/health_test.go +++ b/api/admin/health/health_test.go @@ -6,56 +6,99 @@ package health import ( - "encoding/json" - "io" - "net/http" - "net/http/httptest" "testing" "time" - "github.com/gorilla/mux" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/vechain/thor/v2/comm" - "github.com/vechain/thor/v2/health" - "github.com/vechain/thor/v2/test/testchain" - "github.com/vechain/thor/v2/txpool" + "github.com/vechain/thor/v2/thor" ) -var ts *httptest.Server +func TestHealth_isNetworkProgressing(t *testing.T) { + h := &Health{ + timeBetweenBlocks: 10 * time.Second, + } + + now := time.Now() -func TestHealth(t *testing.T) { - initAPIServer(t) + tests := []struct { + name string + bestBlockTimestamp time.Time + expectedProgressing bool + }{ + { + name: "Progressing - block within timeBetweenBlocks", + bestBlockTimestamp: now.Add(-5 * time.Second), + expectedProgressing: true, + }, + { + name: "Not Progressing - block outside timeBetweenBlocks", + bestBlockTimestamp: now.Add(-15 * time.Second), + expectedProgressing: false, + }, + } - var healthStatus health.Status - respBody, statusCode := httpGet(t, ts.URL+"/health") - require.NoError(t, json.Unmarshal(respBody, &healthStatus)) - assert.False(t, healthStatus.Healthy) - assert.Equal(t, http.StatusServiceUnavailable, statusCode) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp) + assert.Equal(t, tt.expectedProgressing, isProgressing, "isNetworkProgressing result mismatch") + }) + } } -func initAPIServer(t *testing.T) { - thorChain, err := testchain.NewIntegrationTestChain() - require.NoError(t, err) +func TestHealth_hasNodeBootstrapped(t *testing.T) { + h := &Health{} + blockInterval := time.Duration(thor.BlockInterval) * time.Second + now := time.Now() - router := mux.NewRouter() - New( - health.New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{})), time.Second), - ).Mount(router, "/health") + tests := []struct { + name string + bestBlockTimestamp time.Time + expectedBootstrap bool + }{ + { + name: "Bootstrapped - block timestamp within interval", + bestBlockTimestamp: now.Add(-blockInterval + 1*time.Second), + expectedBootstrap: true, + }, + { + name: "Not Bootstrapped - block timestamp outside interval", + bestBlockTimestamp: now.Add(-blockInterval - 1*time.Second), + expectedBootstrap: false, + }, + } - ts = httptest.NewServer(router) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + isBootstrapped := h.hasNodeBootstrapped(now, tt.bestBlockTimestamp) + assert.Equal(t, tt.expectedBootstrap, isBootstrapped, "hasNodeBootstrapped result mismatch") + }) + } } -func httpGet(t *testing.T, url string) ([]byte, int) { - res, err := http.Get(url) //#nosec G107 - if err != nil { - t.Fatal(err) +func TestHealth_isNodeConnectedP2P(t *testing.T) { + h := &Health{} + + tests := []struct { + name string + peerCount int + expectedConnected bool + }{ + { + name: "Connected - more than one peer", + peerCount: 2, + expectedConnected: true, + }, + { + name: "Not Connected - one or fewer peers", + peerCount: 1, + expectedConnected: false, + }, } - defer res.Body.Close() - r, err := io.ReadAll(res.Body) - if err != nil { - t.Fatal(err) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + isConnected := h.isNodeConnectedP2P(tt.peerCount) + assert.Equal(t, tt.expectedConnected, isConnected, "isNodeConnectedP2P result mismatch") + }) } - return r, res.StatusCode } diff --git a/api/admin/health/types.go b/api/admin/health/types.go deleted file mode 100644 index 6646ecad9..000000000 --- a/api/admin/health/types.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) 2024 The VeChainThor developers - -// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying -// file LICENSE or - -package health - -type Response struct { - Healthy bool `json:"healthy"` -} diff --git a/api/admin_server.go b/api/admin_server.go index 95ff613ab..be8da5dfe 100644 --- a/api/admin_server.go +++ b/api/admin_server.go @@ -13,17 +13,25 @@ import ( "github.com/pkg/errors" "github.com/vechain/thor/v2/api/admin" + "github.com/vechain/thor/v2/api/admin/health" + "github.com/vechain/thor/v2/chain" "github.com/vechain/thor/v2/co" - "github.com/vechain/thor/v2/health" + "github.com/vechain/thor/v2/comm" ) -func StartAdminServer(addr string, logLevel *slog.LevelVar, health *health.Health) (string, func(), error) { +func StartAdminServer( + addr string, + logLevel *slog.LevelVar, + repo *chain.Repository, + p2p *comm.Communicator, + timeBetweenBlocks time.Duration, +) (string, func(), error) { listener, err := net.Listen("tcp", addr) if err != nil { return "", nil, errors.Wrapf(err, "listen admin API addr [%v]", addr) } - adminHandler := admin.New(logLevel, health) + adminHandler := admin.New(logLevel, health.New(repo, p2p, timeBetweenBlocks)) srv := &http.Server{Handler: adminHandler, ReadHeaderTimeout: time.Second, ReadTimeout: 5 * time.Second} var goes co.Goes diff --git a/cmd/thor/main.go b/cmd/thor/main.go index 467a7f681..5837eedf6 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -25,7 +25,6 @@ import ( "github.com/vechain/thor/v2/cmd/thor/optimizer" "github.com/vechain/thor/v2/cmd/thor/solo" "github.com/vechain/thor/v2/genesis" - "github.com/vechain/thor/v2/health" "github.com/vechain/thor/v2/log" "github.com/vechain/thor/v2/logdb" "github.com/vechain/thor/v2/metrics" @@ -233,10 +232,15 @@ func defaultAction(ctx *cli.Context) error { return err } - healthStatus := health.New(repo, p2pCommunicator.Communicator(), time.Duration(thor.BlockInterval)) adminURL := "" if ctx.Bool(enableAdminFlag.Name) { - url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, healthStatus) + url, closeFunc, err := api.StartAdminServer( + ctx.String(adminAddrFlag.Name), + logLevel, + repo, + p2pCommunicator.Communicator(), + time.Duration(thor.BlockInterval), + ) if err != nil { return fmt.Errorf("unable to start admin server - %w", err) } @@ -376,11 +380,9 @@ func soloAction(ctx *cli.Context) error { return err } - healthStatus := health.New(repo, nil, blockProductionHealthCheck) - adminURL := "" if ctx.Bool(enableAdminFlag.Name) { - url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, healthStatus) + url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, repo, nil, blockProductionHealthCheck) if err != nil { return fmt.Errorf("unable to start admin server - %w", err) } diff --git a/health/health.go b/health/health.go deleted file mode 100644 index a8dfdfcca..000000000 --- a/health/health.go +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) 2024 The VeChainThor developers - -// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying -// file LICENSE or - -package health - -import ( - "sync" - "time" - - "github.com/vechain/thor/v2/chain" - "github.com/vechain/thor/v2/comm" - "github.com/vechain/thor/v2/thor" -) - -type BlockIngestion struct { - ID *thor.Bytes32 `json:"id"` - Timestamp *time.Time `json:"timestamp"` -} - -type Status struct { - Healthy bool `json:"healthy"` - BlockIngestion *BlockIngestion `json:"blockIngestion"` - ChainBootstrapped bool `json:"chainBootstrapped"` - PeerCount int `json:"peerCount"` -} - -type Health struct { - lock sync.RWMutex - timeBetweenBlocks time.Duration - repo *chain.Repository - p2p *comm.Communicator -} - -const delayBuffer = 5 * time.Second - -func New(repo *chain.Repository, p2p *comm.Communicator, timeBetweenBlocks time.Duration) *Health { - return &Health{ - repo: repo, - timeBetweenBlocks: timeBetweenBlocks + delayBuffer, - p2p: p2p, - } -} - -// isNetworkProgressing checks if the network is producing new blocks within the allowed interval. -func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time) bool { - return now.Sub(bestBlockTimestamp) <= h.timeBetweenBlocks -} - -// hasNodeBootstrapped checks if the node has bootstrapped by comparing the block interval. -func (h *Health) hasNodeBootstrapped(now time.Time, bestBlockTimestamp time.Time) bool { - blockInterval := time.Duration(thor.BlockInterval) * time.Second - return bestBlockTimestamp.Add(blockInterval).After(now) -} - -// isNodeConnectedP2P checks if the node is connected to peers -func (h *Health) isNodeConnectedP2P(peerCount int) bool { - return peerCount > 1 -} - -func (h *Health) Status() (*Status, error) { - h.lock.RLock() - defer h.lock.RUnlock() - - // Fetch the best block details - bestBlock := h.repo.BestBlockSummary() - bestBlockID := bestBlock.Header.ID() - bestBlockTimestamp := time.Unix(int64(bestBlock.Header.Timestamp()), 0) - - // Fetch the current connected peers - var connectedPeerCount int - if h.p2p == nil { - connectedPeerCount = 5010 // ignore peers in solo mode - } else { - connectedPeerCount = h.p2p.PeerCount() - } - - now := time.Now() - - // Perform the checks - networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp) - nodeBootstrapped := h.hasNodeBootstrapped(now, bestBlockTimestamp) - nodeConnected := h.isNodeConnectedP2P(connectedPeerCount) - - // Calculate overall health status - healthy := networkProgressing && nodeBootstrapped && nodeConnected - - // Return the current status - return &Status{ - Healthy: healthy, - BlockIngestion: &BlockIngestion{ - ID: &bestBlockID, - Timestamp: &bestBlockTimestamp, - }, - ChainBootstrapped: nodeBootstrapped, - PeerCount: connectedPeerCount, - }, nil -} diff --git a/health/health_test.go b/health/health_test.go deleted file mode 100644 index 34fc145bf..000000000 --- a/health/health_test.go +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2024 The VeChainThor developers - -// Distributed under the GNU Lesser General Public License v3.0 software license, see the accompanying -// file LICENSE or - -package health - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/vechain/thor/v2/thor" -) - -func TestHealth_isNetworkProgressing(t *testing.T) { - h := &Health{ - timeBetweenBlocks: 10 * time.Second, - } - - now := time.Now() - - tests := []struct { - name string - bestBlockTimestamp time.Time - expectedProgressing bool - }{ - { - name: "Progressing - block within timeBetweenBlocks", - bestBlockTimestamp: now.Add(-5 * time.Second), - expectedProgressing: true, - }, - { - name: "Not Progressing - block outside timeBetweenBlocks", - bestBlockTimestamp: now.Add(-15 * time.Second), - expectedProgressing: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp) - assert.Equal(t, tt.expectedProgressing, isProgressing, "isNetworkProgressing result mismatch") - }) - } -} - -func TestHealth_hasNodeBootstrapped(t *testing.T) { - h := &Health{} - blockInterval := time.Duration(thor.BlockInterval) * time.Second - now := time.Now() - - tests := []struct { - name string - bestBlockTimestamp time.Time - expectedBootstrap bool - }{ - { - name: "Bootstrapped - block timestamp within interval", - bestBlockTimestamp: now.Add(-blockInterval + 1*time.Second), - expectedBootstrap: true, - }, - { - name: "Not Bootstrapped - block timestamp outside interval", - bestBlockTimestamp: now.Add(-blockInterval - 1*time.Second), - expectedBootstrap: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - isBootstrapped := h.hasNodeBootstrapped(now, tt.bestBlockTimestamp) - assert.Equal(t, tt.expectedBootstrap, isBootstrapped, "hasNodeBootstrapped result mismatch") - }) - } -} - -func TestHealth_isNodeConnectedP2P(t *testing.T) { - h := &Health{} - - tests := []struct { - name string - peerCount int - expectedConnected bool - }{ - { - name: "Connected - more than one peer", - peerCount: 2, - expectedConnected: true, - }, - { - name: "Not Connected - one or fewer peers", - peerCount: 1, - expectedConnected: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - isConnected := h.isNodeConnectedP2P(tt.peerCount) - assert.Equal(t, tt.expectedConnected, isConnected, "isNodeConnectedP2P result mismatch") - }) - } -} From f1b000057806f978da307ce2973c12a8b74a6182 Mon Sep 17 00:00:00 2001 From: otherview Date: Wed, 20 Nov 2024 15:47:47 +0000 Subject: [PATCH 13/17] added defaults + api health query --- api/admin/health/health.go | 47 ++++++++++++++++++----------- api/admin/health/health_api.go | 26 ++++++++++++++-- api/admin/health/health_api_test.go | 3 +- api/admin/health/health_test.go | 28 +++++++++-------- api/admin_server.go | 3 +- cmd/thor/main.go | 7 +---- 6 files changed, 71 insertions(+), 43 deletions(-) diff --git a/api/admin/health/health.go b/api/admin/health/health.go index a8dfdfcca..6db522976 100644 --- a/api/admin/health/health.go +++ b/api/admin/health/health.go @@ -27,39 +27,50 @@ type Status struct { } type Health struct { - lock sync.RWMutex - timeBetweenBlocks time.Duration - repo *chain.Repository - p2p *comm.Communicator + lock sync.RWMutex + repo *chain.Repository + p2p *comm.Communicator + isNodeBootstrapped bool } -const delayBuffer = 5 * time.Second +const ( + defaultMaxTimeBetweenSlots = time.Duration(2*thor.BlockInterval) * time.Second + defaultMinPeerCount = 2 +) -func New(repo *chain.Repository, p2p *comm.Communicator, timeBetweenBlocks time.Duration) *Health { +func New(repo *chain.Repository, p2p *comm.Communicator) *Health { return &Health{ - repo: repo, - timeBetweenBlocks: timeBetweenBlocks + delayBuffer, - p2p: p2p, + repo: repo, + p2p: p2p, } } // isNetworkProgressing checks if the network is producing new blocks within the allowed interval. -func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time) bool { - return now.Sub(bestBlockTimestamp) <= h.timeBetweenBlocks +func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time, maxTimeBetweenSlots time.Duration) bool { + return now.Sub(bestBlockTimestamp) <= maxTimeBetweenSlots } // hasNodeBootstrapped checks if the node has bootstrapped by comparing the block interval. +// Once it's marked as done, it never reverts. func (h *Health) hasNodeBootstrapped(now time.Time, bestBlockTimestamp time.Time) bool { + if h.isNodeBootstrapped { + return true + } + blockInterval := time.Duration(thor.BlockInterval) * time.Second - return bestBlockTimestamp.Add(blockInterval).After(now) + if bestBlockTimestamp.Add(blockInterval).After(now) { + h.isNodeBootstrapped = true + } + + return h.isNodeBootstrapped } // isNodeConnectedP2P checks if the node is connected to peers -func (h *Health) isNodeConnectedP2P(peerCount int) bool { - return peerCount > 1 +func (h *Health) isNodeConnectedP2P(peerCount int, minPeerCount int) bool { + return peerCount >= minPeerCount } -func (h *Health) Status() (*Status, error) { +func (h *Health) Status(maxTimeBetweenSlots time.Duration, minPeerCount int) (*Status, error) { h.lock.RLock() defer h.lock.RUnlock() @@ -71,7 +82,7 @@ func (h *Health) Status() (*Status, error) { // Fetch the current connected peers var connectedPeerCount int if h.p2p == nil { - connectedPeerCount = 5010 // ignore peers in solo mode + connectedPeerCount = minPeerCount // ignore peers in solo mode } else { connectedPeerCount = h.p2p.PeerCount() } @@ -79,9 +90,9 @@ func (h *Health) Status() (*Status, error) { now := time.Now() // Perform the checks - networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp) + networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp, maxTimeBetweenSlots) nodeBootstrapped := h.hasNodeBootstrapped(now, bestBlockTimestamp) - nodeConnected := h.isNodeConnectedP2P(connectedPeerCount) + nodeConnected := h.isNodeConnectedP2P(connectedPeerCount, minPeerCount) // Calculate overall health status healthy := networkProgressing && nodeBootstrapped && nodeConnected diff --git a/api/admin/health/health_api.go b/api/admin/health/health_api.go index d756ec6af..9363f82d0 100644 --- a/api/admin/health/health_api.go +++ b/api/admin/health/health_api.go @@ -7,6 +7,8 @@ package health import ( "net/http" + "strconv" + "time" "github.com/gorilla/mux" "github.com/vechain/thor/v2/api/utils" @@ -22,8 +24,28 @@ func NewAPI(healthStatus *Health) *API { } } -func (h *API) handleGetHealth(w http.ResponseWriter, _ *http.Request) error { - acc, err := h.healthStatus.Status() +func (h *API) handleGetHealth(w http.ResponseWriter, r *http.Request) error { + // Parse query parameters + query := r.URL.Query() + + // Default to constants if query parameters are not provided + maxTimeBetweenSlots := defaultMaxTimeBetweenSlots + minPeerCount := defaultMinPeerCount + + // Override with query parameters if they exist + if queryMaxTimeBetweenSlots := query.Get("maxTimeBetweenSlots"); queryMaxTimeBetweenSlots != "" { + if parsed, err := time.ParseDuration(queryMaxTimeBetweenSlots); err == nil { + maxTimeBetweenSlots = parsed + } + } + + if queryMinPeerCount := query.Get("minPeerCount"); queryMinPeerCount != "" { + if parsed, err := strconv.Atoi(queryMinPeerCount); err == nil { + minPeerCount = parsed + } + } + + acc, err := h.healthStatus.Status(maxTimeBetweenSlots, minPeerCount) if err != nil { return err } diff --git a/api/admin/health/health_api_test.go b/api/admin/health/health_api_test.go index 1c4339207..e50af0398 100644 --- a/api/admin/health/health_api_test.go +++ b/api/admin/health/health_api_test.go @@ -11,7 +11,6 @@ import ( "net/http" "net/http/httptest" "testing" - "time" "github.com/gorilla/mux" "github.com/stretchr/testify/assert" @@ -39,7 +38,7 @@ func initAPIServer(t *testing.T) { router := mux.NewRouter() NewAPI( - New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{})), time.Second), + New(thorChain.Repo(), comm.New(thorChain.Repo(), txpool.New(thorChain.Repo(), nil, txpool.Options{}))), ).Mount(router, "/health") ts = httptest.NewServer(router) diff --git a/api/admin/health/health_test.go b/api/admin/health/health_test.go index 34fc145bf..034c4e64e 100644 --- a/api/admin/health/health_test.go +++ b/api/admin/health/health_test.go @@ -10,13 +10,10 @@ import ( "time" "github.com/stretchr/testify/assert" - "github.com/vechain/thor/v2/thor" ) func TestHealth_isNetworkProgressing(t *testing.T) { - h := &Health{ - timeBetweenBlocks: 10 * time.Second, - } + h := &Health{} now := time.Now() @@ -32,14 +29,14 @@ func TestHealth_isNetworkProgressing(t *testing.T) { }, { name: "Not Progressing - block outside timeBetweenBlocks", - bestBlockTimestamp: now.Add(-15 * time.Second), + bestBlockTimestamp: now.Add(-25 * time.Second), expectedProgressing: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp) + isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp, defaultMaxTimeBetweenSlots) assert.Equal(t, tt.expectedProgressing, isProgressing, "isNetworkProgressing result mismatch") }) } @@ -47,7 +44,6 @@ func TestHealth_isNetworkProgressing(t *testing.T) { func TestHealth_hasNodeBootstrapped(t *testing.T) { h := &Health{} - blockInterval := time.Duration(thor.BlockInterval) * time.Second now := time.Now() tests := []struct { @@ -55,15 +51,21 @@ func TestHealth_hasNodeBootstrapped(t *testing.T) { bestBlockTimestamp time.Time expectedBootstrap bool }{ + // keep the order as it matters for health state + { + name: "Not Bootstrapped - block timestamp outside interval", + bestBlockTimestamp: now.Add(-defaultMaxTimeBetweenSlots + 1), + expectedBootstrap: false, + }, { name: "Bootstrapped - block timestamp within interval", - bestBlockTimestamp: now.Add(-blockInterval + 1*time.Second), + bestBlockTimestamp: now.Add(defaultMaxTimeBetweenSlots), expectedBootstrap: true, }, { - name: "Not Bootstrapped - block timestamp outside interval", - bestBlockTimestamp: now.Add(-blockInterval - 1*time.Second), - expectedBootstrap: false, + name: "Bootstrapped only once", + bestBlockTimestamp: now.Add(-defaultMaxTimeBetweenSlots + 1), + expectedBootstrap: true, }, } @@ -85,7 +87,7 @@ func TestHealth_isNodeConnectedP2P(t *testing.T) { }{ { name: "Connected - more than one peer", - peerCount: 2, + peerCount: 3, expectedConnected: true, }, { @@ -97,7 +99,7 @@ func TestHealth_isNodeConnectedP2P(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - isConnected := h.isNodeConnectedP2P(tt.peerCount) + isConnected := h.isNodeConnectedP2P(tt.peerCount, defaultMinPeerCount) assert.Equal(t, tt.expectedConnected, isConnected, "isNodeConnectedP2P result mismatch") }) } diff --git a/api/admin_server.go b/api/admin_server.go index be8da5dfe..f2315aeb1 100644 --- a/api/admin_server.go +++ b/api/admin_server.go @@ -24,14 +24,13 @@ func StartAdminServer( logLevel *slog.LevelVar, repo *chain.Repository, p2p *comm.Communicator, - timeBetweenBlocks time.Duration, ) (string, func(), error) { listener, err := net.Listen("tcp", addr) if err != nil { return "", nil, errors.Wrapf(err, "listen admin API addr [%v]", addr) } - adminHandler := admin.New(logLevel, health.New(repo, p2p, timeBetweenBlocks)) + adminHandler := admin.New(logLevel, health.New(repo, p2p)) srv := &http.Server{Handler: adminHandler, ReadHeaderTimeout: time.Second, ReadTimeout: 5 * time.Second} var goes co.Goes diff --git a/cmd/thor/main.go b/cmd/thor/main.go index 5837eedf6..e80d17d23 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -239,7 +239,6 @@ func defaultAction(ctx *cli.Context) error { logLevel, repo, p2pCommunicator.Communicator(), - time.Duration(thor.BlockInterval), ) if err != nil { return fmt.Errorf("unable to start admin server - %w", err) @@ -319,10 +318,6 @@ func soloAction(ctx *cli.Context) error { onDemandBlockProduction := ctx.Bool(onDemandFlag.Name) blockProductionInterval := ctx.Uint64(blockInterval.Name) - if blockProductionInterval == 0 { - return errors.New("block-interval cannot be zero") - } - blockProductionHealthCheck := time.Duration(blockProductionInterval) * time.Second // enable metrics as soon as possible metricsURL := "" @@ -382,7 +377,7 @@ func soloAction(ctx *cli.Context) error { adminURL := "" if ctx.Bool(enableAdminFlag.Name) { - url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, repo, nil, blockProductionHealthCheck) + url, closeFunc, err := api.StartAdminServer(ctx.String(adminAddrFlag.Name), logLevel, repo, nil) if err != nil { return fmt.Errorf("unable to start admin server - %w", err) } From c0aee23192ee04f8b4abaeada83d7d22bf0a5552 Mon Sep 17 00:00:00 2001 From: otherview Date: Thu, 21 Nov 2024 12:27:00 +0000 Subject: [PATCH 14/17] pr comments --- api/admin/health/health.go | 12 ++++++------ api/admin/health/health_api.go | 10 +++++----- api/admin/health/health_test.go | 8 ++++---- api/admin/loglevel/log_level_test.go | 1 - 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/api/admin/health/health.go b/api/admin/health/health.go index 6db522976..373bac476 100644 --- a/api/admin/health/health.go +++ b/api/admin/health/health.go @@ -34,8 +34,8 @@ type Health struct { } const ( - defaultMaxTimeBetweenSlots = time.Duration(2*thor.BlockInterval) * time.Second - defaultMinPeerCount = 2 + defaultBlockTolerance = time.Duration(2*thor.BlockInterval) * time.Second // 2 blocks tolerance + defaultMinPeerCount = 2 ) func New(repo *chain.Repository, p2p *comm.Communicator) *Health { @@ -46,8 +46,8 @@ func New(repo *chain.Repository, p2p *comm.Communicator) *Health { } // isNetworkProgressing checks if the network is producing new blocks within the allowed interval. -func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time, maxTimeBetweenSlots time.Duration) bool { - return now.Sub(bestBlockTimestamp) <= maxTimeBetweenSlots +func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Time, blockTolerance time.Duration) bool { + return now.Sub(bestBlockTimestamp) <= blockTolerance } // hasNodeBootstrapped checks if the node has bootstrapped by comparing the block interval. @@ -70,7 +70,7 @@ func (h *Health) isNodeConnectedP2P(peerCount int, minPeerCount int) bool { return peerCount >= minPeerCount } -func (h *Health) Status(maxTimeBetweenSlots time.Duration, minPeerCount int) (*Status, error) { +func (h *Health) Status(blockTolerance time.Duration, minPeerCount int) (*Status, error) { h.lock.RLock() defer h.lock.RUnlock() @@ -90,7 +90,7 @@ func (h *Health) Status(maxTimeBetweenSlots time.Duration, minPeerCount int) (*S now := time.Now() // Perform the checks - networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp, maxTimeBetweenSlots) + networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp, blockTolerance) nodeBootstrapped := h.hasNodeBootstrapped(now, bestBlockTimestamp) nodeConnected := h.isNodeConnectedP2P(connectedPeerCount, minPeerCount) diff --git a/api/admin/health/health_api.go b/api/admin/health/health_api.go index 9363f82d0..3bad13f07 100644 --- a/api/admin/health/health_api.go +++ b/api/admin/health/health_api.go @@ -29,13 +29,13 @@ func (h *API) handleGetHealth(w http.ResponseWriter, r *http.Request) error { query := r.URL.Query() // Default to constants if query parameters are not provided - maxTimeBetweenSlots := defaultMaxTimeBetweenSlots + blockTolerance := defaultBlockTolerance minPeerCount := defaultMinPeerCount // Override with query parameters if they exist - if queryMaxTimeBetweenSlots := query.Get("maxTimeBetweenSlots"); queryMaxTimeBetweenSlots != "" { - if parsed, err := time.ParseDuration(queryMaxTimeBetweenSlots); err == nil { - maxTimeBetweenSlots = parsed + if queryBlockTolerance := query.Get("blockTolerance"); queryBlockTolerance != "" { + if parsed, err := time.ParseDuration(queryBlockTolerance); err == nil { + blockTolerance = parsed } } @@ -45,7 +45,7 @@ func (h *API) handleGetHealth(w http.ResponseWriter, r *http.Request) error { } } - acc, err := h.healthStatus.Status(maxTimeBetweenSlots, minPeerCount) + acc, err := h.healthStatus.Status(blockTolerance, minPeerCount) if err != nil { return err } diff --git a/api/admin/health/health_test.go b/api/admin/health/health_test.go index 034c4e64e..35d73fe2e 100644 --- a/api/admin/health/health_test.go +++ b/api/admin/health/health_test.go @@ -36,7 +36,7 @@ func TestHealth_isNetworkProgressing(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp, defaultMaxTimeBetweenSlots) + isProgressing := h.isNetworkProgressing(now, tt.bestBlockTimestamp, defaultBlockTolerance) assert.Equal(t, tt.expectedProgressing, isProgressing, "isNetworkProgressing result mismatch") }) } @@ -54,17 +54,17 @@ func TestHealth_hasNodeBootstrapped(t *testing.T) { // keep the order as it matters for health state { name: "Not Bootstrapped - block timestamp outside interval", - bestBlockTimestamp: now.Add(-defaultMaxTimeBetweenSlots + 1), + bestBlockTimestamp: now.Add(-defaultBlockTolerance + 1), expectedBootstrap: false, }, { name: "Bootstrapped - block timestamp within interval", - bestBlockTimestamp: now.Add(defaultMaxTimeBetweenSlots), + bestBlockTimestamp: now.Add(defaultBlockTolerance), expectedBootstrap: true, }, { name: "Bootstrapped only once", - bestBlockTimestamp: now.Add(-defaultMaxTimeBetweenSlots + 1), + bestBlockTimestamp: now.Add(-defaultBlockTolerance + 1), expectedBootstrap: true, }, } diff --git a/api/admin/loglevel/log_level_test.go b/api/admin/loglevel/log_level_test.go index d04320ce3..3d1a8a960 100644 --- a/api/admin/loglevel/log_level_test.go +++ b/api/admin/loglevel/log_level_test.go @@ -15,7 +15,6 @@ import ( "testing" "github.com/gorilla/mux" - "github.com/stretchr/testify/assert" ) From 34a8adc55016093bc09711db5e41b688c433ad13 Mon Sep 17 00:00:00 2001 From: otherview Date: Wed, 27 Nov 2024 12:14:28 +0000 Subject: [PATCH 15/17] pr comments --- api/admin/health/health.go | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/api/admin/health/health.go b/api/admin/health/health.go index 373bac476..c3b09ad90 100644 --- a/api/admin/health/health.go +++ b/api/admin/health/health.go @@ -20,10 +20,10 @@ type BlockIngestion struct { } type Status struct { - Healthy bool `json:"healthy"` - BlockIngestion *BlockIngestion `json:"blockIngestion"` - ChainBootstrapped bool `json:"chainBootstrapped"` - PeerCount int `json:"peerCount"` + Healthy bool `json:"healthy"` + BestBlockTimestamp *time.Time `json:"bestBlockTimestamp"` + WasChainSynced bool `json:"wasChainSynced"` + PeerCount int `json:"peerCount"` } type Health struct { @@ -76,7 +76,6 @@ func (h *Health) Status(blockTolerance time.Duration, minPeerCount int) (*Status // Fetch the best block details bestBlock := h.repo.BestBlockSummary() - bestBlockID := bestBlock.Header.ID() bestBlockTimestamp := time.Unix(int64(bestBlock.Header.Timestamp()), 0) // Fetch the current connected peers @@ -91,20 +90,17 @@ func (h *Health) Status(blockTolerance time.Duration, minPeerCount int) (*Status // Perform the checks networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp, blockTolerance) - nodeBootstrapped := h.hasNodeBootstrapped(now, bestBlockTimestamp) + wasChainSynced := h.hasNodeBootstrapped(now, bestBlockTimestamp) nodeConnected := h.isNodeConnectedP2P(connectedPeerCount, minPeerCount) // Calculate overall health status - healthy := networkProgressing && nodeBootstrapped && nodeConnected + healthy := networkProgressing && wasChainSynced && nodeConnected // Return the current status return &Status{ - Healthy: healthy, - BlockIngestion: &BlockIngestion{ - ID: &bestBlockID, - Timestamp: &bestBlockTimestamp, - }, - ChainBootstrapped: nodeBootstrapped, - PeerCount: connectedPeerCount, + Healthy: healthy, + BestBlockTimestamp: &bestBlockTimestamp, + WasChainSynced: wasChainSynced, + PeerCount: connectedPeerCount, }, nil } From 93a222a36437ca46912c204b17ea79b8c940d51b Mon Sep 17 00:00:00 2001 From: otherview Date: Fri, 6 Dec 2024 17:17:27 +0000 Subject: [PATCH 16/17] pr comments --- api/admin/health/health.go | 44 +++++++++------------------------ api/admin/health/health_test.go | 35 -------------------------- 2 files changed, 11 insertions(+), 68 deletions(-) diff --git a/api/admin/health/health.go b/api/admin/health/health.go index c3b09ad90..41522e32d 100644 --- a/api/admin/health/health.go +++ b/api/admin/health/health.go @@ -6,7 +6,6 @@ package health import ( - "sync" "time" "github.com/vechain/thor/v2/chain" @@ -20,17 +19,15 @@ type BlockIngestion struct { } type Status struct { - Healthy bool `json:"healthy"` - BestBlockTimestamp *time.Time `json:"bestBlockTimestamp"` - WasChainSynced bool `json:"wasChainSynced"` - PeerCount int `json:"peerCount"` + Healthy bool `json:"healthy"` + BestBlockTime *time.Time `json:"bestBlockTime"` + PeerCount int `json:"peerCount"` + IsNetworkProgressing bool `json:"isNetworkProgressing"` } type Health struct { - lock sync.RWMutex - repo *chain.Repository - p2p *comm.Communicator - isNodeBootstrapped bool + repo *chain.Repository + p2p *comm.Communicator } const ( @@ -50,30 +47,12 @@ func (h *Health) isNetworkProgressing(now time.Time, bestBlockTimestamp time.Tim return now.Sub(bestBlockTimestamp) <= blockTolerance } -// hasNodeBootstrapped checks if the node has bootstrapped by comparing the block interval. -// Once it's marked as done, it never reverts. -func (h *Health) hasNodeBootstrapped(now time.Time, bestBlockTimestamp time.Time) bool { - if h.isNodeBootstrapped { - return true - } - - blockInterval := time.Duration(thor.BlockInterval) * time.Second - if bestBlockTimestamp.Add(blockInterval).After(now) { - h.isNodeBootstrapped = true - } - - return h.isNodeBootstrapped -} - // isNodeConnectedP2P checks if the node is connected to peers func (h *Health) isNodeConnectedP2P(peerCount int, minPeerCount int) bool { return peerCount >= minPeerCount } func (h *Health) Status(blockTolerance time.Duration, minPeerCount int) (*Status, error) { - h.lock.RLock() - defer h.lock.RUnlock() - // Fetch the best block details bestBlock := h.repo.BestBlockSummary() bestBlockTimestamp := time.Unix(int64(bestBlock.Header.Timestamp()), 0) @@ -90,17 +69,16 @@ func (h *Health) Status(blockTolerance time.Duration, minPeerCount int) (*Status // Perform the checks networkProgressing := h.isNetworkProgressing(now, bestBlockTimestamp, blockTolerance) - wasChainSynced := h.hasNodeBootstrapped(now, bestBlockTimestamp) nodeConnected := h.isNodeConnectedP2P(connectedPeerCount, minPeerCount) // Calculate overall health status - healthy := networkProgressing && wasChainSynced && nodeConnected + healthy := networkProgressing && nodeConnected // Return the current status return &Status{ - Healthy: healthy, - BestBlockTimestamp: &bestBlockTimestamp, - WasChainSynced: wasChainSynced, - PeerCount: connectedPeerCount, + Healthy: healthy, + BestBlockTime: &bestBlockTimestamp, + IsNetworkProgressing: networkProgressing, + PeerCount: connectedPeerCount, }, nil } diff --git a/api/admin/health/health_test.go b/api/admin/health/health_test.go index 35d73fe2e..60f9a3dcd 100644 --- a/api/admin/health/health_test.go +++ b/api/admin/health/health_test.go @@ -42,41 +42,6 @@ func TestHealth_isNetworkProgressing(t *testing.T) { } } -func TestHealth_hasNodeBootstrapped(t *testing.T) { - h := &Health{} - now := time.Now() - - tests := []struct { - name string - bestBlockTimestamp time.Time - expectedBootstrap bool - }{ - // keep the order as it matters for health state - { - name: "Not Bootstrapped - block timestamp outside interval", - bestBlockTimestamp: now.Add(-defaultBlockTolerance + 1), - expectedBootstrap: false, - }, - { - name: "Bootstrapped - block timestamp within interval", - bestBlockTimestamp: now.Add(defaultBlockTolerance), - expectedBootstrap: true, - }, - { - name: "Bootstrapped only once", - bestBlockTimestamp: now.Add(-defaultBlockTolerance + 1), - expectedBootstrap: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - isBootstrapped := h.hasNodeBootstrapped(now, tt.bestBlockTimestamp) - assert.Equal(t, tt.expectedBootstrap, isBootstrapped, "hasNodeBootstrapped result mismatch") - }) - } -} - func TestHealth_isNodeConnectedP2P(t *testing.T) { h := &Health{} From 05e7dbf7c72d60b33da6d1bd211c0d49ad031c3e Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Mon, 9 Dec 2024 10:42:04 +0000 Subject: [PATCH 17/17] Update cmd/thor/main.go --- cmd/thor/main.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/thor/main.go b/cmd/thor/main.go index e80d17d23..8e8d51d46 100644 --- a/cmd/thor/main.go +++ b/cmd/thor/main.go @@ -318,6 +318,9 @@ func soloAction(ctx *cli.Context) error { onDemandBlockProduction := ctx.Bool(onDemandFlag.Name) blockProductionInterval := ctx.Uint64(blockInterval.Name) + if blockProductionInterval == 0 { + return errors.New("block-interval cannot be zero") + } // enable metrics as soon as possible metricsURL := ""