From e0fbb3a8e6e8f8735565bf95ee9d869eb19e8420 Mon Sep 17 00:00:00 2001 From: Tanmay Date: Wed, 31 May 2023 13:43:43 -0400 Subject: [PATCH 1/7] add check for keygen stuck at pending --- cmd/zetaclientd/keygen_tss.go | 2 +- cmd/zetaclientd/start.go | 64 +++++++++++++++++++---------------- zetaclient/query.go | 18 +++++++--- zetaclient/tx.go | 4 ++- 4 files changed, 52 insertions(+), 36 deletions(-) diff --git a/cmd/zetaclientd/keygen_tss.go b/cmd/zetaclientd/keygen_tss.go index 5d7216e815..2e4114d847 100644 --- a/cmd/zetaclientd/keygen_tss.go +++ b/cmd/zetaclientd/keygen_tss.go @@ -18,7 +18,7 @@ func keygenTss(cfg *config.Config, tss *mc.TSS, logger zerolog.Logger) error { res, err := tss.Server.Keygen(req) if err != nil || res.Status != tsscommon.Success || res.PubKey == "" { keygenLogger.Error().Msgf("keygen fail: reason %s blame nodes %s", res.Blame.FailReason, res.Blame.BlameNodes) - return errors.Wrap(err, fmt.Sprintf("Keygen fail: reason %s blame nodes %s", res.Blame.FailReason, res.Blame.BlameNodes)) + return errors.New(fmt.Sprintf("Keygen fail: reason %s blame nodes %s, ERR : %s ", res.Blame.FailReason, res.Blame.BlameNodes, err.Error())) } tss.CurrentPubkey = res.PubKey diff --git a/cmd/zetaclientd/start.go b/cmd/zetaclientd/start.go index ef7f94e4c3..8138b995e4 100644 --- a/cmd/zetaclientd/start.go +++ b/cmd/zetaclientd/start.go @@ -131,6 +131,7 @@ func start(_ *cobra.Command, _ []string) error { // Break out of loop only when TSS is generated successfully , either at the keygenBlock or if it has been generated already , Block set as zero in genesis file // This loop will try keygen at the keygen block and then wait for keygen to be successfully reported by all nodes before breaking out of the loop. // If keygen is unsuccessful , it will reset the triedKeygenAtBlock flag and try again at a new keygen block. + if cfg.KeyGenStatus == crosschaintypes.KeygenStatus_KeyGenSuccess { break } @@ -140,47 +141,52 @@ func start(_ *cobra.Command, _ []string) error { continue } // Try generating TSS at keygen block , only when status is pending keygen and generation has not been tried at the block - if cfg.KeyGenStatus == crosschaintypes.KeygenStatus_PendingKeygen && !triedKeygenAtBlock { + if cfg.KeyGenStatus == crosschaintypes.KeygenStatus_PendingKeygen { // Return error if RPC is not working currentBlock, err := zetaBridge.GetZetaBlockHeight() if err != nil { startLogger.Error().Err(err).Msg("GetZetaBlockHeight RPC error") continue } - // If not at keygen block do not try to generate TSS - if currentBlock != cfg.KeygenBlock { - if currentBlock > lastBlock { - lastBlock = currentBlock - startLogger.Info().Msgf("Waiting For Keygen Block to arrive or new keygen block to be set. Keygen Block : %d", cfg.KeygenBlock) - } - continue + if cfg.KeygenBlock > currentBlock { + triedKeygenAtBlock = false } - // Try keygen only once at a particular block, irrespective of whether it is successful or failure - triedKeygenAtBlock = true - err = keygenTss(cfg, tss, masterLogger) - if err != nil { - startLogger.Error().Err(err).Msg("keygenTss error") - tssFailedVoteHash, err := zetaBridge.SetTSS("", cfg.KeygenBlock, common.ReceiveStatus_Failed) + if !triedKeygenAtBlock { + // If not at keygen block do not try to generate TSS + if currentBlock != cfg.KeygenBlock { + if currentBlock > lastBlock { + lastBlock = currentBlock + startLogger.Info().Msgf("Waiting For Keygen Block to arrive or new keygen block to be set. Keygen Block : %d Current Block : %d", cfg.KeygenBlock, currentBlock) + } + continue + } + // Try keygen only once at a particular block, irrespective of whether it is successful or failure + triedKeygenAtBlock = true + err = keygenTss(cfg, tss, masterLogger) + if err != nil { + startLogger.Error().Err(err).Msg("keygenTss error") + tssFailedVoteHash, err := zetaBridge.SetTSS("", cfg.KeygenBlock, common.ReceiveStatus_Failed) + if err != nil { + startLogger.Error().Err(err).Msg("Failed to broadcast Failed TSS Vote to zetacore") + return err + } + startLogger.Info().Msgf("TSS Failed Vote: %s", tssFailedVoteHash) + continue + } + + // If TSS is successful , broadcast the vote to zetacore and set Pubkey + tssSuccessVoteHash, err := zetaBridge.SetTSS(tss.CurrentPubkey, cfg.KeygenBlock, common.ReceiveStatus_Success) if err != nil { - startLogger.Error().Err(err).Msg("Failed to broadcast Failed TSS Vote to zetacore") + startLogger.Error().Err(err).Msg("TSS successful but unable to broadcast vote to zeta-core") return err } - startLogger.Info().Msgf("TSS Failed Vote: %s", tssFailedVoteHash) + startLogger.Info().Msgf("TSS successful Vote: %s", tssSuccessVoteHash) + err = SetTSSPubKey(tss, masterLogger) + if err != nil { + startLogger.Error().Err(err).Msg("SetTSSPubKey error") + } continue } - - // If TSS is successful , broadcast the vote to zetacore and set Pubkey - tssSuccessVoteHash, err := zetaBridge.SetTSS(tss.CurrentPubkey, cfg.KeygenBlock, common.ReceiveStatus_Success) - if err != nil { - startLogger.Error().Err(err).Msg("TSS successful but unable to broadcast vote to zeta-core") - return err - } - startLogger.Info().Msgf("TSS successful Vote: %s", tssSuccessVoteHash) - err = SetTSSPubKey(tss, masterLogger) - if err != nil { - startLogger.Error().Err(err).Msg("SetTSSPubKey error") - } - continue } } err = TestTSS(tss, masterLogger) diff --git a/zetaclient/query.go b/zetaclient/query.go index d5c03304e9..8155632eb9 100644 --- a/zetaclient/query.go +++ b/zetaclient/query.go @@ -2,13 +2,16 @@ package zetaclient import ( "context" + "fmt" "github.com/cosmos/cosmos-sdk/client/grpc/tmservice" "github.com/cosmos/cosmos-sdk/types/query" + "github.com/pkg/errors" tmtypes "github.com/tendermint/tendermint/proto/tendermint/types" "github.com/zeta-chain/zetacore/common" "github.com/zeta-chain/zetacore/x/crosschain/types" zetaObserverTypes "github.com/zeta-chain/zetacore/x/observer/types" "google.golang.org/grpc" + "time" ) func (b *ZetaCoreBridge) GetInboundPermissions() (types.PermissionFlags, error) { @@ -32,11 +35,16 @@ func (b *ZetaCoreBridge) GetCoreParamsForChainID(externalChainID int64) (*zetaOb func (b *ZetaCoreBridge) GetCoreParams() ([]*zetaObserverTypes.CoreParams, error) { client := zetaObserverTypes.NewQueryClient(b.grpcConn) - resp, err := client.GetCoreParams(context.Background(), &zetaObserverTypes.QueryGetCoreParamsRequest{}) - if err != nil { - return nil, err - } - return resp.CoreParams.CoreParams, nil + err := error(nil) + resp := &zetaObserverTypes.QueryGetCoreParamsResponse{} + for i := 0; i <= DefaultRetryCount; i++ { + resp, err = client.GetCoreParams(context.Background(), &zetaObserverTypes.QueryGetCoreParamsRequest{}) + if err == nil { + return resp.CoreParams.CoreParams, nil + } + time.Sleep(DefaultRetryInterval * time.Second) + } + return nil, errors.New(fmt.Sprintf("failed to get core params | err %s", err.Error())) } func (b *ZetaCoreBridge) GetObserverParams() (zetaObserverTypes.Params, error) { diff --git a/zetaclient/tx.go b/zetaclient/tx.go index 632ba1b6ed..805d67bd9c 100644 --- a/zetaclient/tx.go +++ b/zetaclient/tx.go @@ -23,6 +23,7 @@ const ( PostReceiveConfirmationGasLimit = 200_000 DefaultGasLimit = 200_000 DefaultRetryCount = 5 + DefaultRetryInterval = 5 ) func (b *ZetaCoreBridge) WrapMessageWithAuthz(msg sdk.Msg) (sdk.Msg, AuthZSigner) { @@ -120,8 +121,9 @@ func (b *ZetaCoreBridge) SetTSS(tssPubkey string, keyGenZetaHeight int64, status if err == nil { return zetaTxHash, nil } + time.Sleep(DefaultRetryInterval * time.Second) } - return "", errors.Wrap(err, "set tss failed") + return "", errors.New(fmt.Sprintf("set tss failed | err %s", err.Error())) } func (b *ZetaCoreBridge) ConfigUpdater(cfg *config.Config) { From 07499435f4024a5436c62e67167a6b3152d9bf20 Mon Sep 17 00:00:00 2001 From: Tanmay Date: Wed, 31 May 2023 15:46:47 -0400 Subject: [PATCH 2/7] return err only if not empty --- cmd/zetaclientd/keygen_tss.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cmd/zetaclientd/keygen_tss.go b/cmd/zetaclientd/keygen_tss.go index 2e4114d847..ffbd297808 100644 --- a/cmd/zetaclientd/keygen_tss.go +++ b/cmd/zetaclientd/keygen_tss.go @@ -16,9 +16,13 @@ func keygenTss(cfg *config.Config, tss *mc.TSS, logger zerolog.Logger) error { var req keygen.Request req = keygen.NewRequest(cfg.KeyGenPubKeys, cfg.KeygenBlock, "0.14.0") res, err := tss.Server.Keygen(req) - if err != nil || res.Status != tsscommon.Success || res.PubKey == "" { + if res.Status != tsscommon.Success || res.PubKey == "" { keygenLogger.Error().Msgf("keygen fail: reason %s blame nodes %s", res.Blame.FailReason, res.Blame.BlameNodes) - return errors.New(fmt.Sprintf("Keygen fail: reason %s blame nodes %s, ERR : %s ", res.Blame.FailReason, res.Blame.BlameNodes, err.Error())) + return errors.New(fmt.Sprintf("Keygen fail: reason %s blame nodes %s", res.Blame.FailReason, res.Blame.BlameNodes)) + } + if err != nil { + keygenLogger.Error().Msgf("keygen fail: reason %s ", err.Error()) + return err } tss.CurrentPubkey = res.PubKey From e1a0e4ac43a8cbe36de5711be5bd915d13f4ee92 Mon Sep 17 00:00:00 2001 From: Tanmay Date: Wed, 31 May 2023 17:17:41 -0400 Subject: [PATCH 3/7] add comments to explain keygen logic --- cmd/zetaclientd/keygen_tss.go | 3 +-- zetaclient/query.go | 3 +-- zetaclient/tx.go | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cmd/zetaclientd/keygen_tss.go b/cmd/zetaclientd/keygen_tss.go index ffbd297808..b9ee0e8238 100644 --- a/cmd/zetaclientd/keygen_tss.go +++ b/cmd/zetaclientd/keygen_tss.go @@ -2,7 +2,6 @@ package main import ( "fmt" - "github.com/pkg/errors" "github.com/rs/zerolog" mc "github.com/zeta-chain/zetacore/zetaclient" "github.com/zeta-chain/zetacore/zetaclient/config" @@ -18,7 +17,7 @@ func keygenTss(cfg *config.Config, tss *mc.TSS, logger zerolog.Logger) error { res, err := tss.Server.Keygen(req) if res.Status != tsscommon.Success || res.PubKey == "" { keygenLogger.Error().Msgf("keygen fail: reason %s blame nodes %s", res.Blame.FailReason, res.Blame.BlameNodes) - return errors.New(fmt.Sprintf("Keygen fail: reason %s blame nodes %s", res.Blame.FailReason, res.Blame.BlameNodes)) + return fmt.Errorf("keygen fail: reason %s blame nodes %s", res.Blame.FailReason, res.Blame.BlameNodes) } if err != nil { keygenLogger.Error().Msgf("keygen fail: reason %s ", err.Error()) diff --git a/zetaclient/query.go b/zetaclient/query.go index 8155632eb9..f11a28c085 100644 --- a/zetaclient/query.go +++ b/zetaclient/query.go @@ -5,7 +5,6 @@ import ( "fmt" "github.com/cosmos/cosmos-sdk/client/grpc/tmservice" "github.com/cosmos/cosmos-sdk/types/query" - "github.com/pkg/errors" tmtypes "github.com/tendermint/tendermint/proto/tendermint/types" "github.com/zeta-chain/zetacore/common" "github.com/zeta-chain/zetacore/x/crosschain/types" @@ -44,7 +43,7 @@ func (b *ZetaCoreBridge) GetCoreParams() ([]*zetaObserverTypes.CoreParams, error } time.Sleep(DefaultRetryInterval * time.Second) } - return nil, errors.New(fmt.Sprintf("failed to get core params | err %s", err.Error())) + return nil, fmt.Errorf("failed to get core params | err %s", err.Error()) } func (b *ZetaCoreBridge) GetObserverParams() (zetaObserverTypes.Params, error) { diff --git a/zetaclient/tx.go b/zetaclient/tx.go index 805d67bd9c..4b0b4f74d9 100644 --- a/zetaclient/tx.go +++ b/zetaclient/tx.go @@ -5,7 +5,6 @@ import ( "fmt" sdk "github.com/cosmos/cosmos-sdk/types" "github.com/cosmos/cosmos-sdk/x/authz" - "github.com/pkg/errors" "github.com/zeta-chain/zetacore/zetaclient/config" "math/big" "time" @@ -123,7 +122,7 @@ func (b *ZetaCoreBridge) SetTSS(tssPubkey string, keyGenZetaHeight int64, status } time.Sleep(DefaultRetryInterval * time.Second) } - return "", errors.New(fmt.Sprintf("set tss failed | err %s", err.Error())) + return "", fmt.Errorf("set tss failed | err %s", err.Error()) } func (b *ZetaCoreBridge) ConfigUpdater(cfg *config.Config) { From abf321e919d54624072c2788d2130b78d67be0d4 Mon Sep 17 00:00:00 2001 From: Tanmay Date: Wed, 31 May 2023 17:32:36 -0400 Subject: [PATCH 4/7] add additional logs --- cmd/zetaclientd/start.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/zetaclientd/start.go b/cmd/zetaclientd/start.go index 8138b995e4..187e28ae34 100644 --- a/cmd/zetaclientd/start.go +++ b/cmd/zetaclientd/start.go @@ -148,6 +148,7 @@ func start(_ *cobra.Command, _ []string) error { startLogger.Error().Err(err).Msg("GetZetaBlockHeight RPC error") continue } + // Reset the flag if the keygen block has passed and a new keygen block has been set . This condition is only reached if the older keygen is stuck at PendingKeygen for some reason if cfg.KeygenBlock > currentBlock { triedKeygenAtBlock = false } @@ -188,6 +189,7 @@ func start(_ *cobra.Command, _ []string) error { continue } } + startLogger.Info().Msgf("Waiting for TSS to be generated or Current Keygen to be be finalized. Keygen Block : %d ", cfg.KeygenBlock) } err = TestTSS(tss, masterLogger) if err != nil { From d1926f1832f4598a56fcee55fd8ad5202dcc5f9f Mon Sep 17 00:00:00 2001 From: Tanmay Date: Wed, 31 May 2023 18:44:06 -0400 Subject: [PATCH 5/7] add check for empty LocalID --- cmd/zetaclientd/start.go | 3 ++- zetaclient/tss_signer.go | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cmd/zetaclientd/start.go b/cmd/zetaclientd/start.go index 187e28ae34..064bfea59f 100644 --- a/cmd/zetaclientd/start.go +++ b/cmd/zetaclientd/start.go @@ -117,6 +117,7 @@ func start(_ *cobra.Command, _ []string) error { startLogger.Error().Err(err).Msg("NewTSS error") return err } + // If Keygen block is set it will try to generate new TSS at the block // This is a blocking thread and will wait until the ceremony is complete successfully // If the TSS generation is unsuccessful , it will loop indefinitely until a new TSS is generated @@ -189,7 +190,7 @@ func start(_ *cobra.Command, _ []string) error { continue } } - startLogger.Info().Msgf("Waiting for TSS to be generated or Current Keygen to be be finalized. Keygen Block : %d ", cfg.KeygenBlock) + startLogger.Debug().Msgf("Waiting for TSS to be generated or Current Keygen to be be finalized. Keygen Block : %d ", cfg.KeygenBlock) } err = TestTSS(tss, masterLogger) if err != nil { diff --git a/zetaclient/tss_signer.go b/zetaclient/tss_signer.go index b84b800d2b..be3da1af12 100644 --- a/zetaclient/tss_signer.go +++ b/zetaclient/tss_signer.go @@ -5,6 +5,7 @@ import ( "encoding/base64" "encoding/hex" "fmt" + peer2 "github.com/libp2p/go-libp2p/core/peer" "github.com/zeta-chain/zetacore/zetaclient/config" "gitlab.com/thorchain/tss/go-tss/p2p" "path" @@ -404,6 +405,13 @@ func SetupTSSServer(peer p2p.AddrList, privkey tmcrypto.PrivKey, preParams *keyg } log.Info().Msgf("LocalID: %v", tssServer.GetLocalPeerID()) + if tssServer.GetLocalPeerID() == "" || + tssServer.GetLocalPeerID() == "0" || + tssServer.GetLocalPeerID() == "000000000000000000000000000000" || + tssServer.GetLocalPeerID() == peer2.ID("").String() { + log.Error().Msg("tss server start error") + return nil, nil, fmt.Errorf("tss server start error") + } s := NewHTTPServer(tssServer.GetLocalPeerID()) go func() { From ba4ea871f601176d03cc05cb6896a4a44204aa78 Mon Sep 17 00:00:00 2001 From: Tanmay Date: Wed, 31 May 2023 19:15:39 -0400 Subject: [PATCH 6/7] Remove zetaclient exit on empty chains --- cmd/zetaclientd/start.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/zetaclientd/start.go b/cmd/zetaclientd/start.go index 064bfea59f..3a538372cb 100644 --- a/cmd/zetaclientd/start.go +++ b/cmd/zetaclientd/start.go @@ -79,8 +79,7 @@ func start(_ *cobra.Command, _ []string) error { time.Sleep((time.Duration(cfg.ConfigUpdateTicker) + 1) * time.Second) startLogger.Info().Msgf("Config is updated from ZetaCore %s", cfg.String()) if len(cfg.ChainsEnabled) == 0 { - startLogger.Info().Msgf("No chains enabled, exiting") - return nil + startLogger.Error().Msgf("No chains enabled in updated config ", cfg.String()) } // Generate TSS address . The Tss address is generated through Keygen ceremony. The TSS key is used to sign all outbound transactions . @@ -88,7 +87,7 @@ func start(_ *cobra.Command, _ []string) error { // After generating the key , the address is set on the zetacore bridgePk, err := zetaBridge.GetKeys().GetPrivateKey() if err != nil { - startLogger.Error().Err(err).Msg("GetKeys GetPrivateKey error:") + startLogger.Error().Err(err).Msg("zetabridge getPrivateKey error") } startLogger.Debug().Msgf("bridgePk %s", bridgePk.String()) if len(bridgePk.Bytes()) != 32 { From 29db04d6d3b4db290da39985b5a4f8a51ebfae4f Mon Sep 17 00:00:00 2001 From: Tanmay Date: Wed, 31 May 2023 19:19:44 -0400 Subject: [PATCH 7/7] Remove zetaclient exit on empty chains --- cmd/zetaclientd/start.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/zetaclientd/start.go b/cmd/zetaclientd/start.go index 3a538372cb..e71c84ce2d 100644 --- a/cmd/zetaclientd/start.go +++ b/cmd/zetaclientd/start.go @@ -79,7 +79,7 @@ func start(_ *cobra.Command, _ []string) error { time.Sleep((time.Duration(cfg.ConfigUpdateTicker) + 1) * time.Second) startLogger.Info().Msgf("Config is updated from ZetaCore %s", cfg.String()) if len(cfg.ChainsEnabled) == 0 { - startLogger.Error().Msgf("No chains enabled in updated config ", cfg.String()) + startLogger.Error().Msgf("No chains enabled in updated config %s ", cfg.String()) } // Generate TSS address . The Tss address is generated through Keygen ceremony. The TSS key is used to sign all outbound transactions .