From 59d8130e4af8d48dc10ef3053aa51384707f7d77 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Thu, 16 Jan 2020 11:11:02 +0000 Subject: [PATCH 1/4] common: Deprecate the debug sub-package in favor of the runtime --- go/common/crash/crash.go | 3 +-- go/common/debug/debug.go | 10 ---------- go/common/debug/debug_amd64.s | 7 ------- 3 files changed, 1 insertion(+), 19 deletions(-) delete mode 100644 go/common/debug/debug.go delete mode 100644 go/common/debug/debug_amd64.s diff --git a/go/common/crash/crash.go b/go/common/crash/crash.go index f953767624c..4f4aac654e8 100644 --- a/go/common/crash/crash.go +++ b/go/common/crash/crash.go @@ -11,7 +11,6 @@ import ( flag "github.com/spf13/pflag" "github.com/spf13/viper" - "github.com/oasislabs/oasis-core/go/common/debug" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/common/random" cmdFlags "github.com/oasislabs/oasis-core/go/oasis-node/cmd/common/flags" @@ -55,7 +54,7 @@ func newDefaultRandomProvider() RandomProvider { } func defaultCrashMethod() { - debug.Trap() + runtime.Breakpoint() } var crashGlobal *Crasher diff --git a/go/common/debug/debug.go b/go/common/debug/debug.go deleted file mode 100644 index be45e2b463e..00000000000 --- a/go/common/debug/debug.go +++ /dev/null @@ -1,10 +0,0 @@ -// Package debug implements various debugging utilities. -package debug - -//go:noescape -func debugTrap() - -// Trap crashes the current process with `SIGTRAP`. -func Trap() { - debugTrap() -} diff --git a/go/common/debug/debug_amd64.s b/go/common/debug/debug_amd64.s deleted file mode 100644 index 461be4a8949..00000000000 --- a/go/common/debug/debug_amd64.s +++ /dev/null @@ -1,7 +0,0 @@ -// +build !noasm - -#include "textflag.h" - -TEXT ·debugTrap(SB), NOSPLIT|NOFRAME, $0-0 - BYTE $0xcc // INT 3 - RET From 56ea5e8651fbf1d5505686c3b0b3c56c802acad9 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Fri, 17 Jan 2020 10:37:05 +0000 Subject: [PATCH 2/4] go: Update the backoff module to the latest version --- go/consensus/api/submission.go | 2 +- go/go.mod | 2 +- go/go.sum | 4 ++-- go/keymanager/client/client.go | 2 +- go/runtime/client/client.go | 2 +- go/runtime/tagindexer/tagindexer.go | 2 +- go/storage/client/client.go | 2 +- go/worker/common/p2p/p2p.go | 2 +- go/worker/registration/worker.go | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/go/consensus/api/submission.go b/go/consensus/api/submission.go index 4cfb9205432..deccc021581 100644 --- a/go/consensus/api/submission.go +++ b/go/consensus/api/submission.go @@ -4,7 +4,7 @@ import ( "context" "time" - "github.com/cenkalti/backoff" + "github.com/cenkalti/backoff/v4" "github.com/oasislabs/oasis-core/go/common/crypto/signature" "github.com/oasislabs/oasis-core/go/common/errors" diff --git a/go/go.mod b/go/go.mod index 0d92de2fe9f..60c2dc21486 100644 --- a/go/go.mod +++ b/go/go.mod @@ -14,7 +14,7 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.2 // indirect github.com/blevesearch/segment v0.0.0-20160915185041-762005e7a34f // indirect github.com/btcsuite/btcd v0.0.0-20190614013741-962a206e94e9 // indirect - github.com/cenkalti/backoff v2.1.1+incompatible + github.com/cenkalti/backoff/v4 v4.0.0 github.com/codahale/hdrhistogram v0.0.0-20161010025455-3a0bb77429bd // indirect github.com/couchbase/vellum v0.0.0-20190610201045-ec7b775d247f // indirect github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect diff --git a/go/go.sum b/go/go.sum index 4a2775ecd69..918711d773e 100644 --- a/go/go.sum +++ b/go/go.sum @@ -43,8 +43,8 @@ github.com/btcsuite/goleveldb v0.0.0-20160330041536-7834afc9e8cd/go.mod h1:F+uVa github.com/btcsuite/snappy-go v0.0.0-20151229074030-0bdef8d06723/go.mod h1:8woku9dyThutzjeg+3xrA5iCpBRH8XEEg3lh6TiUghc= github.com/btcsuite/websocket v0.0.0-20150119174127-31079b680792/go.mod h1:ghJtEyQwv5/p4Mg4C0fgbePVuGr935/5ddU9Z3TmDRY= github.com/btcsuite/winsvc v1.0.0/go.mod h1:jsenWakMcC0zFBFurPLEAyrnc/teJEM1O46fmI40EZs= -github.com/cenkalti/backoff v2.1.1+incompatible h1:tKJnvO2kl0zmb/jA5UKAt4VoEVw1qxKWjE/Bpp46npY= -github.com/cenkalti/backoff v2.1.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= +github.com/cenkalti/backoff/v4 v4.0.0 h1:6VeaLF9aI+MAUQ95106HwWzYZgJJpZ4stumjj6RFYAU= +github.com/cenkalti/backoff/v4 v4.0.0/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= diff --git a/go/keymanager/client/client.go b/go/keymanager/client/client.go index 0bf576dff78..79f7160c013 100644 --- a/go/keymanager/client/client.go +++ b/go/keymanager/client/client.go @@ -9,7 +9,7 @@ import ( "sync" "time" - "github.com/cenkalti/backoff" + "github.com/cenkalti/backoff/v4" "github.com/pkg/errors" "google.golang.org/grpc" "google.golang.org/grpc/balancer/roundrobin" diff --git a/go/runtime/client/client.go b/go/runtime/client/client.go index 1abe3c972c9..e56f0e9744f 100644 --- a/go/runtime/client/client.go +++ b/go/runtime/client/client.go @@ -7,7 +7,7 @@ import ( "sync" "time" - "github.com/cenkalti/backoff" + "github.com/cenkalti/backoff/v4" "github.com/pkg/errors" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" diff --git a/go/runtime/tagindexer/tagindexer.go b/go/runtime/tagindexer/tagindexer.go index 7f66fda8c94..1153b4a95d7 100644 --- a/go/runtime/tagindexer/tagindexer.go +++ b/go/runtime/tagindexer/tagindexer.go @@ -5,7 +5,7 @@ import ( "context" "time" - "github.com/cenkalti/backoff" + "github.com/cenkalti/backoff/v4" "github.com/oasislabs/oasis-core/go/common" "github.com/oasislabs/oasis-core/go/common/logging" diff --git a/go/storage/client/client.go b/go/storage/client/client.go index 6b34329ee82..841015260e6 100644 --- a/go/storage/client/client.go +++ b/go/storage/client/client.go @@ -8,7 +8,7 @@ import ( "math/rand" "time" - "github.com/cenkalti/backoff" + "github.com/cenkalti/backoff/v4" "github.com/pkg/errors" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" diff --git a/go/worker/common/p2p/p2p.go b/go/worker/common/p2p/p2p.go index 0eb50115b82..17146e97c09 100644 --- a/go/worker/common/p2p/p2p.go +++ b/go/worker/common/p2p/p2p.go @@ -8,7 +8,7 @@ import ( "sync" "time" - "github.com/cenkalti/backoff" + "github.com/cenkalti/backoff/v4" "github.com/libp2p/go-libp2p" "github.com/libp2p/go-libp2p-core" "github.com/libp2p/go-libp2p-core/helpers" diff --git a/go/worker/registration/worker.go b/go/worker/registration/worker.go index 15fa86e821c..b4bbfea16f2 100644 --- a/go/worker/registration/worker.go +++ b/go/worker/registration/worker.go @@ -7,7 +7,7 @@ import ( "sync" "sync/atomic" - "github.com/cenkalti/backoff" + "github.com/cenkalti/backoff/v4" "github.com/pkg/errors" flag "github.com/spf13/pflag" "github.com/spf13/viper" From fb8cf19a3fa2e7b1d6f1a1657a2aeee6a8a1287f Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Tue, 21 Jan 2020 11:37:58 +0000 Subject: [PATCH 3/4] go: Bump the tendermint version to v0.32.8-oasis2 --- .changelog/2569.bugfix.1.md | 10 ++++++++++ go/go.mod | 2 +- go/go.sum | 4 ++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 .changelog/2569.bugfix.1.md diff --git a/.changelog/2569.bugfix.1.md b/.changelog/2569.bugfix.1.md new file mode 100644 index 00000000000..24024468bfc --- /dev/null +++ b/.changelog/2569.bugfix.1.md @@ -0,0 +1,10 @@ +Use a newer version of the oasis-core tendermint fork + +The updated fork has additional changes to tendermint to hopefully +prevent the node from crashing if the file descriptors available to the +process get exhausted due to hitting the rlimit. + +While no forward progress can be made while the node is re-opening the +WAL, the node will now flush incoming connections that are in the process +of handshaking, and retry re-opening the WAL instead of crashing with +a panic. diff --git a/go/go.mod b/go/go.mod index 60c2dc21486..fce95d3f0a7 100644 --- a/go/go.mod +++ b/go/go.mod @@ -2,7 +2,7 @@ module github.com/oasislabs/oasis-core/go replace ( github.com/tendermint/iavl => github.com/oasislabs/iavl v0.12.0-ekiden3 - github.com/tendermint/tendermint => github.com/oasislabs/tendermint v0.32.8-oasis1 + github.com/tendermint/tendermint => github.com/oasislabs/tendermint v0.32.8-oasis2 golang.org/x/crypto/curve25519 => github.com/oasislabs/ed25519/extra/x25519 v0.0.0-20191022155220-a426dcc8ad5f golang.org/x/crypto/ed25519 => github.com/oasislabs/ed25519 v0.0.0-20191109133925-b197a691e30d ) diff --git a/go/go.sum b/go/go.sum index 918711d773e..6225cc9e530 100644 --- a/go/go.sum +++ b/go/go.sum @@ -352,8 +352,12 @@ github.com/oasislabs/ed25519 v0.0.0-20191122104632-9d9ffc15f526 h1:xKlK+m6tNFucK github.com/oasislabs/ed25519 v0.0.0-20191122104632-9d9ffc15f526/go.mod h1:xIpCyrK2ouGA4QBGbiNbkoONrvJ00u9P3QOkXSOAC0c= github.com/oasislabs/iavl v0.12.0-ekiden3 h1:8544fXJb57urhAEpTlIwDBdTJukgpPS/FCS/yj14I8E= github.com/oasislabs/iavl v0.12.0-ekiden3/go.mod h1:B/tMpl5cg7n42n3xYQTCckJzQezoI75jedkc8FOiOF0= +github.com/oasislabs/safeopen v0.0.0-20200117113835-6aa648f43ff8 h1:KC7dcrx0WEeyAWGAG+vdJjmIW36PUfw1x/LUnHjLm2E= +github.com/oasislabs/safeopen v0.0.0-20200117113835-6aa648f43ff8/go.mod h1:ABsG2IHM7bpTRIH3EvQ8CZQEBkzuhLxXFxaYApYMB9Y= github.com/oasislabs/tendermint v0.32.8-oasis1 h1:y+RZsI7D6jFqV/OufXA0meDOgn6eJLsXa5a0iyRLIRU= github.com/oasislabs/tendermint v0.32.8-oasis1/go.mod h1:SJjyKg9RLf9FUvqo9sJSpcT/COwormN9U05vR70It/A= +github.com/oasislabs/tendermint v0.32.8-oasis2 h1:PSEUtAp8Rfe/0T7endF6Iqg4p9+pPkSDSx0E83bD2LM= +github.com/oasislabs/tendermint v0.32.8-oasis2/go.mod h1:uxexUd6P+G+Zg1yACNBycfcaV1dPI985r79I+IXP38w= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= From c0aab0e42641822b6a3fc8c95398aadfd67a09df Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Tue, 21 Jan 2020 11:53:39 +0000 Subject: [PATCH 4/4] go/oasis-node: Warn if the rlimit appears to be low 1024 file descriptors is low, and probably insufficient for an actual node. --- go/oasis-node/cmd/common/common.go | 42 +++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/go/oasis-node/cmd/common/common.go b/go/oasis-node/cmd/common/common.go index 989fb3ad1b4..67f2276c825 100644 --- a/go/oasis-node/cmd/common/common.go +++ b/go/oasis-node/cmd/common/common.go @@ -6,6 +6,7 @@ import ( "io" "os" "path/filepath" + "syscall" "github.com/spf13/cobra" flag "github.com/spf13/pflag" @@ -27,8 +28,13 @@ const ( // keys. CfgDebugAllowTestKeys = "debug.allow_test_keys" + // CfgDebugRlimit is the command flag to set RLIMIT_NOFILE on launch. + CfgDebugRlimit = "debug.rlimit" + cfgConfigFile = "config" CfgDataDir = "datadir" + + badDefaultRlimit = 1024 ) var ( @@ -37,6 +43,7 @@ var ( rootLog = logging.GetLogger("oasis-node") debugAllowTestKeysFlag = flag.NewFlagSet("", flag.ContinueOnError) + debugRlimitFlag = flag.NewFlagSet("", flag.ContinueOnError) // RootFlags has the flags that are common across all commands. RootFlags = flag.NewFlagSet("", flag.ContinueOnError) @@ -90,6 +97,7 @@ func Init() error { initDataDir, initLogging, initPublicKeyBlacklist, + initRlimit, } for _, fn := range initFns { @@ -111,16 +119,21 @@ func Logger() *logging.Logger { func init() { initLoggingFlags() - debugAllowTestKeysFlag.Bool(CfgDebugAllowTestKeys, false, "Allow test keys (UNSAFE)") + debugAllowTestKeysFlag.Bool(CfgDebugAllowTestKeys, false, "allow test keys (UNSAFE)") _ = debugAllowTestKeysFlag.MarkHidden(CfgDebugAllowTestKeys) _ = viper.BindPFlags(debugAllowTestKeysFlag) + debugRlimitFlag.Uint64(CfgDebugRlimit, 0, "set RLIMIT_NOFILE") + _ = debugRlimitFlag.MarkHidden(CfgDebugRlimit) + _ = viper.BindPFlags(debugRlimitFlag) + RootFlags.StringVar(&cfgFile, cfgConfigFile, "", "config file") RootFlags.String(CfgDataDir, "", "data directory") _ = viper.BindPFlags(RootFlags) RootFlags.AddFlagSet(loggingFlags) RootFlags.AddFlagSet(debugAllowTestKeysFlag) + RootFlags.AddFlagSet(debugRlimitFlag) RootFlags.AddFlagSet(flags.DebugDontBlameOasisFlag) } @@ -184,6 +197,33 @@ func initPublicKeyBlacklist() error { return nil } +func initRlimit() error { + var rlim syscall.Rlimit + if err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rlim); err != nil { + // Log, but don't return the error, this is only used for testing + // and to warn the user if it's set too low. + rootLog.Warn("failed to query RLIMIT_NOFILE", + "err", err, + ) + return nil + } + + if rlim.Cur <= badDefaultRlimit { + rootLog.Warn("the node user has a very low RLIMIT_NOFILE, consider increasing", + "cur", rlim.Cur, + "max", rlim.Max, + ) + } + + desiredLimit := viper.GetUint64(CfgDebugRlimit) + if !flags.DebugDontBlameOasis() || desiredLimit == 0 { + return nil + } + + rlim.Cur = desiredLimit + return syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rlim) +} + // GetOutputWriter will create a file if the config string is set, // and otherwise return os.Stdout. func GetOutputWriter(cmd *cobra.Command, cfg string) (io.WriteCloser, bool, error) {