From df7ebad2e11ff5de76b58911def41e3fbbe18277 Mon Sep 17 00:00:00 2001 From: mdrakos Date: Fri, 1 Nov 2024 13:00:37 -0700 Subject: [PATCH 1/6] Handle pannics in the poller --- cmd/state-svc/internal/resolver/resolver.go | 33 ++++++++------------- internal/poller/poller.go | 11 ++++++- internal/runbits/panics/panics.go | 7 +++++ 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/cmd/state-svc/internal/resolver/resolver.go b/cmd/state-svc/internal/resolver/resolver.go index 7995751f88..512f5e921d 100644 --- a/cmd/state-svc/internal/resolver/resolver.go +++ b/cmd/state-svc/internal/resolver/resolver.go @@ -24,9 +24,9 @@ import ( "github.com/ActiveState/cli/internal/graph" "github.com/ActiveState/cli/internal/logging" configMediator "github.com/ActiveState/cli/internal/mediators/config" - "github.com/ActiveState/cli/internal/multilog" "github.com/ActiveState/cli/internal/poller" "github.com/ActiveState/cli/internal/rtutils/ptr" + "github.com/ActiveState/cli/internal/runbits/panics" "github.com/ActiveState/cli/internal/updater" "github.com/ActiveState/cli/pkg/platform/authentication" "github.com/ActiveState/cli/pkg/projectfile" @@ -50,6 +50,7 @@ type Resolver struct { // var _ genserver.ResolverRoot = &Resolver{} // Must implement ResolverRoot func New(cfg *config.Instance, an *sync.Client, auth *authentication.Auth) (*Resolver, error) { + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() msg, err := messages.New(cfg, auth) if err != nil { return nil, errs.Wrap(err, "Could not initialize messages") @@ -110,7 +111,7 @@ func (r *Resolver) Query() genserver.QueryResolver { return r } func (r *Resolver) Mutation() genserver.MutationResolver { return r } func (r *Resolver) Version(ctx context.Context) (*graph.Version, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() r.an.EventWithLabel(anaConsts.CatStateSvc, "endpoint", "Version") logging.Debug("Version resolver") @@ -126,7 +127,7 @@ func (r *Resolver) Version(ctx context.Context) (*graph.Version, error) { } func (r *Resolver) AvailableUpdate(ctx context.Context, desiredChannel, desiredVersion string) (*graph.AvailableUpdate, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() if desiredChannel == "" { desiredChannel = constants.ChannelName @@ -174,7 +175,7 @@ func (r *Resolver) AvailableUpdate(ctx context.Context, desiredChannel, desiredV } func (r *Resolver) Projects(ctx context.Context) ([]*graph.Project, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() r.an.EventWithLabel(anaConsts.CatStateSvc, "endpoint", "Projects") logging.Debug("Projects resolver") @@ -194,7 +195,7 @@ func (r *Resolver) Projects(ctx context.Context) ([]*graph.Project, error) { } func (r *Resolver) AnalyticsEvent(_ context.Context, category, action, source string, _label *string, dimensionsJson string) (*graph.AnalyticsEventResponse, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() logging.Debug("Analytics event resolver: %s - %s: %s (%s)", category, action, ptr.From(_label, "NIL"), source) @@ -228,7 +229,7 @@ func (r *Resolver) AnalyticsEvent(_ context.Context, category, action, source st } func (r *Resolver) ReportRuntimeUsage(_ context.Context, pid int, exec, source string, dimensionsJSON string) (*graph.ReportRuntimeUsageResponse, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() logging.Debug("Runtime usage resolver: %d - %s", pid, exec) var dims *dimensions.Values @@ -242,26 +243,26 @@ func (r *Resolver) ReportRuntimeUsage(_ context.Context, pid int, exec, source s } func (r *Resolver) CheckMessages(ctx context.Context, command string, flags []string) ([]*graph.MessageInfo, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() logging.Debug("Check messages resolver") return r.messages.Check(command, flags) } func (r *Resolver) ConfigChanged(ctx context.Context, key string) (*graph.ConfigChangedResponse, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() go configMediator.NotifyListeners(key) return &graph.ConfigChangedResponse{Received: true}, nil } func (r *Resolver) FetchLogTail(ctx context.Context) (string, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() return logging.ReadTail(), nil } func (r *Resolver) GetProcessesInUse(ctx context.Context, execDir string) ([]*graph.ProcessInfo, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() inUse := r.rtwatch.GetProcessesInUse(execDir) processes := make([]*graph.ProcessInfo, 0, len(inUse)) @@ -272,7 +273,7 @@ func (r *Resolver) GetProcessesInUse(ctx context.Context, execDir string) ([]*gr } func (r *Resolver) GetJwt(ctx context.Context) (*graph.Jwt, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() if err := r.auth.MaybeRenew(); err != nil { return nil, errs.Wrap(err, "Could not renew auth token") @@ -308,7 +309,7 @@ func (r *Resolver) GetJwt(ctx context.Context) (*graph.Jwt, error) { } func (r *Resolver) HashGlobs(ctx context.Context, wd string, globs []string) (*graph.GlobResult, error) { - defer func() { handlePanics(recover(), debug.Stack()) }() + defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() hash, files, err := r.fileHasher.HashFiles(wd, globs) if err != nil { @@ -341,11 +342,3 @@ func (r *Resolver) SetCache(ctx context.Context, key string, value string, expir r.globalCache.Set(key, value, time.Duration(expiry)*time.Second) return &graphqltypes.Void{}, nil } - -func handlePanics(recovered interface{}, stack []byte) { - if recovered != nil { - multilog.Error("Panic: %v", recovered) - logging.Debug("Stack: %s", string(stack)) - panic(recovered) // We're only logging the panic, not interrupting it - } -} diff --git a/internal/poller/poller.go b/internal/poller/poller.go index 56de212f85..563caa9a0b 100644 --- a/internal/poller/poller.go +++ b/internal/poller/poller.go @@ -1,6 +1,7 @@ package poller import ( + "runtime/debug" "sync" "time" @@ -8,6 +9,7 @@ import ( "github.com/ActiveState/cli/internal/logging" "github.com/ActiveState/cli/internal/multilog" "github.com/ActiveState/cli/internal/runbits/errors" + "github.com/ActiveState/cli/internal/runbits/panics" ) type Poller struct { @@ -19,8 +21,15 @@ type Poller struct { } func New(interval time.Duration, pollFunc func() (interface{}, error)) *Poller { + wrappedFn := func() (interface{}, error) { + defer func() { + panics.LogAndPanic(recover(), debug.Stack()) + }() + return pollFunc() + } + p := &Poller{ - pollFunc: pollFunc, + pollFunc: wrappedFn, done: make(chan struct{}), } go p.start(interval) diff --git a/internal/runbits/panics/panics.go b/internal/runbits/panics/panics.go index 7dd3f3fc5f..ed710415dc 100644 --- a/internal/runbits/panics/panics.go +++ b/internal/runbits/panics/panics.go @@ -34,3 +34,10 @@ func LogPanics(recovered interface{}, stack []byte) bool { } return false } + +// LogAndPanic produces actionable output for panic events (that shouldn't happen) and panics +func LogAndPanic(recovered interface{}, stack []byte) { + multilog.Error("Panic: %v", recovered) + logging.Debug("Stack: %s", string(stack)) + panic(recovered) +} From 675c46e0e710ee2216fe20b1f6743e32c4fecca6 Mon Sep 17 00:00:00 2001 From: mdrakos Date: Fri, 1 Nov 2024 13:00:55 -0700 Subject: [PATCH 2/6] Handle potential panics in messages --- cmd/state-svc/internal/messages/messages.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cmd/state-svc/internal/messages/messages.go b/cmd/state-svc/internal/messages/messages.go index 88cdf78f9e..a377b5d911 100644 --- a/cmd/state-svc/internal/messages/messages.go +++ b/cmd/state-svc/internal/messages/messages.go @@ -74,7 +74,11 @@ func (m *Messages) Check(command string, flags []string) ([]*graph.MessageInfo, if cacheValue == nil { return []*graph.MessageInfo{}, nil } - allMessages := cacheValue.([]*graph.MessageInfo) + + allMessages, ok := cacheValue.([]*graph.MessageInfo) + if !ok { + return nil, errs.New("Could not get messages from cache") + } conditionParams := *m.baseParams // copy conditionParams.UserEmail = m.auth.Email() @@ -110,7 +114,11 @@ func check(params *ConditionParams, messages []*graph.MessageInfo, lastReportMap logging.Debug("Checking message %s", message.ID) // Ensure we don't show the same message too often if lastReport, ok := lastReportMap[message.ID]; ok { - lastReportTime, err := time.Parse(time.RFC3339, lastReport.(string)) + lr, ok := lastReport.(string) + if !ok { + return nil, errs.New("Could not get last reported time for message %s as it's not a string", message.ID) + } + lastReportTime, err := time.Parse(time.RFC3339, lr) if err != nil { return nil, errs.New("Could not parse last reported time for message %s as it's not a valid RFC3339 value: %v", message.ID, lastReport) } From 6c49aed4209689bc7e350536a3db45d18997175a Mon Sep 17 00:00:00 2001 From: mdrakos Date: Fri, 1 Nov 2024 13:22:52 -0700 Subject: [PATCH 3/6] Remove panic check on New resolver --- cmd/state-svc/internal/resolver/resolver.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/state-svc/internal/resolver/resolver.go b/cmd/state-svc/internal/resolver/resolver.go index 512f5e921d..ca591fe87d 100644 --- a/cmd/state-svc/internal/resolver/resolver.go +++ b/cmd/state-svc/internal/resolver/resolver.go @@ -50,7 +50,6 @@ type Resolver struct { // var _ genserver.ResolverRoot = &Resolver{} // Must implement ResolverRoot func New(cfg *config.Instance, an *sync.Client, auth *authentication.Auth) (*Resolver, error) { - defer func() { panics.LogAndPanic(recover(), debug.Stack()) }() msg, err := messages.New(cfg, auth) if err != nil { return nil, errs.Wrap(err, "Could not initialize messages") From 5e42cff49f5ef762d506aea5bb6443a744ce1984 Mon Sep 17 00:00:00 2001 From: mdrakos Date: Fri, 1 Nov 2024 13:23:52 -0700 Subject: [PATCH 4/6] Nil check recovered --- internal/runbits/panics/panics.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/internal/runbits/panics/panics.go b/internal/runbits/panics/panics.go index ed710415dc..d743f7a5e8 100644 --- a/internal/runbits/panics/panics.go +++ b/internal/runbits/panics/panics.go @@ -37,7 +37,9 @@ func LogPanics(recovered interface{}, stack []byte) bool { // LogAndPanic produces actionable output for panic events (that shouldn't happen) and panics func LogAndPanic(recovered interface{}, stack []byte) { - multilog.Error("Panic: %v", recovered) - logging.Debug("Stack: %s", string(stack)) - panic(recovered) + if recovered != nil { + multilog.Error("Panic: %v", recovered) + logging.Debug("Stack: %s", string(stack)) + panic(recovered) // We're only logging the panic, not interrupting it + } } From bbfb2faf87c5d20fb31a04fd0e4e7c6ab7979a41 Mon Sep 17 00:00:00 2001 From: Mike Drakos Date: Tue, 5 Nov 2024 13:54:43 -0800 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Nathan Rijksen --- cmd/state-svc/internal/messages/messages.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/state-svc/internal/messages/messages.go b/cmd/state-svc/internal/messages/messages.go index a377b5d911..cecd8a9471 100644 --- a/cmd/state-svc/internal/messages/messages.go +++ b/cmd/state-svc/internal/messages/messages.go @@ -77,7 +77,7 @@ func (m *Messages) Check(command string, flags []string) ([]*graph.MessageInfo, allMessages, ok := cacheValue.([]*graph.MessageInfo) if !ok { - return nil, errs.New("Could not get messages from cache") + return nil, errs.New("cacheValue has unexpected type: %T", cacheValue) } conditionParams := *m.baseParams // copy @@ -116,7 +116,7 @@ func check(params *ConditionParams, messages []*graph.MessageInfo, lastReportMap if lastReport, ok := lastReportMap[message.ID]; ok { lr, ok := lastReport.(string) if !ok { - return nil, errs.New("Could not get last reported time for message %s as it's not a string", message.ID) + return nil, errs.New("Could not get last reported time for message %s as it's not a string: %T", message.ID, lastReport) } lastReportTime, err := time.Parse(time.RFC3339, lr) if err != nil { From fe8713a087070c2cd54dcc34ea9dea2ac912374e Mon Sep 17 00:00:00 2001 From: mdrakos Date: Tue, 5 Nov 2024 14:02:05 -0800 Subject: [PATCH 6/6] Move panic handling --- cmd/state-svc/internal/messages/messages.go | 5 +++++ cmd/state-svc/internal/resolver/resolver.go | 6 ++++++ internal/poller/poller.go | 11 +---------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cmd/state-svc/internal/messages/messages.go b/cmd/state-svc/internal/messages/messages.go index cecd8a9471..1495a8a1b2 100644 --- a/cmd/state-svc/internal/messages/messages.go +++ b/cmd/state-svc/internal/messages/messages.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "os" + "runtime/debug" "sync" "time" @@ -15,6 +16,7 @@ import ( "github.com/ActiveState/cli/internal/httputil" "github.com/ActiveState/cli/internal/logging" "github.com/ActiveState/cli/internal/poller" + "github.com/ActiveState/cli/internal/runbits/panics" "github.com/ActiveState/cli/internal/strutils" auth "github.com/ActiveState/cli/pkg/platform/authentication" "github.com/ActiveState/cli/pkg/sysinfo" @@ -43,6 +45,9 @@ func New(cfg *config.Instance, auth *auth.Auth) (*Messages, error) { } poll := poller.New(1*time.Hour, func() (interface{}, error) { + defer func() { + panics.LogAndPanic(recover(), debug.Stack()) + }() resp, err := fetch() return resp, err }) diff --git a/cmd/state-svc/internal/resolver/resolver.go b/cmd/state-svc/internal/resolver/resolver.go index ca591fe87d..ba91441e7f 100644 --- a/cmd/state-svc/internal/resolver/resolver.go +++ b/cmd/state-svc/internal/resolver/resolver.go @@ -57,6 +57,9 @@ func New(cfg *config.Instance, an *sync.Client, auth *authentication.Auth) (*Res upchecker := updater.NewDefaultChecker(cfg, an) pollUpdate := poller.New(1*time.Hour, func() (interface{}, error) { + defer func() { + panics.LogAndPanic(recover(), debug.Stack()) + }() logging.Debug("Poller checking for update info") return upchecker.CheckFor(constants.ChannelName, "") }) @@ -71,6 +74,9 @@ func New(cfg *config.Instance, an *sync.Client, auth *authentication.Auth) (*Res } pollAuth := poller.New(time.Duration(int64(time.Millisecond)*pollRate), func() (interface{}, error) { + defer func() { + panics.LogAndPanic(recover(), debug.Stack()) + }() if auth.SyncRequired() { return nil, auth.Sync() } diff --git a/internal/poller/poller.go b/internal/poller/poller.go index 563caa9a0b..56de212f85 100644 --- a/internal/poller/poller.go +++ b/internal/poller/poller.go @@ -1,7 +1,6 @@ package poller import ( - "runtime/debug" "sync" "time" @@ -9,7 +8,6 @@ import ( "github.com/ActiveState/cli/internal/logging" "github.com/ActiveState/cli/internal/multilog" "github.com/ActiveState/cli/internal/runbits/errors" - "github.com/ActiveState/cli/internal/runbits/panics" ) type Poller struct { @@ -21,15 +19,8 @@ type Poller struct { } func New(interval time.Duration, pollFunc func() (interface{}, error)) *Poller { - wrappedFn := func() (interface{}, error) { - defer func() { - panics.LogAndPanic(recover(), debug.Stack()) - }() - return pollFunc() - } - p := &Poller{ - pollFunc: wrappedFn, + pollFunc: pollFunc, done: make(chan struct{}), } go p.start(interval)