Skip to content

Commit

Permalink
Stop logging Pod's without Node's yet as an Error
Browse files Browse the repository at this point in the history
A common debugging red herring I see is people seeing an error that
reads something akin to:

```
"message":"error retrieving node  for Pod game-servercbtld: node \"\"
not found","severity":"error"
```

And (not unsurprisingly, it's an `error`) wondering if it causing
whatever issue they are having with Agones.

Except these errors are totally expected, and normal, as Pods often
aren't populated with the Node they live on straight away, so I wanted
to have a way to be able to return an error from a workerqueue Handler
function to retry, but not have it be logged at an Error level.

So that's what this does - provide a wrapper Error type that only gets
logged at the Debug level, and the corresponding changes to make that
error message above be logged at the Debug level and not cause so much
noise.
  • Loading branch information
markmandel committed Mar 8, 2021
1 parent 25def1b commit 0640590
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 1 deletion.
6 changes: 6 additions & 0 deletions pkg/gameservers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,9 @@ func (c *Controller) syncGameServerStartingState(ctx context.Context, gs *agones
if err != nil {
return nil, err
}
if pod.Spec.NodeName == "" {
return gs, workerqueue.NewDebugError(errors.Errorf("node not yet populated for Pod %s", pod.ObjectMeta.Name))
}
node, err := c.nodeLister.Get(pod.Spec.NodeName)
if err != nil {
return gs, errors.Wrapf(err, "error retrieving node %s for Pod %s", pod.Spec.NodeName, pod.ObjectMeta.Name)
Expand Down Expand Up @@ -803,6 +806,9 @@ func (c *Controller) syncGameServerRequestReadyState(ctx context.Context, gs *ag
addressPopulated := false
if gs.Status.NodeName == "" {
addressPopulated = true
if pod.Spec.NodeName == "" {
return gs, workerqueue.NewDebugError(errors.Errorf("node not yet populated for Pod %s", pod.ObjectMeta.Name))
}
node, err := c.nodeLister.Get(pod.Spec.NodeName)
if err != nil {
return gs, errors.Wrapf(err, "error retrieving node %s for Pod %s", pod.Spec.NodeName, pod.ObjectMeta.Name)
Expand Down
3 changes: 3 additions & 0 deletions pkg/gameservers/migration.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,9 @@ func (mc *MigrationController) syncGameServer(ctx context.Context, key string) e
return nil
}

if pod.Spec.NodeName == "" {
return workerqueue.NewDebugError(errors.Errorf("node not yet populated for Pod %s", pod.ObjectMeta.Name))
}
node, err := mc.nodeLister.Get(pod.Spec.NodeName)
if err != nil {
return errors.Wrapf(err, "error retrieving node %s for Pod %s", pod.Spec.NodeName, pod.ObjectMeta.Name)
Expand Down
29 changes: 28 additions & 1 deletion pkg/util/workerqueue/workerqueue.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,32 @@ const (
workFx = time.Second
)

// debugError is a marker type for errors that that should only be logged at a Debug level.
// Useful if you want a Handler to be retried, but not logged at an Error level.
type debugError struct {
err error
}

// NewDebugError returns a debugError wrapper around an error.
func NewDebugError(err error) error {
return &debugError{err: err}
}

// Error returns the error string
func (l *debugError) Error() string {
if l.err == nil {
return "<nil>"
}
return l.err.Error()
}

// isDebugError returns if the error is a debug error or not
func isDebugError(err error) bool {
cause := errors.Cause(err)
_, ok := cause.(*debugError)
return ok
}

// Handler is the handler for processing the work queue
// This is usually a syncronisation handler for a controller or related
type Handler func(context.Context, string) error
Expand Down Expand Up @@ -153,7 +179,8 @@ func (wq *WorkerQueue) processNextWorkItem(ctx context.Context) bool {

if err := wq.SyncHandler(ctx, key); err != nil {
// Conflicts are expected, so only show them in debug operations.
if k8serror.IsConflict(errors.Cause(err)) {
// Also check is debugError for other expected errors.
if k8serror.IsConflict(errors.Cause(err)) || isDebugError(err) {
wq.logger.WithField(wq.keyName, obj).Debug(err)
} else {
runtime.HandleError(wq.logger.WithField(wq.keyName, obj), err)
Expand Down
14 changes: 14 additions & 0 deletions pkg/util/workerqueue/workerqueue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"time"

"github.com/heptiolabs/healthcheck"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/util/wait"
Expand Down Expand Up @@ -194,3 +195,16 @@ func TestWorkerQueueEnqueueAfter(t *testing.T) {
assert.Fail(t, "should have got a queue'd message by now")
}
}

func TestDebugError(t *testing.T) {
err := errors.New("not a debug error")
assert.False(t, isDebugError(err))

err = NewDebugError(err)
assert.True(t, isDebugError(err))
assert.EqualError(t, err, "not a debug error")

err = NewDebugError(nil)
assert.True(t, isDebugError(err))
assert.EqualError(t, err, "<nil>")
}

0 comments on commit 0640590

Please sign in to comment.