Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Suppress test failures due to logs after tests complete #6067

Merged
merged 9 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions canary/workflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ import (
"go.uber.org/cadence/mocks"
"go.uber.org/cadence/testsuite"
"go.uber.org/cadence/worker"
"go.uber.org/zap/zaptest"

"github.com/uber/cadence/common/log/testlogger"
)

type (
Expand All @@ -52,7 +53,7 @@ func TestWorkflowTestSuite(t *testing.T) {
}

func (s *workflowTestSuite) SetupTest() {
s.SetLogger(zaptest.NewLogger(s.T()))
s.SetLogger(testlogger.NewZap(s.T()))
s.env = s.NewTestWorkflowEnvironment()
s.env.Test(s.T())
}
Expand Down
112 changes: 110 additions & 2 deletions common/log/testlogger/testlogger.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,122 @@
package testlogger

import (
"fmt"
"slices"
"strings"

"github.com/stretchr/testify/require"
"go.uber.org/atomic"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"go.uber.org/zap/zaptest"

"github.com/uber/cadence/common/log"
"github.com/uber/cadence/common/log/loggerimpl"
)

type TestingT interface {
zaptest.TestingT
Cleanup(func()) // not currently part of zaptest.TestingT
}

// New is a helper to create new development logger in unit test
func New(t zaptest.TestingT) log.Logger {
func New(t TestingT) log.Logger {
// test logger that emits all logs (none dropped / sample func never returns false)
return loggerimpl.NewLogger(zaptest.NewLogger(t), loggerimpl.WithSampleFunc(func(int) bool { return true }))
return loggerimpl.NewLogger(NewZap(t), loggerimpl.WithSampleFunc(func(int) bool { return true }))
}

// NewZap makes a new test-oriented logger that prevents bad-lifecycle logs from failing tests.
func NewZap(t TestingT) *zap.Logger {
/*
HORRIBLE HACK due to async shutdown, both in our code and in libraries (e.g. gocql):
normally, logs produced after a test finishes will *intentionally* fail the test and/or
cause data to race on the test's internal `t.done` field.

that's a good thing, it reveals possibly-dangerously-flawed lifecycle management.

unfortunately some of our code and some libraries do not have good lifecycle management,
and this cannot easily be patched from the outside.

so this logger cheats: after a test completes, it logs to stderr rather than TestingT.
EVERY ONE of these logs is bad and we should not produce them, but it's causing many
otherwise-useful tests to be flaky, and that's a larger interruption than is useful.
*/
logAfterComplete, err := zap.NewDevelopment()
require.NoError(t, err, "could not build a fallback zap logger")
replaced := &fallbackTestCore{
t: t,
fallback: logAfterComplete.Core(),
testing: zaptest.NewLogger(t).Core(),
completed: &atomic.Bool{},
}

t.Cleanup(replaced.UseFallback) // switch to fallback before ending the test

return zap.New(replaced)
}

type fallbackTestCore struct {
t TestingT
fallback zapcore.Core
testing zapcore.Core
completed *atomic.Bool
}

var _ zapcore.Core = (*fallbackTestCore)(nil)

func (f *fallbackTestCore) UseFallback() {
f.completed.Store(true)
}

func (f *fallbackTestCore) Enabled(level zapcore.Level) bool {
if f.completed.Load() {
return f.fallback.Enabled(level)
}
return f.testing.Enabled(level)
}

func (f *fallbackTestCore) With(fields []zapcore.Field) zapcore.Core {
// need to copy and defer, else the returned core will be used at an
// arbitrarily later point in time, possibly after the test has completed.
return &fallbackTestCore{
t: f.t,
fallback: f.fallback.With(fields),
testing: f.testing.With(fields),
completed: f.completed,
}
}

func (f *fallbackTestCore) Check(entry zapcore.Entry, checked *zapcore.CheckedEntry) *zapcore.CheckedEntry {
// see other Check impls, all look similar.
// this defers the "where to log" decision to Write, as `f` is the core that will write.
if f.fallback.Enabled(entry.Level) {
return checked.AddCore(entry, f)
}
return checked // do not add any cores
}

func (f *fallbackTestCore) Write(entry zapcore.Entry, fields []zapcore.Field) error {
if f.completed.Load() {
entry.Message = fmt.Sprintf("COULD FAIL TEST %q, logged too late: %v", f.t.Name(), entry.Message)

hasStack := slices.ContainsFunc(fields, func(field zapcore.Field) bool {
// no specific stack-trace type, so just look for probable fields.
return strings.Contains(strings.ToLower(field.Key), "stack")
})
if !hasStack {
fields = append(fields, zap.Stack("log_stack"))
}
return f.fallback.Write(entry, fields)
}
return f.testing.Write(entry, fields)
}

func (f *fallbackTestCore) Sync() error {
if f.completed.Load() {
return f.fallback.Sync()
}
return f.testing.Sync()
}

var _ zapcore.Core = (*fallbackTestCore)(nil)
116 changes: 116 additions & 0 deletions common/log/testlogger/testlogger_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// The MIT License (MIT)

// Copyright (c) 2017-2020 Uber Technologies Inc.

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

package testlogger

import (
"fmt"
"os"
"testing"
"time"

"github.com/uber/cadence/common/log"
"github.com/uber/cadence/common/log/tag"
)

var (
done = make(chan struct{})
logged = make(chan struct{})
)

func TestMain(m *testing.M) {
code := m.Run()
// ensure synchronization between t.done and t.logf, else this test is extremely flaky.
// for details see: https://github.com/golang/go/issues/67701
close(done)
select {
case <-logged:
os.Exit(code)
case <-time.After(time.Second): // should be MUCH faster
_, _ = fmt.Fprintln(os.Stderr, "timed out waiting for test to log")
os.Exit(1)
}
}

// Unfortunately a moderate hack, to work around our faulty lifecycle management,
// and some libraries with issues as well.
// Ideally this test WOULD fail, but that's much harder to assert "safely".
func TestLoggerShouldNotFailIfLoggedLate(t *testing.T) {
origLogger := New(t)
// if With does not defer core selection, this will fail the test
// by sending the logs to t.Logf
withLogger := origLogger.WithTags(tag.ActorID("testing")) // literally any tag
origLogger.Info("before is fine, orig")
withLogger.Info("before is fine, with")
go func() {
<-done
origLogger.Info("too late, orig")
withLogger.Info("too late, with")
close(logged)
}()
}

func TestSubtestShouldNotFail(t *testing.T) {
// when complete, a subtest's too-late logs just get pushed to the parent,
// and do not fail any tests. they only fail when no running parent exists.
//
// if Go changes this behavior, this test could fail, otherwise AFAICT it
// should be stable.
assertDoesNotFail := func(name string, setup, log func(t *testing.T)) {
// need to wrap in something that will out-live the "real" test,
// to ensure there is a running parent test to push logs toward.
t.Run(name, func(t *testing.T) {
// same setup as TestMain but contained within this sub-test
var (
done = make(chan struct{})
logged = make(chan struct{})
)
t.Run("inner", func(t *testing.T) {
setup(t)
go func() {
<-done
// despite being too late, the parent test is still running
// so this does not fail the test.
log(t)
close(logged)
}()
time.AfterFunc(10*time.Millisecond, func() {
close(done)
})
})
<-logged
})
}

assertDoesNotFail("real", func(t *testing.T) {
// no setup needed
}, func(t *testing.T) {
t.Logf("too late but allowed")
})

var l log.Logger
assertDoesNotFail("wrapped", func(t *testing.T) {
l = New(t)
}, func(t *testing.T) {
l.Info("too late but allowed")
})
}
4 changes: 2 additions & 2 deletions common/reconciliation/invariant/staleWorkflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ import (
"github.com/golang/mock/gomock"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"go.uber.org/zap/zaptest"

"github.com/uber/cadence/common"
"github.com/uber/cadence/common/log/testlogger"
"github.com/uber/cadence/common/persistence"
"github.com/uber/cadence/common/types"
)
Expand All @@ -44,7 +44,7 @@ func setup(t *testing.T, retentionDays int) (pr *persistence.MockRetryer, impl *
impl = &staleWorkflowCheck{
pr: m,
dc: nil, // not used by these tests
log: zaptest.NewLogger(t),
log: testlogger.NewZap(t),
testable: &mocked{},
}
withDomain(retentionDays, impl)
Expand Down
3 changes: 1 addition & 2 deletions service/worker/archiver/workflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"go.uber.org/cadence/testsuite"
"go.uber.org/cadence/worker"
"go.uber.org/cadence/workflow"
"go.uber.org/zap/zaptest"

"github.com/uber/cadence/common/dynamicconfig"
"github.com/uber/cadence/common/log"
Expand Down Expand Up @@ -136,7 +135,7 @@ func (s *workflowSuite) TestArchivalWorkflow_Success() {
}

func (s *workflowSuite) TestReplayArchiveHistoryWorkflow() {
logger := zaptest.NewLogger(s.T())
logger := testlogger.NewZap(s.T())
globalLogger = workflowTestLogger
globalMetricsClient = metrics.NewClient(tally.NewTestScope("replay", nil), metrics.Worker)
globalConfig = &Config{
Expand Down
6 changes: 2 additions & 4 deletions service/worker/indexer/esProcessor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (

"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"go.uber.org/zap/zaptest"

"github.com/uber/cadence/.gen/go/indexer"
"github.com/uber/cadence/common"
Expand All @@ -38,7 +37,7 @@ import (
"github.com/uber/cadence/common/elasticsearch/bulk"
mocks2 "github.com/uber/cadence/common/elasticsearch/bulk/mocks"
esMocks "github.com/uber/cadence/common/elasticsearch/mocks"
"github.com/uber/cadence/common/log/loggerimpl"
"github.com/uber/cadence/common/log/testlogger"
msgMocks "github.com/uber/cadence/common/messaging/mocks"
"github.com/uber/cadence/common/metrics"
"github.com/uber/cadence/common/metrics/mocks"
Expand Down Expand Up @@ -78,12 +77,11 @@ func (s *esProcessorSuite) SetupTest() {
ESProcessorFlushInterval: dynamicconfig.GetDurationPropertyFn(1 * time.Minute),
}
s.mockBulkProcessor = &mocks2.GenericBulkProcessor{}
zapLogger := zaptest.NewLogger(s.T())
s.mockScope = &mocks.Scope{}

p := &ESProcessorImpl{
config: config,
logger: loggerimpl.NewLogger(zapLogger),
logger: testlogger.New(s.T()),
scope: s.mockScope,
msgEncoder: defaultEncoder,
}
Expand Down
Loading