-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
test_impl.go
653 lines (563 loc) · 18.5 KB
/
test_impl.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package main
import (
"context"
"fmt"
"io"
"math/rand"
"os"
"strings"
"sync"
"time"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/version"
"github.com/cockroachdb/errors"
"github.com/petermattis/goid"
)
// perfArtifactsDir is the name of the directory on cluster nodes in which perf
// artifacts reside. Upon success this directory is copied into the test's
// ArtifactsDir() from each node in the cluster.
const perfArtifactsDir = "perf"
// goCoverArtifactsDir is the name of the directory on cluster nodes in which
// go coverage profiles are dumped. At the end of a test this directory is
// copied into the test's ArtifactsDir() from each node in the cluster.
const goCoverArtifactsDir = "gocover"
// testStatus is a single status entry for one goroutine taking part in a test.
// Entries are stored in testImpl.mu.status, keyed by goroutine ID.
type testStatus struct {
// msg is the status text (fmt.Sprint of the arguments given to
// Status/WorkerStatus).
msg string
// time is when msg was set.
time time.Time
// progress is the fraction last reported through Progress/WorkerProgress.
// Setting a new status message replaces the whole entry, which resets this
// field to zero.
progress float64
}
// failure holds all error information from a single invocation of
// t.{Fatal,Error}{,f}, so that any structured errors passed in are preserved.
//
// For example, t.Fatalf("foo %s %s %s", "hello", err1, err2) results in
// failure.errors == [err1, err2], while all args (including the non-error
// "hello") are captured in squashedErr.
type failure struct {
// squashedErr is the single error created from the variadic args passed to
// t.{Fatal,Error}{,f}.
squashedErr error
// errors are all the values in the variadic args that implement `error`, in
// the order they were passed.
errors []error
}
// testImpl is the per-test state: the spec being run, the binaries and logger
// it uses, where its artifacts go, and the mutex-protected record of its
// statuses and failures.
type testImpl struct {
// spec is the specification of the test being run.
spec *registry.TestSpec
cockroach string // path to main cockroach binary
cockroachEA string // path to cockroach-short binary compiled with --crdb_test build tag
// randomCockroachOnce guards the one-time random choice stored in
// randomizedCockroach (see Cockroach()).
randomCockroachOnce sync.Once
randomizedCockroach string // either `cockroach` or `cockroach-short`, picked randomly
deprecatedWorkload string // path to workload binary
debug bool // whether the test is in debug mode.
// buildVersion is the version of the Cockroach binary that the test will run
// against.
buildVersion *version.Version
// l is the logger that the test will use for its output.
l *logger.Logger
// NOTE(review): runner is not referenced in this part of the file;
// presumably it names the runner executing the test - confirm.
runner string
// runnerID is the test's main goroutine ID.
runnerID int64
// start and end delimit the test run; duration() returns end - start.
start time.Time
end time.Time
// artifactsDir is the path to the directory holding all the artifacts for
// this test. It will contain a test.log file and cluster logs.
artifactsDir string
// artifactsSpec is a TeamCity artifacts spec used to publish this test's
// artifacts. See:
// https://www.jetbrains.com/help/teamcity/2019.1/configuring-general-settings.html#Artifact-Paths
artifactsSpec string
// mu groups the state protected by the embedded RWMutex.
mu struct {
syncutil.RWMutex
// NOTE(review): done is not referenced in this part of the file - confirm
// its meaning against the test runner.
done bool
// cancel, if set, is called from the t.Fatal() and t.Error() families of
// functions right after the failure has been recorded in failures. This is
// used to cancel the context passed to t.spec.Run(), so async test
// goroutines can be notified.
cancel func()
// failures holds the failures added via addFailure, in order.
// A test can have multiple calls to t.Fatal()/Error(), with each call
// referencing 0+ errors; each failure captures all the errors of one call.
failures []failure
// status is a map from goroutine id to status set by that goroutine. A
// special goroutine is indicated by runnerID; that one provides the test's
// "main status".
status map[int64]testStatus
// output accumulates the one-line message of every failure, each followed
// by a newline.
// TODO(test-eng): this should just be an in-mem (ring) buffer attached to
// `t.L()`.
output []byte
}
// Map from version to path to the cockroach binary to be used when a
// mixed-version test wants a binary for that version. If a particular version
// <ver> is found in this map, it is used instead of the binary coming from
// `roachprod stage release <ver>`. See the --versions-binary-override flags.
//
// Version strings look like "20.1.4".
versionsBinaryOverride map[string]string
// skipInit is returned verbatim by SkipInit().
// NOTE(review): what exactly is skipped is not visible here - confirm.
skipInit bool
// If true, go coverage is enabled and the BAZEL_COVER_DIR env var will be set
// when starting nodes.
goCoverEnabled bool
}
// newFailure builds a failure from the squashed error of one
// t.{Fatal,Error}{,f} call and the individual errors found among that call's
// arguments. The errs slice is stored as-is, not copied.
func newFailure(squashedErr error, errs []error) failure {
return failure{squashedErr: squashedErr, errors: errs}
}
// BuildVersion returns the version of the Cockroach binary that this test runs
// against. The pointer stored on the test is returned directly, not a copy.
func (t *testImpl) BuildVersion() *version.Version {
return t.buildVersion
}
// Cockroach returns the path of the cockroach binary the test should use:
// either `RuntimeAssertionsCockroach()` or `StandardCockroach()`, picked
// randomly. The choice is made once per test; every later call returns the
// same binary, to avoid errors that may arise from binaries having a
// different value for metamorphic constants.
//
// Benchmark tests always get the standard binary and never trigger the random
// choice.
func (t *testImpl) Cockroach() string {
	// Runtime assertions slow things down, which would distort benchmark
	// results.
	if t.spec.Benchmark {
		t.l.Printf("Benchmark test, running with standard cockroach")
		return t.StandardCockroach()
	}

	t.randomCockroachOnce.Do(func() {
		threshold := 0.5
		// A user-provided seed for runtime assertions signals that they want
		// assertions enabled (e.g. to reproduce an issue), so always pick the
		// assertions build in that case.
		if os.Getenv(test.EnvAssertionsEnabledSeed) != "" {
			threshold = 1
		}

		chosen := t.StandardCockroach()
		if rand.Float64() < threshold {
			// The build with runtime assertions should exist in every nightly
			// CI build, but we can't assume it exists in every roachtest call.
			if withAssertions := t.RuntimeAssertionsCockroach(); withAssertions != "" {
				t.l.Printf("Runtime assertions enabled")
				chosen = withAssertions
			} else {
				t.l.Printf("WARNING: running without runtime assertions since the corresponding binary was not specified")
			}
		}
		t.randomizedCockroach = chosen
	})
	return t.randomizedCockroach
}
// RuntimeAssertionsCockroach returns the path to the cockroach-short binary
// compiled with the --crdb_test build tag. Cockroach() treats an empty string
// as "binary not specified".
func (t *testImpl) RuntimeAssertionsCockroach() string {
return t.cockroachEA
}
// StandardCockroach returns the path to the main cockroach binary.
func (t *testImpl) StandardCockroach() string {
return t.cockroach
}
// DeprecatedWorkload returns the path to the workload binary.
func (t *testImpl) DeprecatedWorkload() string {
return t.deprecatedWorkload
}
// VersionsBinaryOverride returns the map from version string (e.g. "20.1.4")
// to the path of the cockroach binary to use for that version. The test's own
// map is returned, not a copy.
func (t *testImpl) VersionsBinaryOverride() map[string]string {
return t.versionsBinaryOverride
}
// SkipInit returns the test's skipInit setting.
// NOTE(review): which initialization is skipped is not visible in this file -
// confirm against the callers.
func (t *testImpl) SkipInit() bool {
return t.skipInit
}
// Spec returns the TestSpec. The result is typed interface{}; its dynamic type
// is *registry.TestSpec.
func (t *testImpl) Spec() interface{} {
return t.spec
}
// Helper is a no-op.
// NOTE(review): presumably present so that testImpl satisfies testing-style
// interfaces that require a Helper method - confirm.
func (t *testImpl) Helper() {}
// Name returns the test's name, as given by its spec.
func (t *testImpl) Name() string {
return t.spec.Name
}
// SnapshotPrefix returns the SnapshotPrefix configured in the test's spec.
func (t *testImpl) SnapshotPrefix() string {
return t.spec.SnapshotPrefix
}
// L returns the test's logger. The field is read without synchronization; see
// ReplaceL.
func (t *testImpl) L() *logger.Logger {
return t.l
}
// ReplaceL replaces the test's logger. The logger field is written without any
// synchronization, so this is not safe to call while other goroutines may be
// using L().
func (t *testImpl) ReplaceL(l *logger.Logger) {
// TODO(tbg): get rid of this, this is racy & hacky.
t.l = l
}
// status records (or clears) the status message of the goroutine identified by
// id and, unless the test's logger is already closed, logs the new message.
//
// With no args the goroutine's entry is removed and nothing is logged.
// Otherwise the args are rendered with fmt.Sprint and stored together with the
// current time, replacing any previous entry for id.
func (t *testImpl) status(ctx context.Context, id int64, args ...interface{}) {
	t.mu.Lock()
	defer t.mu.Unlock()

	if t.mu.status == nil {
		t.mu.status = make(map[int64]testStatus)
	}
	if len(args) == 0 {
		delete(t.mu.status, id)
		return
	}

	text := fmt.Sprint(args...)
	t.mu.status[id] = testStatus{msg: text, time: timeutil.Now()}

	if t.L().Closed() {
		return
	}
	// The log calls are made directly from this function because the depth
	// argument (3) is passed relative to it.
	if id != t.runnerID {
		t.L().PrintfCtxDepth(ctx, 3, "test worker status: %s", text)
		return
	}
	t.L().PrintfCtxDepth(ctx, 3, "test status: %s", text)
}
// Status sets the main status message for the test, i.e. the entry keyed by
// the runner goroutine's ID. When called from the main test goroutine (i.e.
// the goroutine on which TestSpec.Run is invoked), this is equivalent to
// calling WorkerStatus. If no arguments are specified, the status message is
// erased.
func (t *testImpl) Status(args ...interface{}) {
t.status(context.TODO(), t.runnerID, args...)
}
// IsDebug reports whether the test is in debug mode.
func (t *testImpl) IsDebug() bool {
return t.debug
}
// GetStatus returns the status of the test's main goroutine, formatted as
// "<message> (set <age> ago)" with the age rounded to the second. It returns
// "N/A" if the main goroutine currently has no status entry.
func (t *testImpl) GetStatus() string {
	t.mu.Lock()
	defer t.mu.Unlock()

	cur, found := t.mu.status[t.runnerID]
	if !found {
		return "N/A"
	}
	age := timeutil.Since(cur.time).Round(time.Second)
	return fmt.Sprintf("%s (set %s ago)", cur.msg, age)
}
// WorkerStatus sets the status message for a worker goroutine associated with
// the test; the entry is keyed by the calling goroutine's ID. The status
// message should be cleared before the goroutine exits by calling WorkerStatus
// with no arguments.
func (t *testImpl) WorkerStatus(args ...interface{}) {
t.status(context.TODO(), goid.Get(), args...)
}
// progress stores frac as the progress of the status entry belonging to the
// goroutine identified by id. If that goroutine has no entry yet, one is
// created with an empty message and zero time.
func (t *testImpl) progress(id int64, frac float64) {
	t.mu.Lock()
	defer t.mu.Unlock()

	if t.mu.status == nil {
		t.mu.status = map[int64]testStatus{}
	}
	// Map values are not addressable, so update a copy and store it back.
	entry := t.mu.status[id]
	entry.progress = frac
	t.mu.status[id] = entry
}
// Progress sets the progress (a fraction in the range [0,1]) associated with
// the main test status message. When called from the main test goroutine
// (i.e. the goroutine on which TestSpec.Run is invoked), this is equivalent to
// calling WorkerProgress. The value is stored as given; it is not range
// checked here.
func (t *testImpl) Progress(frac float64) {
t.progress(t.runnerID, frac)
}
// WorkerProgress sets the progress (a fraction in the range [0,1]) associated
// with the status message of the calling worker goroutine.
func (t *testImpl) WorkerProgress(frac float64) {
t.progress(goid.Get(), frac)
}
// Compile-time check that *testImpl implements skip.SkippableTest.
var _ skip.SkippableTest = (*testImpl)(nil)
// Skip skips the test. The first argument, if any, becomes the main skip
// message; the remaining arguments, if any, form the details. The details are
// always overwritten, with the empty string when there is nothing left to
// format.
//
// Skip does not return: it panics with errTestFatal.
// This implements the skip.SkippableTest interface.
func (t *testImpl) Skip(args ...interface{}) {
	details := args
	if len(args) > 0 {
		t.spec.Skip = fmt.Sprint(args[0])
		details = args[1:]
	}
	t.spec.SkipDetails = fmt.Sprint(details...)
	panic(errTestFatal)
}
// Skipf skips the test. The formatted message becomes the skip reason; the
// skip details are left untouched. Skipf does not return: it panics with
// errTestFatal.
// This implements the skip.SkippableTest interface.
func (t *testImpl) Skipf(format string, args ...interface{}) {
t.spec.Skip = fmt.Sprintf(format, args...)
panic(errTestFatal)
}
// collectErrors returns, in order, every element of args that implements the
// error interface. Non-error values (including nil elements) are ignored. The
// result is nil when args contains no errors.
func collectErrors(args []interface{}) []error {
	var found []error
	for i := range args {
		switch v := args[i].(type) {
		case error:
			found = append(found, v)
		}
	}
	return found
}
// Fatal marks the test as failed, prints the args to t.L(), and calls
// panic(errTestFatal). It can be called multiple times.
//
// The message in the main log renders the args space-separated with "%v". The
// "%+v" rendering of the resulting error, which includes stack traces, is
// written to a separate failure_<N>.log file by addFailure.
//
// ATTENTION: Since this calls panic(errTestFatal), it should only be called
// from a test's closure. The test runner itself should never call this.
func (t *testImpl) Fatal(args ...interface{}) {
// Depth 1 attributes the failure to Fatal's caller rather than to Fatal.
t.addFailureAndCancel(1, "", args...)
panic(errTestFatal)
}
// Fatalf is like Fatal, but takes a format string. Like Fatal, it records the
// failure, cancels the test's context if a cancel function is set, and then
// panics with errTestFatal.
func (t *testImpl) Fatalf(format string, args ...interface{}) {
t.addFailureAndCancel(1, format, args...)
panic(errTestFatal)
}
// FailNow implements the TestingT interface. It records a failure with the
// fixed message "FailNow called" and then panics with errTestFatal.
func (t *testImpl) FailNow() {
t.addFailureAndCancel(1, "FailNow called")
panic(errTestFatal)
}
// Error implements the TestingT interface. It records a failure built from
// args and, if a cancel function is set, cancels the test's context. Unlike
// Fatal it does not panic, so the caller keeps running.
func (t *testImpl) Error(args ...interface{}) {
t.addFailureAndCancel(1, "", args...)
}
// Errorf implements the TestingT interface. It is like Error, but takes a
// format string; it does not panic.
func (t *testImpl) Errorf(format string, args ...interface{}) {
t.addFailureAndCancel(1, format, args...)
}
// addFailureAndCancel records a failure via addFailure and then, if a cancel
// function is set, invokes it to cancel the context passed to the test.
//
// depth is the number of stack frames to skip when reporting the failure
// site, relative to the caller of addFailureAndCancel.
func (t *testImpl) addFailureAndCancel(depth int, format string, args ...interface{}) {
	// Must be called directly from here: the +1 accounts for this frame.
	t.addFailure(depth+1, format, args...)

	// cancel lives in the mutex-guarded state, so read it under the lock.
	// Invoke it after releasing the lock so that the callback never runs
	// while t.mu is held.
	t.mu.RLock()
	cancel := t.mu.cancel
	t.mu.RUnlock()
	if cancel != nil {
		cancel()
	}
}
// addFailure records a failure on the test. It appends the failure to
// t.mu.failures, logs a one-line summary to t.L(), writes the verbose ("%+v")
// rendering of the error to a failure_<N>.log child log file on a best-effort
// basis, and appends the summary to t.mu.output.
//
// depth indicates how many stack frames to skip when reporting the site of
// the failure in logs. `0` will report the caller of addFailure, `1` the
// caller of the caller of addFailure, etc.
//
// If format is empty, the args are rendered space-separated using "%v" for
// each. Calling with an empty format and no args is allowed and yields an
// empty message.
func (t *testImpl) addFailure(depth int, format string, args ...interface{}) {
	if format == "" {
		// TrimPrefix drops the leading space. Unlike slicing off the first
		// byte, it does not panic when there are no args.
		format = strings.TrimPrefix(strings.Repeat(" %v", len(args)), " ")
	}

	// The error is created here, not in a helper, because depth is counted
	// in stack frames relative to this function.
	reportFailure := newFailure(errors.NewWithDepthf(depth+1, format, args...), collectErrors(args))

	t.mu.Lock()
	defer t.mu.Unlock()
	t.mu.failures = append(t.mu.failures, reportFailure)

	var b strings.Builder
	formatFailure(&b, reportFailure)
	msg := b.String()

	failureNum := len(t.mu.failures)
	failureLog := fmt.Sprintf("failure_%d", failureNum)
	t.L().Printf("test failure #%d: full stack retained in %s.log: %s", failureNum, failureLog, msg)

	// Also dump the verbose error (incl. all stack traces) to a log file, in case
	// we need it. The stacks are sometimes helpful, but we don't want them in the
	// main log as they are highly verbose.
	{
		cl, err := t.L().ChildLogger(failureLog, logger.QuietStderr, logger.QuietStdout)
		if err == nil {
			// We don't actually log through this logger since it adds an unrelated
			// file:line caller (namely ours). The error already has stack traces
			// so it's better to write only it to the file to avoid confusion.
			if cl.File != nil {
				path := cl.File.Name()
				if len(path) > 0 {
					// Best effort: the failure has already been recorded and
					// logged above, so a write error is deliberately ignored.
					_ = os.WriteFile(path, []byte(fmt.Sprintf("%+v", reportFailure.squashedErr)), 0644)
				}
			}
			cl.Close() // we just wanted the filename
		}
	}

	t.mu.output = append(t.mu.output, msg...)
	t.mu.output = append(t.mu.output, '\n')
}
// formatFailure writes one line per failure to b, in the form
// "(file:line).func: message", with consecutive failures separated by a
// newline. The source location and message are taken from each failure's
// "squashed" error, which carries the information of all the errors of that
// failure. If no location can be determined, "<unknown>", 0 and "unknown" are
// used for file, line and function.
func formatFailure(b *strings.Builder, reportFailures ...failure) {
	for idx := range reportFailures {
		if idx != 0 {
			b.WriteByte('\n')
		}
		squashed := reportFailures[idx].squashedErr
		file, line, fn, ok := errors.GetOneLineSource(squashed)
		if !ok {
			file, line, fn = "<unknown>", 0, "unknown"
		}
		fmt.Fprintf(b, "(%s:%d).%s: %v", file, line, fn, squashed)
	}
}
// duration returns the time elapsed between the test's start and end
// timestamps.
func (t *testImpl) duration() time.Duration {
return t.end.Sub(t.start)
}
// Failed reports whether at least one failure has been recorded for the test.
// It takes the read lock; callers already holding t.mu should use
// failedRLocked instead.
func (t *testImpl) Failed() bool {
t.mu.RLock()
defer t.mu.RUnlock()
return t.failedRLocked()
}
// failedRLocked reports whether at least one failure has been recorded. It
// does no locking itself; as the name indicates, the caller is expected to
// hold t.mu (at least for reading).
func (t *testImpl) failedRLocked() bool {
return len(t.mu.failures) > 0
}
// firstFailure returns the earliest failure recorded for the test, or the zero
// failure value if the test has not failed.
func (t *testImpl) firstFailure() failure {
	t.mu.RLock()
	defer t.mu.RUnlock()

	if len(t.mu.failures) > 0 {
		return t.mu.failures[0]
	}
	return failure{}
}
// failureMsg returns the one-line renderings of all failures recorded so far,
// in order, as produced by formatFailure. The result is empty if the test has
// no failures.
func (t *testImpl) failureMsg() string {
t.mu.RLock()
defer t.mu.RUnlock()
var b strings.Builder
formatFailure(&b, t.mu.failures...)
return b.String()
}
// failureContainsError reports whether refError matches (per errors.Is) any of
// the individual errors captured in f or, failing that, f's squashed error.
func failureContainsError(f failure, refError error) bool {
	for i := range f.errors {
		if errors.Is(f.errors[i], refError) {
			return true
		}
	}
	return errors.Is(f.squashedErr, refError)
}
// ArtifactsDir returns the path to the directory holding all the artifacts for
// this test.
func (t *testImpl) ArtifactsDir() string {
return t.artifactsDir
}
// PerfArtifactsDir returns the name of the directory on cluster nodes in which
// perf artifacts reside. It is the same constant for every test.
func (t *testImpl) PerfArtifactsDir() string {
return perfArtifactsDir
}
// GoCoverArtifactsDir returns the name of the directory on cluster nodes in
// which go coverage profiles are dumped, or the empty string if go coverage is
// not enabled for this test.
func (t *testImpl) GoCoverArtifactsDir() string {
	if !t.goCoverEnabled {
		return ""
	}
	return goCoverArtifactsDir
}
// IsBuildVersion returns true if the build version is greater than or equal to
// minVersion. This allows a test to optionally perform additional checks
// depending on the cockroach version it is running against. Note that the
// versions are Cockroach build tag version numbers, not the internal cluster
// version number.
//
// minVersion must parse as a version (otherwise the test is failed via
// t.Fatal) and must not carry a prerelease component (otherwise this panics).
func (t *testImpl) IsBuildVersion(minVersion string) bool {
	requested, err := version.Parse(minVersion)
	if err != nil {
		t.Fatal(err)
	}
	if pre := requested.PreRelease(); pre != "" {
		panic("cannot specify a prerelease: " + pre)
	}
	// We append "-0" to the min-version spec so that we capture all
	// prereleases of the specified version. Otherwise, "v2.1.0" would compare
	// greater than "v2.1.0-alpha.x".
	floor := version.MustParse(minVersion + "-0")
	return t.BuildVersion().AtLeast(floor)
}
// TeamCityEscape escapes a string for use as <value> in a key='<value>'
// attribute in a TeamCity build output marker.
//
// Newline, carriage return, '|', '[', ']' and '\'' are replaced by their
// two-character "|x" escapes; any rune above 127 is written as "|0x" followed
// by its code point in lowercase hex, zero-padded to at least four digits.
// All other characters are copied through unchanged.
// See https://www.jetbrains.com/help/teamcity/2023.05/service-messages.html#Escaped+Values
func TeamCityEscape(s string) string {
	var out strings.Builder
	for _, r := range s {
		switch {
		case r == '\n':
			out.WriteString("|n")
		case r == '\r':
			out.WriteString("|r")
		case r == '|':
			out.WriteString("||")
		case r == '[':
			out.WriteString("|[")
		case r == ']':
			out.WriteString("|]")
		case r == '\'':
			out.WriteString("|'")
		case r > 127:
			// Non-ASCII: emit the code point as a hex escape.
			fmt.Fprintf(&out, "|0x%04x", r)
		default:
			out.WriteRune(r)
		}
	}
	return out.String()
}
// teamCityNameEscape returns name with every comma replaced by an underscore.
func teamCityNameEscape(name string) string {
	return strings.ReplaceAll(name, ",", "_")
}
// testWithCount pairs a test spec with the number of times it still has to be
// run.
type testWithCount struct {
// spec is the test to run.
spec registry.TestSpec
// count maintains the number of runs remaining for a test.
count int
}
// clusterType enumerates the kinds of cluster a test can run against.
type clusterType int
const (
// localCluster is the zero value of clusterType.
// NOTE(review): presumably a cluster running on the local machine - confirm.
localCluster clusterType = iota
// roachprodCluster is the non-local kind.
// NOTE(review): presumably a cluster provisioned through roachprod - confirm.
roachprodCluster
)
// loggingOpt bundles the logging-related settings of the test runner: its
// logger, the output writers, and the locations of artifacts and logs.
type loggingOpt struct {
// l is the test runner logger.
// Note that individual test runs will use a different logger.
l *logger.Logger
// tee controls whether test logs (not test runner logs) also go to stdout or
// not.
tee logger.TeeOptType
// stdout and stderr are the writers used for the runner's standard output
// and standard error.
stdout, stderr io.Writer
// artifactsDir is the path to the dir that will contain the artifacts for
// all the tests.
artifactsDir string
// path to the literal on-agent directory where artifacts are stored. May
// be different from artifactsDir since the roachtest may be running in
// a container.
literalArtifactsDir string
// runnerLogPath is the path to the runner's log file.
runnerLogPath string
}
// workerStatus tracks what a single worker is currently doing: its status
// text and the test and cluster it is working with. The fields under mu are
// accessed through the methods on workerStatus, which take the mutex.
type workerStatus struct {
// name is the worker's identifier.
name string
mu struct {
syncutil.Mutex
// status is presented in the HTML progress page.
status string
// ttr and t are set together by SetTest: t is the test and ttr the
// accompanying testToRunRes value.
ttr testToRunRes
t *testImpl
// c is the cluster set through SetCluster.
c *clusterImpl
}
}
// Status returns the worker's current status text, read under the worker's
// mutex.
func (w *workerStatus) Status() string {
w.mu.Lock()
defer w.mu.Unlock()
return w.mu.status
}
// SetStatus replaces the worker's status text, under the worker's mutex.
func (w *workerStatus) SetStatus(status string) {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.mu.status = status
}
// Cluster returns the cluster last stored with SetCluster (nil if none was
// stored), read under the worker's mutex.
func (w *workerStatus) Cluster() *clusterImpl {
w.mu.Lock()
defer w.mu.Unlock()
return w.mu.c
}
// SetCluster stores c as the worker's cluster, under the worker's mutex.
func (w *workerStatus) SetCluster(c *clusterImpl) {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.mu.c = c
}
// TestToRun returns the testToRunRes value last stored with SetTest, read
// under the worker's mutex.
func (w *workerStatus) TestToRun() testToRunRes {
w.mu.Lock()
defer w.mu.Unlock()
return w.mu.ttr
}
// Test returns the test last stored with SetTest (nil if none was stored),
// read under the worker's mutex.
func (w *workerStatus) Test() *testImpl {
w.mu.Lock()
defer w.mu.Unlock()
return w.mu.t
}
// SetTest stores the worker's test together with its testToRunRes value. Both
// are updated under a single acquisition of the worker's mutex, so readers
// going through Test and TestToRun never see one updated without the other.
func (w *workerStatus) SetTest(t *testImpl, ttr testToRunRes) {
	w.mu.Lock()
	defer w.mu.Unlock()
	w.mu.t, w.mu.ttr = t, ttr
}
// shout logs a message both to a logger and to an io.Writer.
// If format doesn't end with a new line, one will be automatically added.
//
// The message is formatted exactly once; the same rendered text goes to both
// destinations.
func shout(
	ctx context.Context, l *logger.Logger, stdout io.Writer, format string, args ...interface{},
) {
	if len(format) == 0 || format[len(format)-1] != '\n' {
		format += "\n"
	}
	msg := fmt.Sprintf(format, args...)
	// msg is already fully rendered, so it is passed as an argument to "%s".
	// Using it as the format string would re-interpret any '%' that the
	// arguments contributed to the text.
	l.PrintfCtxDepth(ctx, 2 /* depth */, "%s", msg)
	fmt.Fprint(stdout, msg)
}