Skip to content

Commit

Permalink
roachtest: allow adding extra github parameters
Browse files Browse the repository at this point in the history
When github issue posting, we denote various parameters
describing the test, i.e. cloud, metamorphic encryption, etc.
This is useful as it allows one to easily determine properties
of a test without digging into the logs.

However, this feature only works if github posting is enabled.
We've seen some cases where it is not enabled and we have trouble
figuring out the aforementioned parameters. This change makes it so
the parameters are logged to the artifacts directory in case github
posting is not enabled or fails.

It also exposes the notion of extra parameters to the test interface.
This allows for tests that have metamorphic properties to easily list
them in the issue itself.

One example of this is in mixed version tests, where we randomize the
deployment mode and the versions used. We often run into issues that
pertain to only a specific deployment mode or version, and it can be
cumbersome to dig through the artifacts for each individual failure.

Release note: none
Epic: none
Fixes: none
  • Loading branch information
DarrylWong committed Nov 13, 2024
1 parent 2cbed34 commit f3f6d47
Show file tree
Hide file tree
Showing 28 changed files with 402 additions and 231 deletions.
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ func (t testWrapper) L() *logger.Logger {
// Status is part of the testI interface.
func (t testWrapper) Status(args ...interface{}) {}

func (t testWrapper) AddParam(label, value string) {}

func TestClusterMachineType(t *testing.T) {
type machineTypeTestCase struct {
machineType string
Expand Down
12 changes: 12 additions & 0 deletions pkg/cmd/roachtest/clusterstats/mock_test_generated_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 9 additions & 31 deletions pkg/cmd/roachtest/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ func newGithubIssues(disable bool, c *clusterImpl, vmCreateOpts *vm.CreateOpts)
}
}

func roachtestPrefix(p string) string {
return "ROACHTEST_" + p
}

// generateHelpCommand creates a HelpCommand for createPostRequest
func generateHelpCommand(
testName string, clusterName string, cloud spec.Cloud, start time.Time, end time.Time,
Expand Down Expand Up @@ -181,6 +177,7 @@ func (g *githubIssues) createPostRequest(
sideEyeTimeoutSnapshotURL string,
runtimeAssertionsBuild bool,
coverageBuild bool,
params map[string]string,
) (issues.PostRequest, error) {
var mention []string
var projColID int
Expand Down Expand Up @@ -267,31 +264,7 @@ func (g *githubIssues) createPostRequest(

artifacts := fmt.Sprintf("/%s", testName)

clusterParams := map[string]string{
roachtestPrefix("cloud"): roachtestflags.Cloud.String(),
roachtestPrefix("cpu"): fmt.Sprintf("%d", spec.Cluster.CPUs),
roachtestPrefix("ssd"): fmt.Sprintf("%d", spec.Cluster.SSDs),
roachtestPrefix("runtimeAssertionsBuild"): fmt.Sprintf("%t", runtimeAssertionsBuild),
roachtestPrefix("coverageBuild"): fmt.Sprintf("%t", coverageBuild),
}
// Emit CPU architecture only if it was specified; otherwise, it's captured below, assuming cluster was created.
if spec.Cluster.Arch != "" {
clusterParams[roachtestPrefix("arch")] = string(spec.Cluster.Arch)
}
// These params can be probabilistically set, so we pass them here to
// show what their actual values are in the posted issue.
if g.vmCreateOpts != nil {
clusterParams[roachtestPrefix("fs")] = g.vmCreateOpts.SSDOpts.FileSystem
clusterParams[roachtestPrefix("localSSD")] = fmt.Sprintf("%v", g.vmCreateOpts.SSDOpts.UseLocalSSD)
}

if g.cluster != nil {
clusterParams[roachtestPrefix("encrypted")] = fmt.Sprintf("%v", g.cluster.encAtRest)
if spec.Cluster.Arch == "" {
// N.B. when Arch is specified, it cannot differ from cluster's arch.
// Hence, we only emit when arch was unspecified.
clusterParams[roachtestPrefix("arch")] = string(g.cluster.arch)
}
issueClusterName = g.cluster.name
}

Expand Down Expand Up @@ -332,13 +305,17 @@ func (g *githubIssues) createPostRequest(
Artifacts: artifacts,
SideEyeSnapshotMsg: sideEyeMsg,
SideEyeSnapshotURL: sideEyeTimeoutSnapshotURL,
ExtraParams: clusterParams,
ExtraParams: params,
HelpCommand: generateHelpCommand(testName, issueClusterName, roachtestflags.Cloud, start, end),
}, nil
}

func (g *githubIssues) MaybePost(
t *testImpl, l *logger.Logger, message string, sideEyeTimeoutSnapshotURL string,
t *testImpl,
l *logger.Logger,
message string,
sideEyeTimeoutSnapshotURL string,
params map[string]string,
) (*issues.TestFailureIssue, error) {
skipReason := g.shouldPost(t)
if skipReason != "" {
Expand All @@ -349,7 +326,8 @@ func (g *githubIssues) MaybePost(
postRequest, err := g.createPostRequest(
t.Name(), t.start, t.end, t.spec, t.failures(),
message, sideEyeTimeoutSnapshotURL,
roachtestutil.UsingRuntimeAssertions(t), t.goCoverEnabled)
roachtestutil.UsingRuntimeAssertions(t), t.goCoverEnabled, params,
)

if err != nil {
return nil, err
Expand Down
35 changes: 20 additions & 15 deletions pkg/cmd/roachtest/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/bazci/githubpost/issues"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/internal/team"
Expand Down Expand Up @@ -122,26 +123,27 @@ func TestCreatePostRequest(t *testing.T) {
const testName = "github_test"

type githubIssueOpts struct {
failures []failure
runtimeAssertionsBuild bool
coverageBuild bool
loadTeamsFailed bool
failures []failure
loadTeamsFailed bool
}

datadriven.Walk(t, datapathutils.TestDataPath(t, "github"), func(t *testing.T, path string) {
clusterSpec := reg.MakeClusterSpec(1)

testSpec := &registry.TestSpec{
Name: testName,
Owner: OwnerUnitTest,
Cluster: clusterSpec,
Name: testName,
Owner: OwnerUnitTest,
Cluster: clusterSpec,
CockroachBinary: registry.StandardCockroach,
}

ti := &testImpl{
spec: testSpec,
l: nilLogger(),
start: time.Date(2023, time.July, 21, 16, 34, 3, 817, time.UTC),
end: time.Date(2023, time.July, 21, 16, 42, 13, 137, time.UTC),
spec: testSpec,
l: nilLogger(),
start: time.Date(2023, time.July, 21, 16, 34, 3, 817, time.UTC),
end: time.Date(2023, time.July, 21, 16, 42, 13, 137, time.UTC),
cockroach: "cockroach",
cockroachEA: "cockroach-short",
}

testClusterImpl := &clusterImpl{spec: clusterSpec, arch: vm.ArchAMD64, name: "foo"}
Expand Down Expand Up @@ -173,9 +175,10 @@ func TestCreatePostRequest(t *testing.T) {
}
message := b.String()

params := getTestParameters(ti, github.cluster, github.vmCreateOpts)
req, err := github.createPostRequest(
testName, ti.start, ti.end, testSpec, testCase.failures,
message, "https://app.side-eye.io/snapshots/1", testCase.runtimeAssertionsBuild, testCase.coverageBuild,
message, "https://app.side-eye.io/snapshots/1", roachtestutil.UsingRuntimeAssertions(ti), ti.goCoverEnabled, params,
)
if testCase.loadTeamsFailed {
// Assert that if TEAMS.yaml cannot be loaded then function errors.
Expand Down Expand Up @@ -230,6 +233,8 @@ func TestCreatePostRequest(t *testing.T) {
testCase.failures = append(testCase.failures, createFailure(refError))
case "add-label":
ti.spec.ExtraLabels = append(ti.spec.ExtraLabels, d.CmdArgs[0].Vals...)
case "add-param":
ti.AddParam(d.CmdArgs[0].Vals[0], d.CmdArgs[1].Vals[0])
case "set-cluster-create-failed":
// We won't have either if cluster create fails.
vmOpts = nil
Expand All @@ -240,9 +245,9 @@ func TestCreatePostRequest(t *testing.T) {
teamLoadFn = invalidTeamsFn
testCase.loadTeamsFailed = true
case "set-runtime-assertions-build":
testCase.runtimeAssertionsBuild = true
ti.spec.CockroachBinary = registry.RuntimeAssertionsCockroach
case "set-coverage-enabled-build":
testCase.coverageBuild = true
ti.goCoverEnabled = true
}

return "ok"
Expand Down Expand Up @@ -289,7 +294,7 @@ func formatPostRequest(req issues.PostRequest) (string, error) {
q.Add("title", formatter.Title(data))
q.Add("body", post.String())
u.RawQuery = q.Encode()
post.WriteString(fmt.Sprintf("Rendered:%s", u.String()))
post.WriteString(fmt.Sprintf("Rendered:\n%s", u.String()))

return post.String(), nil
}
5 changes: 5 additions & 0 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/mixedversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -750,6 +750,11 @@ func (t *Test) Run() {

t.logger.Printf("mixed-version test:\n%s", plan.PrettyPrint())

// Mark the deployment mode and versions, so they show up in the github issue. This makes
// it easier to group failures together without having to dig into the test logs.
t.rt.AddParam("mvtDeploymentMode", string(plan.deploymentMode))
t.rt.AddParam("mvtVersions", formatVersions(plan.Versions()))

if err := t.run(plan); err != nil {
t.rt.Fatal(err)
}
Expand Down
16 changes: 9 additions & 7 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1573,27 +1573,29 @@ func (plan *TestPlan) PrettyPrint() string {
return plan.prettyPrintInternal(false /* debug */)
}

func formatVersions(versions []*clusterupgrade.Version) string {
formattedVersions := make([]string, 0, len(versions))
for _, v := range versions {
formattedVersions = append(formattedVersions, v.String())
}
return strings.Join(formattedVersions, " → ")
}

func (plan *TestPlan) prettyPrintInternal(debug bool) string {
var out strings.Builder
allSteps := plan.Steps()
for i, step := range allSteps {
plan.prettyPrintStep(&out, step, treeBranchString(i, len(allSteps)), debug)
}

versions := plan.Versions()
formattedVersions := make([]string, 0, len(versions))
for _, v := range versions {
formattedVersions = append(formattedVersions, v.String())
}

var lines []string
addLine := func(title string, val any) {
titleWithColon := fmt.Sprintf("%s:", title)
lines = append(lines, fmt.Sprintf("%-20s%v", titleWithColon, val))
}

addLine("Seed", plan.seed)
addLine("Upgrades", strings.Join(formattedVersions, " → "))
addLine("Upgrades", formatVersions(plan.Versions()))
addLine("Deployment mode", plan.deploymentMode)

if len(plan.enabledMutators) > 0 {
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/test/test_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ type Test interface {
L() *logger.Logger
Progress(float64)
Status(args ...interface{})
AddParam(string, string)
WorkerStatus(args ...interface{})
WorkerProgress(float64)
IsDebug() bool
Expand Down
25 changes: 25 additions & 0 deletions pkg/cmd/roachtest/test_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ type testImpl struct {
// TODO(test-eng): this should just be an in-mem (ring) buffer attached to
// `t.L()`.
output []byte

// extraParams are test-specific parameters that will be added to the Github issue as
// parameters if there is a failure. They will additionally be logged in the test itself
// in case github issue posting is disabled.
extraParams map[string]string
}
// Map from version to path to the cockroach binary to be used when
// mixed-version test wants a binary for that binary. If a particular version
Expand Down Expand Up @@ -286,6 +291,26 @@ func (t *testImpl) Status(args ...interface{}) {
t.status(context.TODO(), t.runnerID, args...)
}

// AddParam adds a parameter to the test. This parameter will be logged both in
// the github issue if one is created and in the artifacts directory. This is useful if a test
// has metamorphic properties as it makes it easier to spot the differences between runs
// without digging into the logs (i.e. mixed version test deployment mode). It also helps
// debugging when the test failure is not posted to github (i.e. qualification runs).
func (t *testImpl) AddParam(label, value string) {
t.mu.Lock()
defer t.mu.Unlock()
if t.mu.extraParams == nil {
t.mu.extraParams = make(map[string]string)
}
t.mu.extraParams[label] = value
}

func (t *testImpl) getExtraParams() map[string]string {
t.mu.Lock()
defer t.mu.Unlock()
return t.mu.extraParams
}

// IsDebug returns true if the test is in a debug state.
func (t *testImpl) IsDebug() bool {
return t.debug
Expand Down
64 changes: 62 additions & 2 deletions pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package main
import (
"context"
gosql "database/sql"
"encoding/json"
"fmt"
"html"
"io"
Expand Down Expand Up @@ -846,7 +847,9 @@ func (r *testRunner) runWorker(
handleClusterCreationFailure := func(err error) {
t.Error(errClusterProvisioningFailed(err))

if _, err := github.MaybePost(t, l, t.failureMsg(), "" /* sideEyeTimeoutSnapshotURL */); err != nil {
params := getTestParameters(t, github.cluster, github.vmCreateOpts)
logTestParameters(l, params)
if _, err := github.MaybePost(t, l, t.failureMsg(), "" /* sideEyeTimeoutSnapshotURL */, params); err != nil {
shout(ctx, l, stdout, "failed to post issue: %s", err)
}
}
Expand Down Expand Up @@ -910,6 +913,7 @@ func (r *testRunner) runWorker(
leases = registry.LeaseTypes[prng.Intn(len(registry.LeaseTypes))]
}
c.status(fmt.Sprintf("metamorphically using %s leases", leases))
t.AddParam("metamorphicLeases", leases.String())
}
switch leases {
case registry.DefaultLeases:
Expand Down Expand Up @@ -1163,7 +1167,9 @@ func (r *testRunner) runTest(
}

output := fmt.Sprintf("%s\ntest artifacts and logs in: %s", failureMsg, t.ArtifactsDir())
issue, err := github.MaybePost(t, l, output, sideEyeTimeoutSnapshotURL)
params := getTestParameters(t, github.cluster, github.vmCreateOpts)
logTestParameters(l, params)
issue, err := github.MaybePost(t, l, output, sideEyeTimeoutSnapshotURL, params)
if err != nil {
shout(ctx, l, stdout, "failed to post issue: %s", err)
}
Expand Down Expand Up @@ -2056,3 +2062,57 @@ func logFatalfCtx(ctx context.Context, l *logger.Logger, f string, args ...inter
l.Close()
exit.WithCode(exit.UnspecifiedError())
}

func logTestParameters(l *logger.Logger, params map[string]string) {
// Log the parameters as we've seen cases where it's hard to extract the information (i.e.
// encryption at rest) if we don't have the Github issue to refer to.
if jsonBytes, err := json.MarshalIndent(params, "", " "); err == nil {
// Attempt to log the parameters to their own file, but log to stdout
// anyway if child logger creation fails. Knowing the test parameters
// is worth the noise.
paramLogger, err := l.ChildLogger("params", logger.QuietStdout, logger.QuietStderr)
if err == nil {
defer paramLogger.Close()
paramLogger.Printf("Roachtest Parameters:\n%s", jsonBytes)
} else {
l.Printf("Roachtest Parameters:\n%s", jsonBytes)
}
}
}

func getTestParameters(t *testImpl, c *clusterImpl, createOpts *vm.CreateOpts) map[string]string {
spec := t.spec
clusterParams := map[string]string{
"cloud": roachtestflags.Cloud.String(),
"cpu": fmt.Sprintf("%d", spec.Cluster.CPUs),
"ssd": fmt.Sprintf("%d", spec.Cluster.SSDs),
"runtimeAssertionsBuild": fmt.Sprintf("%t", roachtestutil.UsingRuntimeAssertions(t)),
"coverageBuild": fmt.Sprintf("%t", t.goCoverEnabled),
}
// Emit CPU architecture only if it was specified; otherwise, it's captured below, assuming cluster was created.
if spec.Cluster.Arch != "" {
clusterParams["arch"] = string(spec.Cluster.Arch)
}
// These params can be probabilistically set, so we pass them here to
// show what their actual values are in the posted issue.
if createOpts != nil {
clusterParams["fs"] = createOpts.SSDOpts.FileSystem
clusterParams["localSSD"] = fmt.Sprintf("%v", createOpts.SSDOpts.UseLocalSSD)
}

if c != nil {
clusterParams["encrypted"] = fmt.Sprintf("%v", c.encAtRest)
if spec.Cluster.Arch == "" {
// N.B. when Arch is specified, it cannot differ from cluster's arch.
// Hence, we only emit when arch was unspecified.
clusterParams["arch"] = string(c.arch)
}
}

extraParams := t.getExtraParams()
for label, value := range extraParams {
clusterParams[label] = value
}

return clusterParams
}
Loading

0 comments on commit f3f6d47

Please sign in to comment.