From e0e7e451ff57b0f1916a7cc48447e4467f77f906 Mon Sep 17 00:00:00 2001
From: Nedyalko Andreev <n@andreev.sh>
Date: Wed, 7 Dec 2022 11:15:22 +0200
Subject: [PATCH] Ensure most test abort conditions have a non-zero exit code

---
 api/v1/group_routes_test.go  |  5 +++-
 api/v1/status_routes_test.go |  2 +-
 cmd/integration_test.go      | 15 ++++++++----
 cmd/run.go                   | 14 +++++++----
 core/engine.go               | 46 +++++++++++++++++++++++-------------
 core/engine_test.go          |  8 +++----
 errext/exitcodes/codes.go    |  4 +++-
 7 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/api/v1/group_routes_test.go b/api/v1/group_routes_test.go
index d16ce736321..6dc06f74854 100644
--- a/api/v1/group_routes_test.go
+++ b/api/v1/group_routes_test.go
@@ -6,6 +6,7 @@ import (
 	"net/http/httptest"
 	"testing"
 
+	"github.com/sirupsen/logrus"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
@@ -19,8 +20,10 @@ import (
 
 func getTestPreInitState(tb testing.TB) *lib.TestPreInitState {
 	reg := metrics.NewRegistry()
+	logger := testutils.NewLogger(tb)
+	logger.SetLevel(logrus.DebugLevel)
 	return &lib.TestPreInitState{
-		Logger:         testutils.NewLogger(tb),
+		Logger:         logger,
 		RuntimeOptions: lib.RuntimeOptions{},
 		Registry:       reg,
 		BuiltinMetrics: metrics.RegisterBuiltinMetrics(reg),
diff --git a/api/v1/status_routes_test.go b/api/v1/status_routes_test.go
index bfbe6c65bdc..685a2c25737 100644
--- a/api/v1/status_routes_test.go
+++ b/api/v1/status_routes_test.go
@@ -128,7 +128,7 @@ func TestPatchStatus(t *testing.T) {
 			}()
 
 			go func() {
-				assert.NoError(t, run())
+				assert.ErrorContains(t, run(), "test run aborted by signal")
 			}()
 			// wait for the executor to initialize to avoid a potential data race below
 			time.Sleep(200 * time.Millisecond)
diff --git a/cmd/integration_test.go b/cmd/integration_test.go
index eeb73fc740b..12f51b94214 100644
--- a/cmd/integration_test.go
+++ b/cmd/integration_test.go
@@ -605,7 +605,7 @@ func TestAbortedByThreshold(t *testing.T) {
 	)
 	newRootCommand(ts.globalState).execute()
 
-	assert.True(t, testutils.LogContains(ts.loggerHook.Drain(), logrus.ErrorLevel, `some thresholds have failed`))
+	assert.True(t, testutils.LogContains(ts.loggerHook.Drain(), logrus.ErrorLevel, `test run aborted by failed thresholds`))
 	stdOut := ts.stdOut.String()
 	t.Log(stdOut)
 	assert.Contains(t, stdOut, `✗ iterations`)
@@ -648,13 +648,15 @@ func TestAbortedByUserWithGoodThresholds(t *testing.T) {
 		};
 	`
 
-	ts := getSimpleCloudOutputTestState(t, script, nil, lib.RunStatusAbortedUser, cloudapi.ResultStatusPassed, 0)
+	ts := getSimpleCloudOutputTestState(t, script, nil, lib.RunStatusAbortedUser, cloudapi.ResultStatusPassed, exitcodes.ExternalAbort)
 
 	asyncWaitForStdoutAndStopTestWithInterruptSignal(t, ts, 15, time.Second, "simple iter 2")
 
 	newRootCommand(ts.globalState).execute()
 
-	assert.False(t, testutils.LogContains(ts.loggerHook.Drain(), logrus.ErrorLevel, `some thresholds have failed`))
+	logs := ts.loggerHook.Drain()
+	assert.False(t, testutils.LogContains(logs, logrus.ErrorLevel, `some thresholds have failed`))
+	assert.True(t, testutils.LogContains(logs, logrus.ErrorLevel, `test run aborted by signal`))
 	stdOut := ts.stdOut.String()
 	t.Log(stdOut)
 	assert.Contains(t, stdOut, `✓ iterations`)
@@ -755,6 +757,9 @@ func asyncWaitForStdoutAndStopTestFromRESTAPI(
 	})
 }
 
+// TODO: add more abort scenario tests, see
+// https://github.com/grafana/k6/issues/2804
+
 func TestAbortedByUserWithRestAPI(t *testing.T) {
 	t.Parallel()
 	script := `
@@ -771,7 +776,7 @@ func TestAbortedByUserWithRestAPI(t *testing.T) {
 
 	ts := getSimpleCloudOutputTestState(
 		t, script, []string{"-v", "--log-output=stdout", "--iterations", "20"},
-		lib.RunStatusAbortedUser, cloudapi.ResultStatusPassed, 0,
+		lib.RunStatusAbortedUser, cloudapi.ResultStatusPassed, exitcodes.ScriptStoppedFromRESTAPI,
 	)
 
 	asyncWaitForStdoutAndStopTestFromRESTAPI(t, ts, 15, time.Second, "a simple iteration")
@@ -783,7 +788,7 @@ func TestAbortedByUserWithRestAPI(t *testing.T) {
 	assert.Contains(t, stdOut, `a simple iteration`)
 	assert.Contains(t, stdOut, `teardown() called`)
 	assert.Contains(t, stdOut, `PATCH /v1/status`)
-	assert.Contains(t, stdOut, `run: stopped by user; exiting...`)
+	assert.Contains(t, stdOut, `run: stopped by user via REST API; exiting...`)
 	assert.Contains(t, stdOut, `level=debug msg="Metrics emission of VUs and VUsMax metrics stopped"`)
 	assert.Contains(t, stdOut, `level=debug msg="Metrics processing finished!"`)
 	assert.Contains(t, stdOut, `level=debug msg="Sending test finished" output=cloud ref=111 run_status=5 tainted=false`)
diff --git a/cmd/run.go b/cmd/run.go
index ab9c5f80d64..49c1e44d2aa 100644
--- a/cmd/run.go
+++ b/cmd/run.go
@@ -204,6 +204,7 @@ func (c *cmdRun) run(cmd *cobra.Command, args []string) error {
 	initBar.Modify(pb.WithConstProgress(0, "Starting test..."))
 	err = engineRun()
 	if err != nil {
+		err = errext.WithExitCodeIfNone(common.UnwrapGojaInterruptedError(err), exitcodes.GenericEngine)
 		logger.WithError(err).Debug("Engine terminated with an error")
 	} else {
 		logger.Debug("Engine run terminated cleanly")
@@ -263,13 +264,16 @@ func (c *cmdRun) run(cmd *cobra.Command, args []string) error {
 			logger.WithError(klErr).Warn("Error while closing the SSLKEYLOGFILE")
 		}
 	}
-	if err != nil {
-		return errext.WithExitCodeIfNone(common.UnwrapGojaInterruptedError(err), exitcodes.GenericEngine)
-	}
+
 	if engine.IsTainted() {
-		return errext.WithExitCodeIfNone(errors.New("some thresholds have failed"), exitcodes.ThresholdsHaveFailed)
+		if err == nil {
+			err = errors.New("some thresholds have failed")
+		} else {
+			logger.Error("some thresholds have failed") // log this, even if there was already a previous error
+		}
+		err = errext.WithExitCodeIfNone(err, exitcodes.ThresholdsHaveFailed)
 	}
-	return nil
+	return err
 }
 
 func (c *cmdRun) flagSet() *pflag.FlagSet {
diff --git a/core/engine.go b/core/engine.go
index 6068f58085a..ed98d744cbf 100644
--- a/core/engine.go
+++ b/core/engine.go
@@ -9,6 +9,7 @@ import (
 	"github.com/sirupsen/logrus"
 
 	"go.k6.io/k6/errext"
+	"go.k6.io/k6/errext/exitcodes"
 	"go.k6.io/k6/lib"
 	"go.k6.io/k6/metrics"
 	"go.k6.io/k6/metrics/engine"
@@ -95,12 +96,12 @@ func NewEngine(testState *lib.TestRunState, ex lib.ExecutionScheduler, outputs [
 // wait() functions.
 //
 // Things to note:
-//  - The first lambda, Run(), synchronously executes the actual load test.
-//  - It can be prematurely aborted by cancelling the runCtx - this won't stop
-//    the metrics collection by the Engine.
-//  - Stopping the metrics collection can be done at any time after Run() has
-//    returned by cancelling the globalCtx
-//  - The second returned lambda can be used to wait for that process to finish.
+//   - The first lambda, Run(), synchronously executes the actual load test.
+//   - It can be prematurely aborted by cancelling the runCtx - this won't stop
+//     the metrics collection by the Engine.
+//   - Stopping the metrics collection can be done at any time after Run() has
+//     returned by cancelling the globalCtx
+//   - The second returned lambda can be used to wait for that process to finish.
 func (e *Engine) Init(globalCtx, runCtx context.Context) (run func() error, wait func(), err error) {
 	e.logger.Debug("Initialization starting...")
 	// TODO: if we ever need metrics processing in the init context, we can move
@@ -113,7 +114,8 @@ func (e *Engine) Init(globalCtx, runCtx context.Context) (run func() error, wait
 	// TODO: move all of this in a separate struct? see main TODO above
 	runSubCtx, runSubCancel := context.WithCancel(runCtx)
 
-	resultCh := make(chan error)
+	execRunResult := make(chan error)
+	engineRunResult := make(chan error)
 	processMetricsAfterRun := make(chan struct{})
 	runFn := func() error {
 		e.logger.Debug("Execution scheduler starting...")
@@ -124,9 +126,9 @@ func (e *Engine) Init(globalCtx, runCtx context.Context) (run func() error, wait
 		case <-runSubCtx.Done():
 			// do nothing, the test run was aborted somehow
 		default:
-			resultCh <- err // we finished normally, so send the result
-			<-resultCh      // the result was processed
+			execRunResult <- err // we finished normally, so send the result
 		}
+		result := <-engineRunResult // get the final result
 
 		// Make the background jobs process the currently buffered metrics and
 		// run the thresholds, then wait for that to be done.
@@ -136,10 +138,12 @@ func (e *Engine) Init(globalCtx, runCtx context.Context) (run func() error, wait
 		case <-globalCtx.Done():
 		}
 
-		return err
+		return result
 	}
 
-	waitFn := e.startBackgroundProcesses(globalCtx, runCtx, resultCh, runSubCancel, processMetricsAfterRun)
+	waitFn := e.startBackgroundProcesses(
+		globalCtx, runCtx, execRunResult, engineRunResult, runSubCancel, processMetricsAfterRun,
+	)
 	return runFn, waitFn, nil
 }
 
@@ -165,7 +169,8 @@ func (e *Engine) setRunStatusFromError(err error) {
 // and that the remaining metrics samples in the pipeline should be processed as the background
 // process is about to exit.
 func (e *Engine) startBackgroundProcesses(
-	globalCtx, runCtx context.Context, runResult chan error, runSubCancel func(), processMetricsAfterRun chan struct{},
+	globalCtx, runCtx context.Context, execRunResult, engineRunResult chan error,
+	runSubCancel func(), processMetricsAfterRun chan struct{},
 ) (wait func()) {
 	processes := new(sync.WaitGroup)
 
@@ -181,27 +186,36 @@ func (e *Engine) startBackgroundProcesses(
 	thresholdAbortChan := make(chan struct{})
 	go func() {
 		defer processes.Done()
+		var err error
+		defer func() {
+			e.logger.WithError(err).Debug("Final Engine.Run() result")
+			engineRunResult <- err
+		}()
 		select {
-		case err := <-runResult:
+		case err = <-execRunResult:
 			if err != nil {
 				e.logger.WithError(err).Debug("run: execution scheduler returned an error")
 				e.setRunStatusFromError(err)
 			} else {
-				e.logger.Debug("run: execution scheduler terminated")
+				e.logger.Debug("run: execution scheduler finished nominally")
 				e.OutputManager.SetRunStatus(lib.RunStatusFinished)
 			}
-			close(runResult) // signal that the run result was processed
+			// do nothing, return the same err value we got from the Run()
+			// ExecutionScheduler result, we just set the run_status based on it
 		case <-runCtx.Done():
 			e.logger.Debug("run: context expired; exiting...")
 			e.OutputManager.SetRunStatus(lib.RunStatusAbortedUser)
+			err = errext.WithExitCodeIfNone(errors.New("test run aborted by signal"), exitcodes.ExternalAbort)
 		case <-e.stopChan:
 			runSubCancel()
-			e.logger.Debug("run: stopped by user; exiting...")
+			e.logger.Debug("run: stopped by user via REST API; exiting...")
 			e.OutputManager.SetRunStatus(lib.RunStatusAbortedUser)
+			err = errext.WithExitCodeIfNone(errors.New("test run stopped from the REST API"), exitcodes.ScriptStoppedFromRESTAPI)
 		case <-thresholdAbortChan:
 			e.logger.Debug("run: stopped by thresholds; exiting...")
 			runSubCancel()
 			e.OutputManager.SetRunStatus(lib.RunStatusAbortedThreshold)
+			err = errext.WithExitCodeIfNone(errors.New("test run aborted by failed thresholds"), exitcodes.ThresholdsHaveFailed)
 		}
 	}()
 
diff --git a/core/engine_test.go b/core/engine_test.go
index 8b444a901a9..b678ef321b1 100644
--- a/core/engine_test.go
+++ b/core/engine_test.go
@@ -141,7 +141,7 @@ func TestEngineRun(t *testing.T) {
 		defer test.wait()
 
 		startTime := time.Now()
-		assert.NoError(t, test.run())
+		assert.ErrorContains(t, test.run(), "test run aborted by signal")
 		assert.WithinDuration(t, startTime.Add(duration), time.Now(), 100*time.Millisecond)
 		<-done
 	})
@@ -192,7 +192,7 @@ func TestEngineRun(t *testing.T) {
 		go func() { errC <- test.run() }()
 		<-signalChan
 		test.runCancel()
-		assert.NoError(t, <-errC)
+		assert.ErrorContains(t, <-errC, "test run aborted by signal")
 		test.wait()
 
 		found := 0
@@ -455,7 +455,7 @@ func TestEngineAbortedByThresholds(t *testing.T) {
 	defer test.wait()
 
 	go func() {
-		assert.NoError(t, test.run())
+		require.ErrorContains(t, test.run(), "aborted by failed thresholds")
 	}()
 
 	select {
@@ -1229,7 +1229,7 @@ func TestEngineRunsTeardownEvenAfterTestRunIsAborted(t *testing.T) {
 		VUs: null.IntFrom(1), Iterations: null.IntFrom(1),
 	}, piState)
 
-	assert.NoError(t, test.run())
+	assert.ErrorContains(t, test.run(), "test run aborted by signal")
 	test.wait()
 
 	var count float64
diff --git a/errext/exitcodes/codes.go b/errext/exitcodes/codes.go
index abcb602f5e5..98b678e69f5 100644
--- a/errext/exitcodes/codes.go
+++ b/errext/exitcodes/codes.go
@@ -1,4 +1,5 @@
 // Package exitcodes contains the constants representing possible k6 exit error codes.
+//
 //nolint:golint
 package exitcodes
 
@@ -13,10 +14,11 @@ const (
 	SetupTimeout             ExitCode = 100
 	TeardownTimeout          ExitCode = 101
 	GenericTimeout           ExitCode = 102 // TODO: remove?
-	GenericEngine            ExitCode = 103
+	GenericEngine            ExitCode = 103 // TODO: remove after https://github.com/grafana/k6/issues/2804
 	InvalidConfig            ExitCode = 104
 	ExternalAbort            ExitCode = 105
 	CannotStartRESTAPI       ExitCode = 106
 	ScriptException          ExitCode = 107
 	ScriptAborted            ExitCode = 108
+	ScriptStoppedFromRESTAPI ExitCode = 109
 )