Cleanup cancelled disruption

Signed-off-by: Pablo Chacin <[email protected]>
grafana · Aug 17, 2023 · d9e0b21 · d9e0b21
1 parent 64e9da1
commit d9e0b21
Show file tree

Hide file tree

Showing 6 changed files with 166 additions and 102 deletions.
diff --git a/pkg/disruptors/cmd_builders.go b/pkg/disruptors/cmd_builders.go
@@ -113,3 +113,7 @@ func buildHTTPFaultCmd(
 
 	return cmd
 }
+
+func buildCleanupCmd() []string {
+	return []string{"xk6-disruptor-agent", "cleanup"}
+}
diff --git a/pkg/disruptors/controller.go b/pkg/disruptors/controller.go
@@ -13,6 +13,14 @@ import (
 	corev1 "k8s.io/api/core/v1"
 )
 
+// VisitOpts define the options for visiting a Pod
+type VisitOpts struct {
+	// Command defines the command to be executed
+	Command []string
+	// Cleanup defines the command to execute for cleaning up if command execution fails
+	Cleanup []string
+}
+
 // AgentController defines the interface for controlling agents in a set of targets
 type AgentController interface {
 	// InjectDisruptorAgent injects the Disruptor agent in the target pods
@@ -22,7 +30,7 @@ type AgentController interface {
 	// Targets retrieves the names of the target of the controller
 	Targets(ctx context.Context) ([]string, error)
 	// Visit allows executing a different command on each target returned by a visiting function
-	Visit(ctx context.Context, visitor func(target corev1.Pod) ([]string, error)) error
+	Visit(ctx context.Context, visitor func(target corev1.Pod) (VisitOpts, error)) error
 }
 
 // AgentController controls de agents in a set of target pods
@@ -96,13 +104,13 @@ func (c *agentController) InjectDisruptorAgent(ctx context.Context) error {
 // ExecCommand executes a command in the targets of the AgentController and reports any error
 func (c *agentController) ExecCommand(ctx context.Context, cmd []string) error {
 	// visit each target with the same command
-	return c.Visit(ctx, func(corev1.Pod) ([]string, error) {
-		return cmd, nil
+	return c.Visit(ctx, func(corev1.Pod) (VisitOpts, error) {
+		return VisitOpts{Command: cmd}, nil
 	})
 }
 
 // Visit allows executing a different command on each target returned by a visiting function
-func (c *agentController) Visit(ctx context.Context, visitor func(corev1.Pod) ([]string, error)) error {
+func (c *agentController) Visit(ctx context.Context, visitor func(corev1.Pod) (VisitOpts, error)) error {
 	// if there are no targets, nothing to do
 	if len(c.targets) == 0 {
 		return nil
@@ -111,41 +119,58 @@ func (c *agentController) Visit(ctx context.Context, visitor func(corev1.Pod) ([
 	execContext, cancel := context.WithCancel(context.Background())
 	defer cancel()
 
-	// ensure errors channel has enough space to avoid blocking gorutines
-	errors := make(chan error, len(c.targets))
+	// ensure errCh channel has enough space to avoid blocking gorutines
+	errCh := make(chan error, len(c.targets))
 	for _, pod := range c.targets {
 		// attach each container asynchronously
 		go func(pod corev1.Pod) {
 			// get the command to execute in the target
-			cmd, err := visitor(pod)
+			visitOpts, err := visitor(pod)
 			if err != nil {
-				errors <- fmt.Errorf("error building command for pod %s: %w", pod.Name, err)
+				errCh <- fmt.Errorf("unable to get command for pod %q: %w", pod.Name, err)
 				return
 			}
 
-			_, stderr, err := c.helper.Exec(execContext, pod.Name, "xk6-agent", cmd, []byte{})
+			_, stderr, err := c.helper.Exec(execContext, pod.Name, "xk6-agent", visitOpts.Command, []byte{})
+
+			// if command failed, ensure the agent execution is terminated
+			if err != nil && visitOpts.Cleanup != nil {
+				// we ignore errors because k6 was cancelled, so there's no point in reporting
+				// use a fresh context because the exec context may have been cancelled or expired
+				//nolint:contextcheck
+				_, _, _ = c.helper.Exec(context.TODO(), pod.Name, "xk6-agent", visitOpts.Cleanup, []byte{})
+			}
+
 			if err != nil {
-				errors <- fmt.Errorf("error invoking agent: %w \n%s", err, string(stderr))
+				errCh <- fmt.Errorf("failed command execution for pod %q: %w \n%s", pod.Name, err, string(stderr))
 				return
 			}
 
-			errors <- nil
+			errCh <- nil
 		}(pod)
 	}
 
+	var err error
 	pending := len(c.targets)
 	for {
 		select {
-		case err := <-errors:
-			if err != nil {
-				return err
-			}
+		case e := <-errCh:
 			pending--
+			if e != nil {
+				// cancel ongoing commands
+				cancel()
+				// Save first received error as reason for ending execution
+				err = e
+			}
+
 			if pending == 0 {
-				return nil
+				return err
 			}
 		case <-ctx.Done():
-			return ctx.Err()
+			// cancel ongoing commands
+			cancel()
+			// save the reason for ending execution
+			err = ctx.Err()
 		}
 	}
 }

diff --git a/pkg/disruptors/controller_test.go b/pkg/disruptors/controller_test.go
@@ -120,19 +120,20 @@ func Test_InjectAgent(t *testing.T) {
 	}
 }
 
-func Test_ExecCommand(t *testing.T) {
+func Test_VisitPod(t *testing.T) {
 	t.Parallel()
 
 	testCases := []struct {
 		title       string
 		namespace   string
 		pods        []*corev1.Pod
-		command     []string
+		visitOpts   VisitOpts
 		err         error
 		stdout      []byte
 		stderr      []byte
 		timeout     time.Duration
 		expectError bool
+		expected    []helpers.Command
 	}{
 		{
 			title:     "successful execution",
@@ -145,9 +146,16 @@ func Test_ExecCommand(t *testing.T) {
 					WithNamespace("test-ns").
 					Build(),
 			},
-			command:     []string{"echo", "-n", "hello", "world"},
+			visitOpts: VisitOpts{
+				Command: []string{"command"},
+				Cleanup: []string{"cleanup"},
+			},
 			err:         nil,
 			expectError: false,
+			expected: []helpers.Command{
+				{Pod: "pod1", Container: "xk6-agent", Namespace: "test-ns", Command: []string{"command"}, Stdin: []byte{}},
+				{Pod: "pod2", Container: "xk6-agent", Namespace: "test-ns", Command: []string{"command"}, Stdin: []byte{}},
+			},
 		},
 		{
 			title:     "failed execution",
@@ -160,10 +168,18 @@ func Test_ExecCommand(t *testing.T) {
 					WithNamespace("test-ns").
 					Build(),
 			},
-			command:     []string{"echo", "-n", "hello", "world"},
-			err:         fmt.Errorf("fake error"),
+			visitOpts: VisitOpts{
+				Command: []string{"echo", "-n", "hello", "world"},
+				Cleanup: []string{"cleanup"},
+			}, err: fmt.Errorf("fake error"),
 			stderr:      []byte("error output"),
 			expectError: true,
+			expected: []helpers.Command{
+				{Pod: "pod1", Container: "xk6-agent", Namespace: "test-ns", Command: []string{"command"}, Stdin: []byte{}},
+				{Pod: "pod1", Container: "xk6-agent", Namespace: "test-ns", Command: []string{"cleanup"}, Stdin: []byte{}},
+				{Pod: "pod2", Container: "xk6-agent", Namespace: "test-ns", Command: []string{"command"}, Stdin: []byte{}},
+				{Pod: "pod2", Container: "xk6-agent", Namespace: "test-ns", Command: []string{"cleanup"}, Stdin: []byte{}},
+			},
 		},
 	}
 
@@ -192,7 +208,9 @@ func Test_ExecCommand(t *testing.T) {
 			)
 
 			executor.SetResult(tc.stdout, tc.stderr, tc.err)
-			err := controller.ExecCommand(context.TODO(), tc.command)
+			err := controller.Visit(context.TODO(), func(target corev1.Pod) (VisitOpts, error) {
+				return tc.visitOpts, nil
+			})
 			if tc.expectError && err == nil {
 				t.Errorf("should had failed")
 				return
@@ -210,29 +228,18 @@ func Test_ExecCommand(t *testing.T) {
 				return
 			}
 
-			// expect same command to be executed for each pod
-			expected := []helpers.Command{}
-			for _, p := range targets {
-				expected = append(expected, helpers.Command{
-					Pod:       p.Name,
-					Namespace: p.Namespace,
-					Container: "xk6-agent",
-					Command:   tc.command,
-					Stdin:     []byte{},
-				})
-			}
+			sort.Slice(tc.expected, func(i, j int) bool {
+				return tc.expected[i].Pod < tc.expected[j].Pod
+			})
 
 			history := executor.GetHistory()
 
-			sort.Slice(expected, func(i, j int) bool {
-				return expected[i].Pod < expected[j].Pod
-			})
 			sort.Slice(history, func(i, j int) bool {
 				return history[i].Pod < history[j].Pod
 			})
 
-			if diff := cmp.Diff(expected, history); diff != "" {
-				t.Errorf("Expected headers did not match returned:\n%s", diff)
+			if diff := cmp.Diff(tc.expected, history); diff != "" {
+				t.Errorf("Expected command did not match returned:\n%s", diff)
 			}
 		})
 	}

diff --git a/pkg/disruptors/pod.go b/pkg/disruptors/pod.go
@@ -119,23 +119,26 @@ func (d *podDisruptor) InjectHTTPFaults(
 		fault.Port = DefaultTargetPort
 	}
 
-	return d.controller.Visit(ctx, func(pod corev1.Pod) ([]string, error) {
+	return d.controller.Visit(ctx, func(pod corev1.Pod) (VisitOpts, error) {
 		if !utils.HasPort(pod, fault.Port) {
-			return nil, fmt.Errorf("pod %q does not expose port %d", pod.Name, fault.Port)
+			return VisitOpts{}, fmt.Errorf("pod %q does not expose port %d", pod.Name, fault.Port)
 		}
 
 		if utils.HasHostNetwork(pod) {
-			return nil, fmt.Errorf("pod %q cannot be safely injected as it has hostNetwork set to true", pod.Name)
+			return VisitOpts{}, fmt.Errorf("pod %q cannot be safely injected as it has hostNetwork set to true", pod.Name)
 		}
 
 		targetAddress, err := utils.PodIP(pod)
 		if err != nil {
-			return nil, err
+			return VisitOpts{}, err
 		}
 
-		cmd := buildHTTPFaultCmd(targetAddress, fault, duration, options)
+		visitOps := VisitOpts{
+			Command: buildHTTPFaultCmd(targetAddress, fault, duration, options),
+			Cleanup: buildCleanupCmd(),
+		}
 
-		return cmd, nil
+		return visitOps, nil
 	})
 }
 
@@ -146,17 +149,21 @@ func (d *podDisruptor) InjectGrpcFaults(
 	duration time.Duration,
 	options GrpcDisruptionOptions,
 ) error {
-	return d.controller.Visit(ctx, func(pod corev1.Pod) ([]string, error) {
+	return d.controller.Visit(ctx, func(pod corev1.Pod) (VisitOpts, error) {
 		if !utils.HasPort(pod, fault.Port) {
-			return nil, fmt.Errorf("pod %q does not expose port %d", pod.Name, fault.Port)
+			return VisitOpts{}, fmt.Errorf("pod %q does not expose port %d", pod.Name, fault.Port)
 		}
 
 		targetAddress, err := utils.PodIP(pod)
 		if err != nil {
-			return nil, err
+			return VisitOpts{}, err
+		}
+
+		visitOps := VisitOpts{
+			Command: buildGrpcFaultCmd(targetAddress, fault, duration, options),
+			Cleanup: buildCleanupCmd(),
 		}
 
-		cmd := buildGrpcFaultCmd(targetAddress, fault, duration, options)
-		return cmd, nil
+		return visitOps, nil
 	})
 }