Skip to content

Commit

Permalink
backport of commit 8f56418
Browse files Browse the repository at this point in the history
  • Loading branch information
schmichael authored Jan 24, 2024
1 parent 5c3552c commit 50bd116
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 34 deletions.
3 changes: 3 additions & 0 deletions .changelog/19787.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
connect: Fixed envoy sidecars being unable to restart after node reboots
```
9 changes: 9 additions & 0 deletions client/allocrunner/interfaces/task_lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,15 @@ type TaskPrestartResponse struct {

// Done lets the hook indicate that it completed successfully and
// should not be run again.
//
// Use sparringly! In general hooks should be idempotent and therefore Done
// is unneeded. You never know at what point an agent might crash, and it can
// be hard to reason about how Done=true impacts agent restarts and node
// reboots. See #19787 for example.
//
// Done is useful for expensive operations such as downloading artifacts, or
// for operations which might fail needlessly if rerun while a node is
// disconnected.
Done bool
}

Expand Down
4 changes: 2 additions & 2 deletions client/allocrunner/taskrunner/connect_native_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ func (h *connectNativeHook) Prestart(
merge(environment, h.bridgeEnv(request.TaskEnv.EnvMap))
merge(environment, h.hostEnv(request.TaskEnv.EnvMap))

// tls/acl setup for native task done
response.Done = true
// tls/acl setup for native task done but since SecretsDir is a tmpfs, don't
// mark Done=true as this hook will need to rerun on node reboots
response.Env = environment
return nil
}
Expand Down
15 changes: 0 additions & 15 deletions client/allocrunner/taskrunner/connect_native_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,6 @@ func TestTaskRunner_ConnectNativeHook_Noop(t *testing.T) {
// Run the hook
require.NoError(t, h.Prestart(context.Background(), request, response))

// Assert the hook is Done
require.True(t, response.Done)

// Assert no environment variables configured to be set
require.Empty(t, response.Env)

Expand Down Expand Up @@ -352,9 +349,6 @@ func TestTaskRunner_ConnectNativeHook_Ok(t *testing.T) {
// Run the Connect Native hook
require.NoError(t, h.Prestart(context.Background(), request, response))

// Assert the hook is Done
require.True(t, response.Done)

// Assert only CONSUL_HTTP_ADDR env variable is set
require.Equal(t, map[string]string{"CONSUL_HTTP_ADDR": testConsul.HTTPAddr}, response.Env)

Expand Down Expand Up @@ -424,9 +418,6 @@ func TestTaskRunner_ConnectNativeHook_with_SI_token(t *testing.T) {
// Run the Connect Native hook
require.NoError(t, h.Prestart(context.Background(), request, response))

// Assert the hook is Done
require.True(t, response.Done)

// Assert environment variable for token is set
require.NotEmpty(t, response.Env)
require.Equal(t, token, response.Env["CONSUL_HTTP_TOKEN"])
Expand Down Expand Up @@ -504,9 +495,6 @@ func TestTaskRunner_ConnectNativeHook_shareTLS(t *testing.T) {
// Run the Connect Native hook
require.NoError(t, h.Prestart(context.Background(), request, response))

// Assert the hook is Done
require.True(t, response.Done)

// Remove variables we are not interested in
delete(response.Env, "CONSUL_HTTP_ADDR")

Expand Down Expand Up @@ -634,9 +622,6 @@ func TestTaskRunner_ConnectNativeHook_shareTLS_override(t *testing.T) {
// Run the Connect Native hook
require.NoError(t, h.Prestart(context.Background(), request, response))

// Assert the hook is Done
require.True(t, response.Done)

// Assert environment variable for CONSUL_HTTP_SSL is set, because it was
// the only one not overridden by task env block config
require.NotEmpty(t, response.Env)
Expand Down
6 changes: 2 additions & 4 deletions client/allocrunner/taskrunner/envoy_bootstrap_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,6 @@ func (h *envoyBootstrapHook) lookupService(svcKind, svcName string, taskEnv *tas
// Must be aware of both launching envoy as a sidecar proxy, as well as a connect gateway.
func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestartRequest, resp *ifs.TaskPrestartResponse) error {
if !req.Task.Kind.IsConnectProxy() && !req.Task.Kind.IsAnyConnectGateway() {
// Not a Connect proxy sidecar
resp.Done = true
return nil
}

Expand Down Expand Up @@ -358,8 +356,8 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart

// Command succeeded, exit.
if cmdErr == nil {
// Bootstrap written. Mark as done and move on.
resp.Done = true
// Bootstrap written. Move on without marking as Done as Prestart needs
// to rerun after node reboots.
return false, nil
}

Expand Down
13 changes: 0 additions & 13 deletions client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,6 @@ func TestEnvoyBootstrapHook_with_SI_token(t *testing.T) {
// Run the hook
require.NoError(t, h.Prestart(context.Background(), req, resp))

// Assert it is Done
require.True(t, resp.Done)

// Ensure the default path matches
env := map[string]string{
taskenv.SecretsDir: req.TaskDir.SecretsDir,
Expand Down Expand Up @@ -463,9 +460,6 @@ func TestTaskRunner_EnvoyBootstrapHook_sidecar_ok(t *testing.T) {
// Run the hook
require.NoError(t, h.Prestart(context.Background(), req, resp))

// Assert it is Done
require.True(t, resp.Done)

require.NotNil(t, resp.Env)
require.Equal(t, "127.0.0.2:19001", resp.Env[envoyAdminBindEnvPrefix+"foo"])

Expand Down Expand Up @@ -547,7 +541,6 @@ func TestTaskRunner_EnvoyBootstrapHook_gateway_ok(t *testing.T) {
require.NoError(t, h.Prestart(context.Background(), req, &resp))

// Assert the hook is Done
require.True(t, resp.Done)
require.NotNil(t, resp.Env)

// Read the Envoy Config file
Expand Down Expand Up @@ -596,9 +589,6 @@ func TestTaskRunner_EnvoyBootstrapHook_Noop(t *testing.T) {
// Run the hook
require.NoError(t, h.Prestart(context.Background(), req, resp))

// Assert it is Done
require.True(t, resp.Done)

// Assert no file was written
_, err := os.Open(filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json"))
require.Error(t, err)
Expand Down Expand Up @@ -677,9 +667,6 @@ func TestTaskRunner_EnvoyBootstrapHook_RecoverableError(t *testing.T) {
require.ErrorIs(t, err, errEnvoyBootstrapError)
require.True(t, structs.IsRecoverable(err))

// Assert it is not Done
require.False(t, resp.Done)

// Assert no file was written
_, err = os.Open(filepath.Join(req.TaskDir.SecretsDir, "envoy_bootstrap.json"))
require.Error(t, err)
Expand Down

0 comments on commit 50bd116

Please sign in to comment.