Skip to content

Commit

Permalink
Call fleet-server audit/unenroll endpoint on uninstall (#5302) (#5688)
Browse files Browse the repository at this point in the history
* Call fleet-server audit/unenroll endpoint on uninstall

Uninstalling a fleet-managed elastic-agent instance will now do a
best-effort attempt to notify fleet-server of the agent removal so the
agent should not appear as offline.

---------

Co-authored-by: Paolo Chilà <[email protected]>
Co-authored-by: Blake Rouse <[email protected]>
(cherry picked from commit 07c2a92)

Co-authored-by: Michel Laterman <[email protected]>
  • Loading branch information
mergify[bot] and michel-laterman authored Oct 4, 2024
1 parent f067c92 commit aded4a7
Show file tree
Hide file tree
Showing 8 changed files with 469 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: feature

# Change summary; an 80ish-character description of the change.
summary: Call fleet-server audit/unenroll endpoint on uninstall

# Long description; in case the summary is not enough to describe the change
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: |
Uninstalling a fleet-managed elastic-agent instance will now do a
best-effort attempt to notify fleet-server of the agent removal so the
agent should not appear as offline.
# Affected component; a word indicating the component this changeset affects.
component:

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/5302

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/elastic-agent/issues/484
4 changes: 2 additions & 2 deletions internal/pkg/agent/cmd/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
return err
}
} else {
err := install.Uninstall(cfgFile, topPath, "", log, progBar)
err := install.Uninstall(cmd.Context(), cfgFile, topPath, "", log, progBar)
if err != nil {
progBar.Describe("Uninstall from binary failed")
return err
Expand All @@ -257,7 +257,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
defer func() {
if err != nil {
progBar.Describe("Uninstalling")
innerErr := install.Uninstall(cfgFile, topPath, "", log, progBar)
innerErr := install.Uninstall(cmd.Context(), cfgFile, topPath, "", log, progBar)
if innerErr != nil {
progBar.Describe("Failed to Uninstall")
} else {
Expand Down
2 changes: 1 addition & 1 deletion internal/pkg/agent/cmd/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func uninstallCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
fmt.Fprint(os.Stderr, logBuff.String())
}()

err = install.Uninstall(paths.ConfigFile(), paths.Top(), uninstallToken, log, progBar)
err = install.Uninstall(cmd.Context(), paths.ConfigFile(), paths.Top(), uninstallToken, log, progBar)
if err != nil {
progBar.Describe("Failed to uninstall agent")
return fmt.Errorf("error uninstalling agent: %w", err)
Expand Down
105 changes: 102 additions & 3 deletions internal/pkg/agent/install/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"io/fs"
"net/http"
"os"
"path/filepath"
"runtime"
Expand All @@ -19,24 +20,36 @@ import (
"github.com/schollz/progressbar/v3"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/secret"
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
aerrors "github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/agent/transpiler"
"github.com/elastic/elastic-agent/internal/pkg/agent/vars"
"github.com/elastic/elastic-agent/internal/pkg/agent/vault"
"github.com/elastic/elastic-agent/internal/pkg/capabilities"
"github.com/elastic/elastic-agent/internal/pkg/config"
"github.com/elastic/elastic-agent/internal/pkg/config/operations"
"github.com/elastic/elastic-agent/internal/pkg/core/backoff"
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
fleetclient "github.com/elastic/elastic-agent/internal/pkg/fleetapi/client"
"github.com/elastic/elastic-agent/pkg/component"
comprt "github.com/elastic/elastic-agent/pkg/component/runtime"
"github.com/elastic/elastic-agent/pkg/core/logger"
"github.com/elastic/elastic-agent/pkg/features"
"github.com/elastic/elastic-agent/pkg/utils"
)

// fleetAudit variables control retry attempts for contacting fleet
// when notifying it of an uninstall. They are variables rather than
// constants so that tests can override them.
var (
	// fleetAuditAttempts is the maximum number of times the audit/unenroll
	// request is sent before giving up.
	fleetAuditAttempts = 10
	// fleetAuditWaitInit is the initial duration handed to the jittered
	// backoff used between attempts.
	fleetAuditWaitInit = time.Second
	// fleetAuditWaitMax is the maximum duration handed to the jittered
	// backoff used between attempts.
	fleetAuditWaitMax = time.Second * 10
)

// Uninstall uninstalls persistently Elastic Agent on the system.
func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *progressbar.ProgressBar) error {
func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *progressbar.ProgressBar) error {
cwd, err := os.Getwd()
if err != nil {
return fmt.Errorf("unable to get current working directory")
Expand All @@ -58,8 +71,6 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
return fmt.Errorf("failed trying to kill any running watcher: %w", err)
}

ctx := context.Background()

// check if the agent was installed using --unprivileged by checking the file vault for the agent secret (needed on darwin to correctly load the vault)
unprivileged, err := checkForUnprivilegedVault(ctx)
if err != nil {
Expand Down Expand Up @@ -100,6 +111,27 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
}
}

// will only notify fleet of the uninstall command if it can gather config and agentinfo, and is not a stand-alone install
notifyFleet := false
var ai *info.AgentInfo
c, err := operations.LoadFullAgentConfig(ctx, log, cfgFile, false, unprivileged)
if err != nil {
pt.Describe(fmt.Sprintf("unable to read agent config to determine if notifying Fleet is needed: %v", err))
}
cfg, err := configuration.NewFromConfig(c)
if err != nil {
pt.Describe(fmt.Sprintf("notify Fleet: unable to transform *config.Config to *configuration.Configuration: %v", err))
}

if cfg != nil && !configuration.IsStandalone(cfg.Fleet) {
ai, err = info.NewAgentInfo(ctx, false)
if err != nil {
pt.Describe(fmt.Sprintf("unable to read agent info, Fleet will not be notified of uninstall: %v", err))
} else {
notifyFleet = true
}
}

// remove existing directory
pt.Describe("Removing install directory")
err = RemovePath(topPath)
Expand All @@ -112,9 +144,66 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
}
pt.Describe("Removed install directory")

if notifyFleet {
notifyFleetAuditUninstall(ctx, log, pt, cfg, ai) //nolint:errcheck // ignore the error as we can't act on it
}

return nil
}

// notifyFleetAuditUninstall makes a best-effort attempt to notify fleet-server
// that the agent has been uninstalled.
//
// The audit/unenroll request is sent at most fleetAuditAttempts times. Between
// attempts it waits on a jittered backoff configured with fleetAuditWaitInit
// and fleetAuditWaitMax. No wait is performed after the final attempt, because
// nothing would follow it.
//
// Retrying stops early when:
//   - ctx is done, for any reason (its error is returned);
//   - the request fails with a *fleetapi.ReqError;
//   - fleet-server answers with 400, 401 or 409.
//
// It returns nil only when fleet-server answers with 200. Progress and
// failures are reported through pt, so callers may ignore the returned error.
func notifyFleetAuditUninstall(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo) error {
	// The derived context is cancelled on return so that the backoff created
	// below is told that this function is finished.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	pt.Describe("Attempting to notify Fleet of uninstall")
	client, err := fleetclient.NewAuthWithConfig(log, cfg.Fleet.AccessAPIKey, cfg.Fleet.Client)
	if err != nil {
		pt.Describe(fmt.Sprintf("notify Fleet: unable to create fleetapi client: %v", err))
		return err
	}
	cmd := fleetapi.NewAuditUnenrollCmd(ai, client)
	req := &fleetapi.AuditUnenrollRequest{
		Reason:    fleetapi.ReasonUninstall,
		Timestamp: time.Now().UTC(),
	}
	jitterBackoff := backoffWithContext(ctx)

	for i := 0; i < fleetAuditAttempts; i++ {
		// Stop as soon as the context is done, whether it was cancelled or
		// its deadline expired, including when that happened during the
		// previous backoff wait.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		lastAttempt := i == fleetAuditAttempts-1

		resp, err := cmd.Execute(ctx, req)
		if err != nil {
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}
			var reqErr *fleetapi.ReqError
			switch {
			case errors.Is(err, context.Canceled):
				// Cancelled by something other than ctx: return the error
				// itself so that a failure is never reported as nil.
				return err
			case errors.As(err, &reqErr):
				// An error with the request itself will not be fixed by retrying.
				pt.Describe(fmt.Sprintf("notify Fleet: encountered unretryable error: %v", err))
				return err
			}
			if !lastAttempt {
				pt.Describe(fmt.Sprintf("notify Fleet: network error, retry in %v.", jitterBackoff.NextWait()))
				jitterBackoff.Wait()
			}
			continue
		}
		resp.Body.Close()

		switch resp.StatusCode {
		case http.StatusOK:
			pt.Describe("Successfully notified Fleet about uninstall")
			return nil
		case http.StatusBadRequest, http.StatusUnauthorized, http.StatusConflict:
			// BadRequest are not retried because the request body is incorrect and will not be accepted
			// Unauthorized are not retried because the API key has been invalidated; unauthorized is listed here but will be returned as a fleetapi.ReqError
			// Conflict will not retry because in this case Endpoint has indicated that it is orphaned and we do not want to overwrite that annotation
			pt.Describe(fmt.Sprintf("notify Fleet: failed with status code %d (no retries)", resp.StatusCode))
			return fmt.Errorf("unretryable return status: %d", resp.StatusCode)
		default:
			if !lastAttempt {
				pt.Describe(fmt.Sprintf("notify Fleet: failed with status code %d (retry in %v)", resp.StatusCode, jitterBackoff.NextWait()))
				jitterBackoff.Wait()
			}
		}
	}

	pt.Describe("notify Fleet: failed")
	return fmt.Errorf("notify Fleet: failed")
}

// EnsureStoppedService ensures that the installed service is stopped.
func EnsureStoppedService(topPath string, pt *progressbar.ProgressBar) (service.Status, error) {
status, _ := StatusService(topPath)
Expand Down Expand Up @@ -398,3 +487,13 @@ func killWatcher(pt *progressbar.ProgressBar) error {
<-time.After(1 * time.Second)
}
}

// backoffWithContext returns an equal-jitter backoff configured with
// fleetAuditWaitInit and fleetAuditWaitMax whose done channel is ctx.Done().
//
// ctx.Done() is handed to the backoff directly, so no relay goroutine is
// started and nothing is left blocked if ctx can never be cancelled.
// NOTE(review): assumes NewEqualJitterBackoff accepts a receive-only
// (<-chan struct{}) done channel — confirm against the backoff package.
func backoffWithContext(ctx context.Context) backoff.Backoff {
	return backoff.NewEqualJitterBackoff(ctx.Done(), fleetAuditWaitInit, fleetAuditWaitMax)
}
108 changes: 108 additions & 0 deletions internal/pkg/agent/install/uninstall_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,28 @@ package install
import (
"context"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"runtime"
"testing"
"time"

"github.com/schollz/progressbar/v3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/secret"
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
"github.com/elastic/elastic-agent/internal/pkg/agent/vault"
"github.com/elastic/elastic-agent/internal/pkg/fleetapi/client"
"github.com/elastic/elastic-agent/internal/pkg/remote"
)

func Test_checkForUnprivilegedVault(t *testing.T) {
Expand Down Expand Up @@ -119,3 +129,101 @@ func initFileVault(t *testing.T, ctx context.Context, testVaultPath string, keys
require.NoError(t, err, "error setting up key %q = %0x", k, v)
}
}

func TestNotifyFleetAuditUnenroll(t *testing.T) {
fleetAuditWaitInit = time.Millisecond * 10
fleetAuditWaitMax = time.Millisecond * 100
t.Cleanup(func() {
fleetAuditWaitInit = time.Second
fleetAuditWaitMax = time.Second * 10
})

tests := []struct {
name string
getServer func() *httptest.Server
err error
}{{
name: "succeeds after a retry",
getServer: func() *httptest.Server {
callCount := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if callCount == 0 {
callCount++
w.WriteHeader(http.StatusNotFound)
return
}
callCount++
w.WriteHeader(http.StatusOK)
}))
return server
},
err: nil,
}, {
name: "returns 401",
getServer: func() *httptest.Server {
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusUnauthorized)
}))
},
err: client.ErrInvalidAPIKey,
}, {
name: "returns 409",
getServer: func() *httptest.Server {
return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusConflict)
}))
},
err: fmt.Errorf("unretryable return status: 409"),
}}

log, _ := logp.NewInMemory("test", zap.NewDevelopmentEncoderConfig())
pt := progressbar.NewOptions(-1, progressbar.OptionSetWriter(io.Discard))
ai := &info.AgentInfo{}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
server := tc.getServer()
defer server.Close()

cfg := &configuration.Configuration{
Fleet: &configuration.FleetAgentConfig{
AccessAPIKey: "example-key",
Client: remote.Config{
Protocol: remote.ProtocolHTTP,
Host: server.URL,
},
},
}
err := notifyFleetAuditUninstall(context.Background(), log, pt, cfg, ai)
if tc.err == nil {
assert.NoError(t, err)
} else {
assert.ErrorContains(t, err, tc.err.Error())
}
})
}

t.Run("fails with no retries", func(t *testing.T) {
fleetAuditAttempts = 1
t.Cleanup(func() {
fleetAuditAttempts = 10
})
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
}))
defer server.Close()

cfg := &configuration.Configuration{
Fleet: &configuration.FleetAgentConfig{
AccessAPIKey: "example-key",
Client: remote.Config{
Protocol: remote.ProtocolHTTP,
Host: server.URL,
},
},
}
err := notifyFleetAuditUninstall(context.Background(), log, pt, cfg, ai)
assert.EqualError(t, err, "notify Fleet: failed")

})
}
Loading

0 comments on commit aded4a7

Please sign in to comment.