Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[8.x](backport #5302) Call fleet-server audit/unenroll endpoint on uninstall #5688

Merged
merged 1 commit into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: feature

# Change summary; an 80ish-character description of the change.
summary: Call fleet-server audit/unenroll endpoint on uninstall

# Long description; in case the summary is not enough to describe the change
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
description: |
Uninstalling a fleet-managed elastic-agent instance will now do a
best-effort attempt to notify fleet-server of the agent removal so the
agent should not appear as offline.

# Affected component; a word indicating the component this changeset affects.
component:

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled in by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/5302

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled in by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/elastic-agent/issues/484
4 changes: 2 additions & 2 deletions internal/pkg/agent/cmd/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
return err
}
} else {
err := install.Uninstall(cfgFile, topPath, "", log, progBar)
err := install.Uninstall(cmd.Context(), cfgFile, topPath, "", log, progBar)
if err != nil {
progBar.Describe("Uninstall from binary failed")
return err
Expand All @@ -257,7 +257,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
defer func() {
if err != nil {
progBar.Describe("Uninstalling")
innerErr := install.Uninstall(cfgFile, topPath, "", log, progBar)
innerErr := install.Uninstall(cmd.Context(), cfgFile, topPath, "", log, progBar)
if innerErr != nil {
progBar.Describe("Failed to Uninstall")
} else {
Expand Down
2 changes: 1 addition & 1 deletion internal/pkg/agent/cmd/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func uninstallCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
fmt.Fprint(os.Stderr, logBuff.String())
}()

err = install.Uninstall(paths.ConfigFile(), paths.Top(), uninstallToken, log, progBar)
err = install.Uninstall(cmd.Context(), paths.ConfigFile(), paths.Top(), uninstallToken, log, progBar)
if err != nil {
progBar.Describe("Failed to uninstall agent")
return fmt.Errorf("error uninstalling agent: %w", err)
Expand Down
105 changes: 102 additions & 3 deletions internal/pkg/agent/install/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"errors"
"fmt"
"io/fs"
"net/http"
"os"
"path/filepath"
"runtime"
Expand All @@ -19,24 +20,36 @@ import (
"github.com/schollz/progressbar/v3"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/secret"
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
aerrors "github.com/elastic/elastic-agent/internal/pkg/agent/errors"
"github.com/elastic/elastic-agent/internal/pkg/agent/transpiler"
"github.com/elastic/elastic-agent/internal/pkg/agent/vars"
"github.com/elastic/elastic-agent/internal/pkg/agent/vault"
"github.com/elastic/elastic-agent/internal/pkg/capabilities"
"github.com/elastic/elastic-agent/internal/pkg/config"
"github.com/elastic/elastic-agent/internal/pkg/config/operations"
"github.com/elastic/elastic-agent/internal/pkg/core/backoff"
"github.com/elastic/elastic-agent/internal/pkg/fleetapi"
fleetclient "github.com/elastic/elastic-agent/internal/pkg/fleetapi/client"
"github.com/elastic/elastic-agent/pkg/component"
comprt "github.com/elastic/elastic-agent/pkg/component/runtime"
"github.com/elastic/elastic-agent/pkg/core/logger"
"github.com/elastic/elastic-agent/pkg/features"
"github.com/elastic/elastic-agent/pkg/utils"
)

// fleetAudit variables control retry attempts for contacting fleet
// when notifying it that the agent has been uninstalled.
//
// They are package-level variables rather than constants; the tests in this
// package overwrite them to keep retry loops short.
var (
	// fleetAuditAttempts is the upper bound on the number of requests
	// notifyFleetAuditUninstall sends before giving up.
	fleetAuditAttempts = 10
	// fleetAuditWaitInit is the initial duration passed to the equal-jitter
	// backoff built by backoffWithContext.
	fleetAuditWaitInit = time.Second
	// fleetAuditWaitMax is the maximum duration passed to the same backoff.
	fleetAuditWaitMax = time.Second * 10
)

// Uninstall uninstalls persistently Elastic Agent on the system.
func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *progressbar.ProgressBar) error {
func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *progressbar.ProgressBar) error {
cwd, err := os.Getwd()
if err != nil {
return fmt.Errorf("unable to get current working directory")
Expand All @@ -58,8 +71,6 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
return fmt.Errorf("failed trying to kill any running watcher: %w", err)
}

ctx := context.Background()

// check if the agent was installed using --unprivileged by checking the file vault for the agent secret (needed on darwin to correctly load the vault)
unprivileged, err := checkForUnprivilegedVault(ctx)
if err != nil {
Expand Down Expand Up @@ -100,6 +111,27 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
}
}

// will only notify fleet of the uninstall command if it can gather config and agentinfo, and is not a stand-alone install
notifyFleet := false
var ai *info.AgentInfo
c, err := operations.LoadFullAgentConfig(ctx, log, cfgFile, false, unprivileged)
if err != nil {
pt.Describe(fmt.Sprintf("unable to read agent config to determine if notifying Fleet is needed: %v", err))
}
cfg, err := configuration.NewFromConfig(c)
if err != nil {
pt.Describe(fmt.Sprintf("notify Fleet: unable to transform *config.Config to *configuration.Configuration: %v", err))
}

if cfg != nil && !configuration.IsStandalone(cfg.Fleet) {
ai, err = info.NewAgentInfo(ctx, false)
if err != nil {
pt.Describe(fmt.Sprintf("unable to read agent info, Fleet will not be notified of uninstall: %v", err))
} else {
notifyFleet = true
}
}

// remove existing directory
pt.Describe("Removing install directory")
err = RemovePath(topPath)
Expand All @@ -112,9 +144,66 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
}
pt.Describe("Removed install directory")

if notifyFleet {
notifyFleetAuditUninstall(ctx, log, pt, cfg, ai) //nolint:errcheck // ignore the error as we can't act on it
}

return nil
}

// notifyFleetAuditUninstall will attempt to notify fleet-server of the agent's uninstall.
//
// This is a best-effort approach: the audit/unenroll request is sent up to
// fleetAuditAttempts times, and between attempts the function waits on the
// jittered backoff returned by backoffWithContext (configured from
// fleetAuditWaitInit and fleetAuditWaitMax). Progress is reported through pt.
//
// The following outcomes end the loop immediately, without a retry:
//   - ctx is done (cancelled or past its deadline);
//   - the request itself was cancelled;
//   - the command returned a *fleetapi.ReqError;
//   - fleet-server answered 400, 401 or 409.
//
// It returns nil only when fleet-server answers 200 OK; every other path
// returns a non-nil error.
func notifyFleetAuditUninstall(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo) error {
	// The derived context is cancelled on return so that the goroutine started
	// by backoffWithContext is released.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	pt.Describe("Attempting to notify Fleet of uninstall")
	client, err := fleetclient.NewAuthWithConfig(log, cfg.Fleet.AccessAPIKey, cfg.Fleet.Client)
	if err != nil {
		pt.Describe(fmt.Sprintf("notify Fleet: unable to create fleetapi client: %v", err))
		return err
	}
	cmd := fleetapi.NewAuditUnenrollCmd(ai, client)
	req := &fleetapi.AuditUnenrollRequest{
		Reason:    fleetapi.ReasonUninstall,
		Timestamp: time.Now().UTC(),
	}
	jitterBackoff := backoffWithContext(ctx)
	for i := 0; i < fleetAuditAttempts; i++ {
		resp, err := cmd.Execute(ctx, req)
		if err != nil {
			var reqErr *fleetapi.ReqError
			switch {
			case ctx.Err() != nil:
				// Our context is done (cancelled or deadline exceeded);
				// further attempts cannot succeed.
				return ctx.Err()
			case errors.Is(err, context.Canceled):
				// The request was cancelled although ctx is still live.
				// Return err itself: ctx.Err() is nil here and returning it
				// would wrongly signal success.
				return err
			case errors.As(err, &reqErr):
				// An error with the request is not going to change on retry.
				pt.Describe(fmt.Sprintf("notify Fleet: encountered unretryable error: %v", err))
				return err
			}
			pt.Describe(fmt.Sprintf("notify Fleet: network error, retry in %v.", jitterBackoff.NextWait()))
			jitterBackoff.Wait()
			continue
		}
		// Only the status code is inspected, so the body can be released right away.
		resp.Body.Close()
		switch resp.StatusCode {
		case http.StatusOK:
			pt.Describe("Successfully notified Fleet about uninstall")
			return nil
		case http.StatusBadRequest, http.StatusUnauthorized, http.StatusConflict:
			// BadRequest are not retried because the request body is incorrect and will not be accepted
			// Unauthorized are not retried because the API key has been invalidated; unauthorized is listed here but will be returned as a fleetapi.ReqError
			// Conflict will not retry because in this case Endpoint has indicated that it is orphaned and we do not want to overwrite that annotation
			pt.Describe(fmt.Sprintf("notify Fleet: failed with status code %d (no retries)", resp.StatusCode))
			return fmt.Errorf("unretryable return status: %d", resp.StatusCode)
		default:
			pt.Describe(fmt.Sprintf("notify Fleet: failed with status code %d (retry in %v)", resp.StatusCode, jitterBackoff.NextWait()))
			jitterBackoff.Wait()
		}
	}
	// All attempts were used without a 200 OK.
	pt.Describe("notify Fleet: failed")
	return fmt.Errorf("notify Fleet: failed")
}

// EnsureStoppedService ensures that the installed service is stopped.
func EnsureStoppedService(topPath string, pt *progressbar.ProgressBar) (service.Status, error) {
status, _ := StatusService(topPath)
Expand Down Expand Up @@ -398,3 +487,13 @@ func killWatcher(pt *progressbar.ProgressBar) error {
<-time.After(1 * time.Second)
}
}

// backoffWithContext builds an equal-jitter backoff from fleetAuditWaitInit
// and fleetAuditWaitMax and ties its done channel to ctx: the channel is
// closed as soon as ctx is done.
//
// A goroutine is started to watch ctx and only exits once ctx is done, so
// callers must pass a context that is eventually cancelled.
func backoffWithContext(ctx context.Context) backoff.Backoff {
	done := make(chan struct{})
	go func() {
		// Closing (rather than sending) lets every reader of done observe it.
		defer close(done)
		<-ctx.Done()
	}()
	return backoff.NewEqualJitterBackoff(done, fleetAuditWaitInit, fleetAuditWaitMax)
}
108 changes: 108 additions & 0 deletions internal/pkg/agent/install/uninstall_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,28 @@ package install
import (
"context"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"runtime"
"testing"
"time"

"github.com/schollz/progressbar/v3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"

"github.com/elastic/elastic-agent-libs/logp"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/secret"
"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
"github.com/elastic/elastic-agent/internal/pkg/agent/vault"
"github.com/elastic/elastic-agent/internal/pkg/fleetapi/client"
"github.com/elastic/elastic-agent/internal/pkg/remote"
)

func Test_checkForUnprivilegedVault(t *testing.T) {
Expand Down Expand Up @@ -119,3 +129,101 @@ func initFileVault(t *testing.T, ctx context.Context, testVaultPath string, keys
require.NoError(t, err, "error setting up key %q = %0x", k, v)
}
}

// TestNotifyFleetAuditUnenroll exercises notifyFleetAuditUninstall against
// local httptest servers and checks how it reacts to the status codes they
// return: a retry followed by success, unretryable 401 and 409 answers, and
// running out of attempts.
func TestNotifyFleetAuditUnenroll(t *testing.T) {
	// Shorten the backoff so retries do not slow the test down. The previous
	// values are saved and restored instead of re-stating the defaults here,
	// so this test cannot drift from the package's real configuration.
	origWaitInit, origWaitMax := fleetAuditWaitInit, fleetAuditWaitMax
	fleetAuditWaitInit = time.Millisecond * 10
	fleetAuditWaitMax = time.Millisecond * 100
	t.Cleanup(func() {
		fleetAuditWaitInit = origWaitInit
		fleetAuditWaitMax = origWaitMax
	})

	// newCfg returns a minimal Fleet configuration that points at host.
	newCfg := func(host string) *configuration.Configuration {
		return &configuration.Configuration{
			Fleet: &configuration.FleetAgentConfig{
				AccessAPIKey: "example-key",
				Client: remote.Config{
					Protocol: remote.ProtocolHTTP,
					Host:     host,
				},
			},
		}
	}

	tests := []struct {
		name      string
		getServer func() *httptest.Server
		err       error
	}{{
		name: "succeeds after a retry",
		getServer: func() *httptest.Server {
			callCount := 0
			server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				// The first call is answered with a retryable status, every later call with 200.
				if callCount == 0 {
					callCount++
					w.WriteHeader(http.StatusNotFound)
					return
				}
				callCount++
				w.WriteHeader(http.StatusOK)
			}))
			return server
		},
		err: nil,
	}, {
		name: "returns 401",
		getServer: func() *httptest.Server {
			return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusUnauthorized)
			}))
		},
		err: client.ErrInvalidAPIKey,
	}, {
		name: "returns 409",
		getServer: func() *httptest.Server {
			return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusConflict)
			}))
		},
		err: fmt.Errorf("unretryable return status: 409"),
	}}

	log, _ := logp.NewInMemory("test", zap.NewDevelopmentEncoderConfig())
	pt := progressbar.NewOptions(-1, progressbar.OptionSetWriter(io.Discard))
	ai := &info.AgentInfo{}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			server := tc.getServer()
			defer server.Close()

			err := notifyFleetAuditUninstall(context.Background(), log, pt, newCfg(server.URL), ai)
			if tc.err == nil {
				assert.NoError(t, err)
				return
			}
			// Errors are compared by message because they may be wrapped on the way out.
			assert.ErrorContains(t, err, tc.err.Error())
		})
	}

	t.Run("fails with no retries", func(t *testing.T) {
		// A single attempt against a server that always answers with a
		// retryable status must end in the generic failure error.
		origAttempts := fleetAuditAttempts
		fleetAuditAttempts = 1
		t.Cleanup(func() {
			fleetAuditAttempts = origAttempts
		})
		server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			w.WriteHeader(http.StatusNotFound)
		}))
		defer server.Close()

		err := notifyFleetAuditUninstall(context.Background(), log, pt, newCfg(server.URL), ai)
		assert.EqualError(t, err, "notify Fleet: failed")
	})
}
Loading