Skip to content

Commit

Permalink
VAULT-31409: trace postUnseal function (#28895)
Browse files Browse the repository at this point in the history
* initial implementation of unseal trace

* close file if we fail to start the trace

didn't bother to check the error from traceFile.Close()

* use reloadable config instead of env var

* license

* remove leftover

* allow setting custom dir and remove new package

* bring back StartDebugTrace

after talking to Kuba it sounds like it's a good idea to try to move stuff out of core, so even if there's no immediate need for a generic debug trace function it's still fair to add it

* track postUnseal instead of unsealInternal

also some usability improvements from manual testing

* address PR comments

* address security review

there were concerns about using the /tmp directory because of permissions, or having a default dir at all, so now it's required to set a dir in order to generate the traces.

* add unit tests to StartDebugTrace

* move back to default dir

* document new parameters

* add tiny integration test

* avoid column in trace filename

sounds like it might be forbidden in Windows and possibly cause problems in some MacOS applications.

* address PR feedback

* add go doc to test

CI was complaining about missing comments on the new test function. It feels a bit silly to require this of tests but whatever XD

* fix tests
  • Loading branch information
bosouza authored Nov 26, 2024
1 parent 42552f6 commit a2c467c
Show file tree
Hide file tree
Showing 10 changed files with 272 additions and 1 deletion.
3 changes: 3 additions & 0 deletions changelog/28895.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
core: Added new `enable_post_unseal_trace` and `post_unseal_trace_directory` config options to generate Go traces during the post-unseal step for debug purposes.
```
16 changes: 16 additions & 0 deletions command/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ type Config struct {
License string `hcl:"-"`
LicensePath string `hcl:"license_path"`
DisableSSCTokens bool `hcl:"-"`

EnablePostUnsealTrace bool `hcl:"enable_post_unseal_trace"`
PostUnsealTraceDir string `hcl:"post_unseal_trace_directory"`
}

const (
Expand Down Expand Up @@ -425,6 +428,16 @@ func (c *Config) Merge(c2 *Config) *Config {
result.LicensePath = c2.LicensePath
}

result.EnablePostUnsealTrace = c.EnablePostUnsealTrace
if c2.EnablePostUnsealTrace {
result.EnablePostUnsealTrace = c2.EnablePostUnsealTrace
}

result.PostUnsealTraceDir = c.PostUnsealTraceDir
if c2.PostUnsealTraceDir != "" {
result.PostUnsealTraceDir = c2.PostUnsealTraceDir
}

// Use values from top-level configuration for storage if set
if storage := result.Storage; storage != nil {
if result.APIAddr != "" {
Expand Down Expand Up @@ -1150,6 +1163,9 @@ func (c *Config) Sanitized() map[string]interface{} {
"detect_deadlocks": c.DetectDeadlocks,

"imprecise_lease_role_tracking": c.ImpreciseLeaseRoleTracking,

"enable_post_unseal_trace": c.EnablePostUnsealTrace,
"post_unseal_trace_directory": c.PostUnsealTraceDir,
}
for k, v := range sharedResult {
result[k] = v
Expand Down
2 changes: 2 additions & 0 deletions command/server/config_test_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,8 @@ func testConfig_Sanitized(t *testing.T) {
},
"administrative_namespace_path": "admin/",
"imprecise_lease_role_tracking": false,
"enable_post_unseal_trace": true,
"post_unseal_trace_directory": "/tmp",
}

addExpectedEntSanitizedConfig(expected, []string{"http"})
Expand Down
2 changes: 2 additions & 0 deletions command/server/test-fixtures/config3.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,5 @@ raw_storage_endpoint = true
disable_sealwrap = true
disable_sentinel_trace = true
administrative_namespace_path = "admin/"
enable_post_unseal_trace = true
post_unseal_trace_directory = "/tmp"
63 changes: 63 additions & 0 deletions helper/trace/debug_trace.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package trace

import (
"fmt"
"os"
"path/filepath"
"runtime/trace"
"time"
)

func StartDebugTrace(dir string, filePrefix string) (file string, stop func() error, err error) {
dirMustExist := true
if dir == "" {
dirMustExist = false // if a dir is provided it must exist, otherwise we'll create a default one
dir = filepath.Join(os.TempDir(), "vault-traces")
}

d, err := os.Stat(dir)
if err != nil && !os.IsNotExist(err) {
return "", nil, fmt.Errorf("failed to stat trace directory %q: %s", dir, err)
}

if os.IsNotExist(err) && dirMustExist {
return "", nil, fmt.Errorf("trace directory %q does not exist", dir)
}

if !os.IsNotExist(err) && !d.IsDir() {
return "", nil, fmt.Errorf("trace directory %q is not a directory", dir)
}

if os.IsNotExist(err) {
if err := os.Mkdir(dir, 0o700); err != nil {
return "", nil, fmt.Errorf("failed to create trace directory %q: %s", dir, err)
}
}

// would prefer a more human readable time reference in the file name but the column
// character can cause problems in filenames
fileName := fmt.Sprintf("%s-%d.trace", filePrefix, time.Now().Unix())
traceFile, err := filepath.Abs(filepath.Join(dir, fileName))
if err != nil {
return "", nil, fmt.Errorf("failed to get absolute path for trace file: %s", err)
}
f, err := os.Create(traceFile)
if err != nil {
return "", nil, fmt.Errorf("failed to create trace file %q: %s", traceFile, err)
}

if err := trace.Start(f); err != nil {
f.Close()
return "", nil, fmt.Errorf("failed to start trace: %s", err)
}

stop = func() error {
trace.Stop()
return f.Close()
}

return f.Name(), stop, nil
}
111 changes: 111 additions & 0 deletions helper/trace/debug_trace_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package trace

import (
"os"
"path/filepath"
"runtime"
"testing"

"github.com/stretchr/testify/require"
)

// TestStartDebugTrace tests the debug trace functionality creating real
// files and traces.
func TestStartDebugTrace(t *testing.T) {
t.Run("error_on_non_existent_dir", func(t *testing.T) {
_, _, err := StartDebugTrace("non-existent-dir", "filePrefix")
require.Error(t, err)
require.Contains(t, err.Error(), "does not exist")
})

t.Run("error_on_non_dir", func(t *testing.T) {
f, err := os.CreateTemp("", "")
require.NoError(t, err)
require.NoError(t, f.Close())
_, _, err = StartDebugTrace(f.Name(), "")
require.Error(t, err)
require.Contains(t, err.Error(), "is not a directory")
})

t.Run("error_on_failed_to_create_trace_file", func(t *testing.T) {
noWriteFolder := filepath.Join(os.TempDir(), "no-write-permissions")
// create folder without write permission
err := os.Mkdir(noWriteFolder, 0o000)
t.Cleanup(func() {
os.RemoveAll(noWriteFolder)
})
require.NoError(t, err)
_, _, err = StartDebugTrace(noWriteFolder, "")
require.Error(t, err)
require.Contains(t, err.Error(), "failed to create trace file")
})

t.Run("error_trying_to_start_second_concurrent_trace", func(t *testing.T) {
dir, err := os.MkdirTemp("", "")
require.NoError(t, err)
t.Cleanup(func() {
os.RemoveAll(dir)
})
_, stop, err := StartDebugTrace(dir, "filePrefix")
require.NoError(t, err)
_, stopNil, err := StartDebugTrace(dir, "filePrefix")
require.Error(t, err)
require.Contains(t, err.Error(), "failed to start trace")
require.NoError(t, stop())
require.Nil(t, stopNil)
})

t.Run("error_when_stating_tmp_dir_with_restricted_permissions", func(t *testing.T) {
// this test relies on setting TMPDIR so skip it if we're not on a Unix system
if runtime.GOOS == "windows" {
t.Skip("skipping test on Windows")
}

tmpMissingPermissions := filepath.Join(t.TempDir(), "missing_permissions")
err := os.Mkdir(tmpMissingPermissions, 0o000)
require.NoError(t, err)
t.Setenv("TMPDIR", tmpMissingPermissions)
_, _, err = StartDebugTrace("", "filePrefix")
require.Error(t, err)
require.Contains(t, err.Error(), "failed to stat trace directory")
})

t.Run("successful_trace_generates_non_empty_file", func(t *testing.T) {
dir, err := os.MkdirTemp("", "")
require.NoError(t, err)
t.Cleanup(func() {
os.RemoveAll(dir)
})
file, stop, err := StartDebugTrace(dir, "filePrefix")
require.NoError(t, err)
require.NoError(t, stop())
f, err := os.Stat(file)
require.NoError(t, err)
require.Greater(t, f.Size(), int64(0))
})

t.Run("successful_creation_of_tmp_dir", func(t *testing.T) {
os.RemoveAll(filepath.Join(os.TempDir(), "vault-traces"))
file, stop, err := StartDebugTrace("", "filePrefix")
require.NoError(t, err)
require.NoError(t, stop())
require.Contains(t, file, filepath.Join(os.TempDir(), "vault-traces", "filePrefix"))
f, err := os.Stat(file)
require.NoError(t, err)
require.Greater(t, f.Size(), int64(0))
})

t.Run("successful_trace_with_existing_tmp_dir", func(t *testing.T) {
os.Mkdir(filepath.Join(os.TempDir(), "vault-traces"), 0o700)
file, stop, err := StartDebugTrace("", "filePrefix")
require.NoError(t, err)
require.NoError(t, stop())
require.Contains(t, file, filepath.Join(os.TempDir(), "vault-traces", "filePrefix"))
f, err := os.Stat(file)
require.NoError(t, err)
require.Greater(t, f.Size(), int64(0))
})
}
2 changes: 2 additions & 0 deletions http/sys_config_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ func TestSysConfigState_Sanitized(t *testing.T) {
"storage": tc.expectedStorageOutput,
"administrative_namespace_path": "",
"imprecise_lease_role_tracking": false,
"enable_post_unseal_trace": false,
"post_unseal_trace_directory": "",
}

if tc.expectedHAStorageOutput != nil {
Expand Down
40 changes: 40 additions & 0 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ import (
"github.com/hashicorp/vault/helper/metricsutil"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/helper/osutil"
"github.com/hashicorp/vault/helper/trace"
"github.com/hashicorp/vault/physical/raft"
"github.com/hashicorp/vault/sdk/helper/certutil"
"github.com/hashicorp/vault/sdk/helper/consts"
Expand Down Expand Up @@ -2704,6 +2705,10 @@ func (c *Core) runUnsealSetupForPrimary(ctx context.Context, logger log.Logger)
// requires the Vault to be unsealed such as mount tables, logical backends,
// credential stores, etc.
func (c *Core) postUnseal(ctx context.Context, ctxCancelFunc context.CancelFunc, unsealer UnsealStrategy) (retErr error) {
if stopTrace := c.tracePostUnsealIfEnabled(); stopTrace != nil {
defer stopTrace()
}

defer metrics.MeasureSince([]string{"core", "post_unseal"}, time.Now())

// Clear any out
Expand Down Expand Up @@ -2819,6 +2824,41 @@ func (c *Core) postUnseal(ctx context.Context, ctxCancelFunc context.CancelFunc,
return nil
}

// tracePostUnsealIfEnabled checks if post-unseal tracing is enabled in the server
// config and starts a go trace if it is, returning a stop function to be called once
// the post-unseal process is complete.
func (c *Core) tracePostUnsealIfEnabled() (stop func()) {
// use rawConfig to allow config hot-reload of EnablePostUnsealTrace via SIGHUP
conf := c.rawConfig.Load()
if conf == nil {
c.logger.Warn("failed to get raw config to check enable_post_unseal_trace")
return nil
}

if !conf.(*server.Config).EnablePostUnsealTrace {
return nil
}

dir := conf.(*server.Config).PostUnsealTraceDir

traceFile, stopTrace, err := trace.StartDebugTrace(dir, "post-unseal")
if err != nil {
c.logger.Warn("failed to start post-unseal trace", "error", err)
return nil
}

c.logger.Info("post-unseal trace started", "file", traceFile)

return func() {
err := stopTrace()
if err != nil {
c.logger.Warn("failure when stopping post-unseal trace", "error", err)
return
}
c.logger.Info("post-unseal trace completed", "file", traceFile)
}
}

// preSeal is invoked before the barrier is sealed, allowing
// for any state teardown required.
func (c *Core) preSeal() error {
Expand Down
23 changes: 22 additions & 1 deletion vault/external_tests/pprof/pprof_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,33 @@ import (
"encoding/json"
"io"
"net/http"
"os"
"strconv"
"strings"
"testing"

"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/vault/command/server"
vaulthttp "github.com/hashicorp/vault/http"
"github.com/hashicorp/vault/sdk/helper/testhelpers/schema"
"github.com/hashicorp/vault/vault"
"github.com/stretchr/testify/require"
"golang.org/x/net/http2"
)

func TestSysPprof(t *testing.T) {
t.Parallel()
cluster := vault.NewTestCluster(t, nil, &vault.TestClusterOptions{

// trace test setup
dir, err := os.MkdirTemp("", "vault-trace-test")
require.NoError(t, err)

cluster := vault.NewTestCluster(t, &vault.CoreConfig{
RawConfig: &server.Config{
EnablePostUnsealTrace: true,
PostUnsealTraceDir: dir,
},
}, &vault.TestClusterOptions{
HandlerFunc: vaulthttp.Handler,
RequestResponseCallback: schema.ResponseValidatingCallback(t),
})
Expand All @@ -30,6 +43,14 @@ func TestSysPprof(t *testing.T) {
core := cluster.Cores[0].Core
vault.TestWaitActive(t, core)
SysPprof_Test(t, cluster)

// draft trace test onto pprof one to avoid increasing test runtime with additional clusters
files, err := os.ReadDir(dir)
require.NoError(t, err)
require.Greater(t, len(files), 0)
traceFile, err := files[0].Info()
require.NoError(t, err)
require.Greater(t, traceFile.Size(), int64(0))
}

func TestSysPprof_MaxRequestDuration(t *testing.T) {
Expand Down
11 changes: 11 additions & 0 deletions website/content/docs/configuration/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,17 @@ can have a negative effect on performance due to the tracking of each lock attem
When `imprecise_lease_role_tracking` is set to true and a new role-based quota is enabled, subsequent lease counts start from 0.
`imprecise_lease_role_tracking` affects role-based lease count quotas, but reduces latencies when not using role based quotas.

- `enable_post_unseal_trace` `(bool: false)` - Enables the server to generate a Go trace during the execution of the
`core.postUnseal` function for debug purposes. The resulting trace can be viewed with the `go tool trace` command. The output
directory can be specified with the `post_unseal_trace_directory` parameter. This should only be enabled temporarily for
debugging purposes as it can have a significant performance impact. This can be updated on a running Vault process with a
SIGHUP signal.

- `post_unseal_trace_directory` `(string: "")` - Specifies the directory where the trace file will be written, which must exist
and be writable by the Vault process. If not specified it will create a subdirectory `vault-traces` under the result from
[os.TempDir()](https://pkg.go.dev/os#TempDir) (usually `/tmp` on Unix systems). This can be updated on a running Vault process
with a SIGHUP signal.

### High availability parameters

The following parameters are used on backends that support [high availability][high-availability].
Expand Down

0 comments on commit a2c467c

Please sign in to comment.