Skip to content

Commit

Permalink
Merge #66842
Browse files Browse the repository at this point in the history
66842: envutil,server: report more env vars upon server startup and in telemetry r=cameronnunez a=knz

Supersedes #66805. 
Informs #66838 (option 2).

We want more visibility into the env vars that influence the server's
state, for example HTTP_PROXY or more generally those env vars that
our dependency and the go runtime observe.

However, we need to be careful:

- we can't report *all* env vars because there are many cases where
  even the name of an env var or its presence is confidential
  information.
- even for the env vars where we're confident that we can
  report that it is set, we cannot always report its value
  because the value itself could be confidential (e.g. HTTP_PROXY).

This patch thus adds env var reporting with a nuance between the
following:

- safe name, safe value (e.g. GOMAXPROCS)
- safe name, redactable value (e.g. TERM)
- safe name, no value (e.g HTTP_PROXY)

Example:
![image](https://user-images.githubusercontent.com/642886/127644871-816b4e0d-4de8-4e37-a451-dcdafc8b4bc4.png)


Release note (cli change): CockroachDB server nodes now report
more environment variables in logs upon startup. Only certain
environment variables that may have an influence on the server's
behavior are reported.

Co-authored-by: Raphael 'kena' Poss <[email protected]>
  • Loading branch information
craig[bot] and knz committed Jul 30, 2021
2 parents e20731d + f62a585 commit 1cf862c
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 16 deletions.
31 changes: 30 additions & 1 deletion pkg/cli/interactive_tests/test_secure.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ end_test

proc start_secure_server {argv certs_dir extra} {
report "BEGIN START SECURE SERVER"
system "$argv start-single-node --host=localhost --socket-dir=. --certs-dir=$certs_dir --pid-file=server_pid -s=path=logs/db --background $extra >>expect-cmd.log 2>&1;
# We add some arbitrary env vars here to check env var
# exposure in one of the sub tests below.
system "env AWS_ACCESS_KEY=supersecret TERM=mycoolterm GODEBUG=mycooldebugvar $argv start-single-node --host=localhost --socket-dir=. --certs-dir=$certs_dir --pid-file=server_pid -s=path=logs/db --background $extra >>expect-cmd.log 2>&1;
$argv sql --certs-dir=$certs_dir -e 'select 1'"
report "END START SECURE SERVER"
}
Expand All @@ -37,6 +39,16 @@ proc stop_secure_server {argv certs_dir} {

start_secure_server $argv $certs_dir ""

start_test "Check that the env vars are properly reported in the log file."
# NB: this grep checks the aws access key was redacted.
system "grep AWS_ACCESS_KEY='\\.\\.\\.' logs/db/logs/cockroach.log"
system "grep GODEBUG=mycooldebugvar logs/db/logs/cockroach.log"
# NB: this grep checks the value of the TERM var was enclosed
# in redaction markers.
system "grep TERM=..*mycoolterm logs/db/logs/cockroach.log"
end_test


start_test "Check 'node ls' works with certificates."
send "$argv node ls --certs-dir=$certs_dir\r"
eexpect "id"
Expand Down Expand Up @@ -176,6 +188,23 @@ eexpect "cluster.organization"
eexpect $prompt
end_test

start_test "Check that env vars are reported in the node report"
send "$python $pyfile cookie_root.txt 'https://localhost:8080/_status/nodes/1'> logs/db/node.txt\r"
eexpect $prompt
system "grep AWS_ACCESS_KEY='\\.\\.\\.' logs/db/node.txt"
system "grep GODEBUG=mycooldebugvar logs/db/node.txt"
system "grep TERM=mycoolterm logs/db/node.txt"
end_test

start_test "Check that env vars are NOT reported in the diagnostic report"
send "$python $pyfile cookie_root.txt 'https://localhost:8080/_status/diagnostics/1'> logs/db/diag.txt\r"
eexpect $prompt
system "if grep AWS_ACCESS_KEY logs/db/diag.txt; then echo FAIL; fi"
system "if grep TERM logs/db/diag.txt; then echo FAIL; fi"
system "if grep GODEBUG logs/db/diag.txt; then echo FAIL; fi"
end_test


# Now test the cookies with non-TLS http.
stop_secure_server $argv $certs_dir

Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -963,7 +963,7 @@ func clientFlagsRPC() string {
func reportConfiguration(ctx context.Context) {
serverCfg.Report(ctx)
if envVarsUsed := envutil.GetEnvVarsUsed(); len(envVarsUsed) > 0 {
log.Ops.Infof(ctx, "using local environment variables: %s", strings.Join(envVarsUsed, ", "))
log.Ops.Infof(ctx, "using local environment variables:\n%s", redact.Join("\n", envVarsUsed))
}
// If a user ever reports "bad things have happened", any
// troubleshooting steps will want to rule out that the user was
Expand Down
4 changes: 2 additions & 2 deletions pkg/server/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -1362,7 +1362,7 @@ func (s *statusServer) Nodes(
return nil, err
}

resp, _, err := s.nodesHelper(ctx, 0, 0)
resp, _, err := s.nodesHelper(ctx, 0 /* limit */, 0 /* offset */)
return resp, err
}

Expand All @@ -1371,7 +1371,7 @@ func (s *statusServer) Nodes(
func (s *statusServer) ListNodesInternal(
ctx context.Context, req *serverpb.NodesRequest,
) (*serverpb.NodesResponse, error) {
resp, _, err := s.nodesHelper(ctx, 0, 0)
resp, _, err := s.nodesHelper(ctx, 0 /* limit */, 0 /* offset */)
return resp, err
}

Expand Down
1 change: 1 addition & 0 deletions pkg/server/status/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ go_library(
"//pkg/util/system",
"//pkg/util/timeutil",
"@com_github_cockroachdb_errors//:errors",
"@com_github_cockroachdb_redact//:redact",
"@com_github_dustin_go_humanize//:go-humanize",
"@com_github_elastic_gosigar//:gosigar",
"@com_github_shirou_gopsutil//net",
Expand Down
11 changes: 10 additions & 1 deletion pkg/server/status/recorder.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/system"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/redact"
humanize "github.com/dustin/go-humanize"
"github.com/elastic/gosigar"
)
Expand Down Expand Up @@ -446,7 +447,7 @@ func (mr *MetricsRecorder) GenerateNodeStatus(ctx context.Context) *statuspb.Nod
StoreStatuses: make([]statuspb.StoreStatus, 0, lastSummaryCount),
Metrics: make(map[string]float64, lastNodeMetricCount),
Args: os.Args,
Env: envutil.GetEnvVarsUsed(),
Env: flattenStrings(envutil.GetEnvVarsUsed()),
Activity: activity,
NumCpus: int32(system.NumCPU()),
TotalSystemMemory: systemMemory,
Expand Down Expand Up @@ -488,6 +489,14 @@ func (mr *MetricsRecorder) GenerateNodeStatus(ctx context.Context) *statuspb.Nod
return nodeStat
}

func flattenStrings(s []redact.RedactableString) []string {
res := make([]string, len(s))
for i, v := range s {
res[i] = v.StripMarkers()
}
return res
}

// WriteNodeStatus writes the supplied summary to the given client. If mustExist
// is true, the key must already exist and must not change while being updated,
// otherwise an error is returned -- if false, the status is always written.
Expand Down
1 change: 1 addition & 0 deletions pkg/util/envutil/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ go_library(
deps = [
"//pkg/util/humanizeutil",
"//pkg/util/syncutil",
"@com_github_cockroachdb_redact//:redact",
],
)

Expand Down
132 changes: 121 additions & 11 deletions pkg/util/envutil/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ import (
"os"
"os/user"
"runtime"
"sort"
"strconv"
"strings"
"time"

"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/redact"
)

type envVarInfo struct {
Expand Down Expand Up @@ -106,23 +108,131 @@ func GetEnvReport() string {

// GetEnvVarsUsed returns the names of all environment variables that
// may have been used.
func GetEnvVarsUsed() []string {
func GetEnvVarsUsed() (result []redact.RedactableString) {
allVarsRaw := os.Environ()
sort.Strings(allVarsRaw)
allVarsValues := make(map[redact.SafeString]string, len(allVarsRaw))
allVarNames := make([]redact.SafeString, 0, len(allVarsRaw))
for _, v := range allVarsRaw {
i := strings.IndexByte(v, '=')
if i < 0 {
continue
}
varName := redact.SafeString(v[:i])
allVarNames = append(allVarNames, varName)
var value string
if i+1 < len(v) {
value = v[i+1:]
}
allVarsValues[varName] = value
}

envVarRegistry.mu.Lock()
defer envVarRegistry.mu.Unlock()

var vars []string
for k, v := range envVarRegistry.cache {
if v.present {
vars = append(vars, k+"="+os.Getenv(k))
for _, varName := range allVarNames {
_, crdbVar := envVarRegistry.cache[string(varName)]
_, safeVar := safeVarRegistry[varName]
if crdbVar || safeVar {
result = append(result, redact.Sprintf("%s=%s", varName, redact.Safe(allVarsValues[varName])))
} else if _, reportable := valueReportableUnsafeVarRegistry[varName]; reportable {
result = append(result, redact.Sprintf("%s=%s", varName, allVarsValues[varName]))
} else if _, reportable := nameReportableUnsafeVarRegistry[varName]; reportable {
result = append(result, redact.Sprintf("%s=...", varName))
}
// For any env var just the name could contain too many sensitive details and we
// don't really want them to show up in logs.
}

for _, name := range []string{"GOGC", "GODEBUG", "GOMAXPROCS", "GOTRACEBACK"} {
if val, ok := os.LookupEnv(name); ok {
vars = append(vars, name+"="+val)
}
}
return vars
return result
}

// safeVarRegistry is the list of variables where we can both report
// the name and the value safely: the value is known to never contain
// sensitive information.
var safeVarRegistry = map[redact.SafeString]struct{}{
"GOGC": {},
"GODEBUG": {},
"GOMAXPROCS": {},
"GOTRACEBACK": {},
}

// valueReportableUnsafeVarRegistry is the list of variables where we can
// report the name safely, and the value as a redactable payload.
// The value may contain sensitive information, but not so sensitive
// that users would be unhappy to see them enclosed within redaction
// markers in log files.
var valueReportableUnsafeVarRegistry = map[redact.SafeString]struct{}{
"DEBUG_HTTP2_GOROUTINES": {},
"GRPC_GO_LOG_SEVERITY_LEVEL": {},
"GRPC_GO_LOG_VERBOSITY_LEVEL": {},
"LANG": {},
"LC_ALL": {},
"LC_COLLATE": {},
"LC_CTYPE": {},
"LC_TIME": {},
"LC_NUMERIC": {},
"LC_MESSAGES": {},
"LS_METRICS_ENABLED": {},
"TERM": {},
"TZ": {},
"ZONEINFO": {},
// From the Go runtime.
"LOCALDOMAIN": {},
"RES_OPTIONS": {},
"HOSTALIASES": {},
"HTTP_PROXY": {},
"HTTPS_PROXY": {},
"NO_PROXY": {},
"REQUEST_METHOD": {},
}

// nameReportableUnsafeVarRegistry is the list of variables where we can
// report the name safely, but not the value in any form because it is
// too likely to contain an unsafe payload that users would be horrified
// to see in a log file, redaction markers or not.
var nameReportableUnsafeVarRegistry = map[redact.SafeString]struct{}{
// GCP.
"GOOGLE_API_USE_MTLS": {},
"GOOGLE_CLOUD_PROJECT": {},
// AWS.
"AWS_ACCESS_KEY": {},
"AWS_ACCESS_KEY_ID": {},
"AWS_PROFILE": {},
"AWS_REGION": {},
"AWS_SDK_LOAD_CONFIG": {},
"AWS_SECRET_ACCESS_KEY": {},
"AWS_SECRET_KEY": {},
"AWS_SESSION_TOKEN": {},
"AWS_SHARED_CREDENTIALS_FILE": {},
// Azure.
"AZURE_ACCESS_TOKEN_FILE": {},
"AZURE_AUTH_LOCATION": {},
"AZURE_CONFIG_DIR": {},
"AZURE_GO_SDK_LOG_FILE": {},
"AZURE_GO_SDK_LOG_LEVEL": {},
// Google auth.
"GAE_APPLICATION": {},
"GAE_DEPLOYMENT_ID": {},
"GAE_ENV": {},
"GAE_INSTANCE": {},
"GAE_LONG_APP_ID": {},
"GAE_MINOR_VERSION": {},
"GAE_MODULE_INSTANCE": {},
"GAE_MODULE_NAME": {},
"GAE_PARTITION": {},
"GAE_SERVICE": {},
// gRPC.
"GRPC_GO_LOG_SEVERITY_LEVEL": {},
"GRPC_GO_LOG_VERBOSITY_LEVEL": {},
// Kerberos.
"KRB5CCNAME": {},
// Pprof.
"PPROF_BINARY_PATH": {},
"PPROF_TMPDIR": {},
"PPROF_TOOLS": {},
// Sentry-go.
"SENTRY_RELEASE": {},
}

// GetShellCommand returns a complete command to run with a prefix of the command line.
Expand Down

0 comments on commit 1cf862c

Please sign in to comment.