Skip to content

Commit

Permalink
server: add cluster setting to control tenant trace redaction
Browse files Browse the repository at this point in the history
Previously, all operations run on the server on behalf of a secondary
tenant would have their logs redacted. This patch introduces a new
cluster setting that controls whether logs should be redacted or
not. This is only settable by the system tenant and maintains the
previous behaviour by default.

Release note (ops change): introduce a new cluster setting
(`server.secondary_tenants.redact_trace`) which controls if traces
should be redacted for ops run on behalf of secondary tenants.
  • Loading branch information
arulajmani committed Aug 14, 2022
1 parent d25cb57 commit 8ebdf27
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 13 deletions.
1 change: 1 addition & 0 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
<tr><td><code>server.oidc_authentication.redirect_url</code></td><td>string</td><td><code>https://localhost:8080/oidc/v1/callback</code></td><td>sets OIDC redirect URL via a URL string or a JSON string containing a required `redirect_urls` key with an object that maps from region keys to URL strings (URLs should point to your load balancer and must route to the path /oidc/v1/callback) </td></tr>
<tr><td><code>server.oidc_authentication.scopes</code></td><td>string</td><td><code>openid</code></td><td>sets OIDC scopes to include with authentication request (space delimited list of strings, required to start with `openid`)</td></tr>
<tr><td><code>server.rangelog.ttl</code></td><td>duration</td><td><code>720h0m0s</code></td><td>if nonzero, range log entries older than this duration are deleted every 10m0s. Should not be lowered below 24 hours.</td></tr>
<tr><td><code>server.secondary_tenants.redact_trace.enabled</code></td><td>boolean</td><td><code>true</code></td><td>controls if server side traces are redacted for tenant operations</td></tr>
<tr><td><code>server.shutdown.connection_wait</code></td><td>duration</td><td><code>0s</code></td><td>the maximum amount of time a server waits for all SQL connections to be closed before proceeding with a drain. (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
<tr><td><code>server.shutdown.drain_wait</code></td><td>duration</td><td><code>0s</code></td><td>the amount of time a server waits in an unready state before proceeding with a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting. --drain-wait is to specify the duration of the whole draining process, while server.shutdown.drain_wait is to set the wait time for health probes to notice that the node is not ready.)</td></tr>
<tr><td><code>server.shutdown.lease_transfer_wait</code></td><td>duration</td><td><code>5s</code></td><td>the timeout for a single iteration of the range lease transfer phase of draining (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting)</td></tr>
Expand Down
5 changes: 4 additions & 1 deletion pkg/server/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"testing"

"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
Expand Down Expand Up @@ -57,7 +58,9 @@ func BenchmarkSetupSpanForIncomingRPC(b *testing.B) {

b.ResetTimer()
for i := 0; i < b.N; i++ {
_, sp := setupSpanForIncomingRPC(ctx, roachpb.SystemTenantID, ba, tr)
_, sp := setupSpanForIncomingRPC(
ctx, roachpb.SystemTenantID, ba, tr, cluster.MakeTestingClusterSettings(),
)
sp.finish(ctx, nil /* br */)
}
})
Expand Down
34 changes: 22 additions & 12 deletions pkg/server/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ var (
10*time.Second,
settings.NonNegativeDurationWithMaximum(maxGraphiteInterval),
).WithPublic()
redactServerTracesForSecondaryTenants = settings.RegisterBoolSetting(
settings.SystemOnly,
"server.secondary_tenants.redact_trace.enabled",
"controls if server side traces are redacted for tenant operations",
true,
).WithPublic()
)

type nodeMetrics struct {
Expand Down Expand Up @@ -1097,6 +1103,7 @@ type spanForRequest struct {
sp *tracing.Span
needRecording bool
tenID roachpb.TenantID
settings *cluster.Settings
}

// finish finishes the span. If the span was recording and br is not nil, the
Expand All @@ -1114,17 +1121,15 @@ func (sp *spanForRequest) finish(ctx context.Context, br *roachpb.BatchResponse)

rec = sp.sp.FinishAndGetConfiguredRecording()
if rec != nil {
// Decide if the trace for this RPC, if any, will need to be redacted. It
// needs to be redacted if the response goes to a tenant. In case the request
// is local, then the trace might eventually go to a tenant (and tenID might
// be set), but it will go to the tenant only indirectly, through the response
// of a parent RPC. In that case, that parent RPC is responsible for the
// redaction.
// Decide if the trace for this RPC, if any, will need to be redacted. In
// general, responses sent to a tenant are redacted unless indicated
// otherwise by the cluster setting below.
//
// Tenants get a redacted recording, i.e. with anything
// sensitive stripped out of the verbose messages. However,
// structured payloads stay untouched.
needRedaction := sp.tenID != roachpb.SystemTenantID
// Even if the recording sent to a tenant is redacted (anything sensitive
// is stripped out of the verbose messages), structured payloads
// stay untouched.
needRedaction := sp.tenID != roachpb.SystemTenantID &&
redactServerTracesForSecondaryTenants.Get(&sp.settings.SV)
if needRedaction {
if err := redactRecordingForTenant(sp.tenID, rec); err != nil {
log.Errorf(ctx, "error redacting trace recording: %s", err)
Expand All @@ -1151,11 +1156,15 @@ func (sp *spanForRequest) finish(ctx context.Context, br *roachpb.BatchResponse)
func (n *Node) setupSpanForIncomingRPC(
ctx context.Context, tenID roachpb.TenantID, ba *roachpb.BatchRequest,
) (context.Context, spanForRequest) {
return setupSpanForIncomingRPC(ctx, tenID, ba, n.storeCfg.AmbientCtx.Tracer)
return setupSpanForIncomingRPC(ctx, tenID, ba, n.storeCfg.AmbientCtx.Tracer, n.storeCfg.Settings)
}

func setupSpanForIncomingRPC(
ctx context.Context, tenID roachpb.TenantID, ba *roachpb.BatchRequest, tr *tracing.Tracer,
ctx context.Context,
tenID roachpb.TenantID,
ba *roachpb.BatchRequest,
tr *tracing.Tracer,
settings *cluster.Settings,
) (context.Context, spanForRequest) {
var newSpan *tracing.Span
parentSpan := tracing.SpanFromContext(ctx)
Expand Down Expand Up @@ -1199,6 +1208,7 @@ func setupSpanForIncomingRPC(
needRecording: needRecordingCollection,
tenID: tenID,
sp: newSpan,
settings: settings,
}
}

Expand Down

0 comments on commit 8ebdf27

Please sign in to comment.