diff --git a/pkg/ccl/logictestccl/testdata/logic_test/tenant_capability b/pkg/ccl/logictestccl/testdata/logic_test/tenant_capability index 790382695505..b4f9559a17bb 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/tenant_capability +++ b/pkg/ccl/logictestccl/testdata/logic_test/tenant_capability @@ -36,6 +36,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -61,6 +62,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -79,6 +81,7 @@ can_admin_scatter true can_admin_split false can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -104,6 +107,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -129,6 +133,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -154,6 +159,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info true can_view_tsdb_metrics false @@ -172,6 +178,7 @@ can_admin_scatter true can_admin_split false can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -190,6 +197,7 @@ can_admin_scatter true can_admin_split false can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -208,6 +216,7 @@ can_admin_scatter true can_admin_split false can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -227,6 +236,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit true can_check_consistency true +can_debug_process true can_use_nodelocal_storage true can_view_node_info true can_view_tsdb_metrics true @@ -268,6 +278,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -297,6 +308,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -331,6 +343,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -349,6 +362,7 @@ can_admin_scatter false can_admin_split false can_admin_unsplit false can_check_consistency false +can_debug_process false can_use_nodelocal_storage false can_view_node_info false can_view_tsdb_metrics false @@ -367,6 +381,7 @@ can_admin_scatter true can_admin_split true can_admin_unsplit true can_check_consistency true +can_debug_process true can_use_nodelocal_storage true can_view_node_info true can_view_tsdb_metrics true diff --git a/pkg/ccl/serverccl/server_sql_test.go b/pkg/ccl/serverccl/server_sql_test.go index ffa7aebd0246..3e9ef4a785df 100644 --- a/pkg/ccl/serverccl/server_sql_test.go +++ b/pkg/ccl/serverccl/server_sql_test.go @@ -13,6 +13,7 @@ import ( "errors" "fmt" "io" + "net/http" "strings" "testing" "time" @@ -20,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/ccl" "github.com/cockroachdb/cockroach/pkg/ccl/utilccl/licenseccl" + "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/security" "github.com/cockroachdb/cockroach/pkg/server/systemconfigwatcher/systemconfigwatchertest" @@ -194,6 +196,137 @@ func TestTenantHTTP(t *testing.T) { } +// TestTenantProcessDebugging verifies that in-process SQL tenant servers gate +// process debugging behind capabilities. +func TestTenantProcessDebugging(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + tc := serverutils.StartNewTestCluster(t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ + DefaultTestTenant: base.TestTenantDisabled, + }}) + defer tc.Stopper().Stop(ctx) + db := tc.ServerConn(0) + defer db.Close() + + s := tc.Server(0) + + tenant, _, err := s.StartSharedProcessTenant(ctx, + base.TestSharedProcessTenantArgs{ + TenantID: serverutils.TestTenantID(), + TenantName: "processdebug", + }) + require.NoError(t, err) + defer tenant.Stopper().Stop(ctx) + + t.Run("system tenant pprof", func(t *testing.T) { + httpClient, err := s.GetAdminHTTPClient() + require.NoError(t, err) + defer httpClient.CloseIdleConnections() + + url := s.AdminURL().URL + url.Path = url.Path + "/debug/pprof/goroutine" + q := url.Query() + q.Add("debug", "2") + url.RawQuery = q.Encode() + + resp, err := httpClient.Get(url.String()) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Contains(t, string(body), "goroutine") + }) + + t.Run("pprof", func(t *testing.T) { + httpClient, err := tenant.GetAdminHTTPClient() + require.NoError(t, err) + defer httpClient.CloseIdleConnections() + + url := tenant.AdminURL().URL + url.Path = url.Path + "/debug/pprof/" + q := url.Query() + q.Add("debug", "2") + url.RawQuery = q.Encode() + + resp, err := httpClient.Get(url.String()) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + require.Equal(t, http.StatusForbidden, resp.StatusCode) + require.Contains(t, string(body), "tenant does not have capability to debug the running process") + + _, err = db.Exec(`ALTER TENANT processdebug GRANT CAPABILITY can_debug_process=true`) + require.NoError(t, err) + + tc.WaitForTenantCapabilities(t, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{ + tenantcapabilities.CanDebugProcess: "true", + }) + + resp, err = httpClient.Get(url.String()) + require.NoError(t, err) + defer resp.Body.Close() + body, err = io.ReadAll(resp.Body) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Contains(t, string(body), "goroutine") + + _, err = db.Exec(`ALTER TENANT processdebug REVOKE CAPABILITY can_debug_process`) + require.NoError(t, err) + + tc.WaitForTenantCapabilities(t, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{ + tenantcapabilities.CanDebugProcess: "false", + }) + }) + + t.Run("vmodule", func(t *testing.T) { + httpClient, err := tenant.GetAdminHTTPClient() + require.NoError(t, err) + defer httpClient.CloseIdleConnections() + + url := tenant.AdminURL().URL + url.Path = url.Path + "/debug/vmodule" + q := url.Query() + q.Add("duration", "-1s") + q.Add("vmodule", "exec_log=3") + url.RawQuery = q.Encode() + + resp, err := httpClient.Get(url.String()) + require.NoError(t, err) + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + require.Equal(t, http.StatusForbidden, resp.StatusCode) + require.Contains(t, string(body), "tenant does not have capability to debug the running process") + + _, err = db.Exec(`ALTER TENANT processdebug GRANT CAPABILITY can_debug_process=true`) + require.NoError(t, err) + + tc.WaitForTenantCapabilities(t, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{ + tenantcapabilities.CanDebugProcess: "true", + }) + + resp, err = httpClient.Get(url.String()) + require.NoError(t, err) + defer resp.Body.Close() + body, err = io.ReadAll(resp.Body) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + require.Contains(t, string(body), "previous vmodule configuration: \nnew vmodule configuration: exec_log=3\n") + + _, err = db.Exec(`ALTER TENANT processdebug REVOKE CAPABILITY can_debug_process`) + require.NoError(t, err) + + tc.WaitForTenantCapabilities(t, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{ + tenantcapabilities.CanDebugProcess: "false", + }) + }) +} + func TestNonExistentTenant(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/configprofiles/testdata/multitenant-app b/pkg/configprofiles/testdata/multitenant-app index a4e71ecbb9fe..d2425716bcd5 100644 --- a/pkg/configprofiles/testdata/multitenant-app +++ b/pkg/configprofiles/testdata/multitenant-app @@ -58,6 +58,7 @@ SHOW TENANTS WITH CAPABILITIES 1 system ready shared can_admin_split true 1 system ready shared can_admin_unsplit true 1 system ready shared can_check_consistency true +1 system ready shared can_debug_process true 1 system ready shared can_use_nodelocal_storage true 1 system ready shared can_view_node_info true 1 system ready shared can_view_tsdb_metrics true @@ -68,6 +69,7 @@ SHOW TENANTS WITH CAPABILITIES 2 template ready none can_admin_split true 2 template ready none can_admin_unsplit true 2 template ready none can_check_consistency true +2 template ready none can_debug_process true 2 template ready none can_use_nodelocal_storage true 2 template ready none can_view_node_info true 2 template ready none can_view_tsdb_metrics true @@ -78,6 +80,7 @@ SHOW TENANTS WITH CAPABILITIES 3 application ready shared can_admin_split true 3 application ready shared can_admin_unsplit true 3 application ready shared can_check_consistency true +3 application ready shared can_debug_process true 3 application ready shared can_use_nodelocal_storage true 3 application ready shared can_view_node_info true 3 application ready shared can_view_tsdb_metrics true diff --git a/pkg/configprofiles/testdata/multitenant-app-repl b/pkg/configprofiles/testdata/multitenant-app-repl index ae5e43c6f68c..bbfef13442e1 100644 --- a/pkg/configprofiles/testdata/multitenant-app-repl +++ b/pkg/configprofiles/testdata/multitenant-app-repl @@ -58,6 +58,7 @@ SHOW TENANTS WITH CAPABILITIES 1 system ready shared can_admin_split true 1 system ready shared can_admin_unsplit true 1 system ready shared can_check_consistency true +1 system ready shared can_debug_process true 1 system ready shared can_use_nodelocal_storage true 1 system ready shared can_view_node_info true 1 system ready shared can_view_tsdb_metrics true @@ -68,6 +69,7 @@ SHOW TENANTS WITH CAPABILITIES 2 template ready none can_admin_split true 2 template ready none can_admin_unsplit true 2 template ready none can_check_consistency true +2 template ready none can_debug_process true 2 template ready none can_use_nodelocal_storage true 2 template ready none can_view_node_info true 2 template ready none can_view_tsdb_metrics true @@ -78,6 +80,7 @@ SHOW TENANTS WITH CAPABILITIES 3 application ready shared can_admin_split true 3 application ready shared can_admin_unsplit true 3 application ready shared can_check_consistency true +3 application ready shared can_debug_process true 3 application ready shared can_use_nodelocal_storage true 3 application ready shared can_view_node_info true 3 application ready shared can_view_tsdb_metrics true diff --git a/pkg/configprofiles/testdata/multitenant-noapp b/pkg/configprofiles/testdata/multitenant-noapp index dc207e1a0b3d..1785b6138a68 100644 --- a/pkg/configprofiles/testdata/multitenant-noapp +++ b/pkg/configprofiles/testdata/multitenant-noapp @@ -51,6 +51,7 @@ SHOW TENANTS WITH CAPABILITIES 1 system ready shared can_admin_split true 1 system ready shared can_admin_unsplit true 1 system ready shared can_check_consistency true +1 system ready shared can_debug_process true 1 system ready shared can_use_nodelocal_storage true 1 system ready shared can_view_node_info true 1 system ready shared can_view_tsdb_metrics true @@ -61,6 +62,7 @@ SHOW TENANTS WITH CAPABILITIES 2 template ready none can_admin_split true 2 template ready none can_admin_unsplit true 2 template ready none can_check_consistency true +2 template ready none can_debug_process true 2 template ready none can_use_nodelocal_storage true 2 template ready none can_view_node_info true 2 template ready none can_view_tsdb_metrics true diff --git a/pkg/configprofiles/testdata/multitenant-noapp-repl b/pkg/configprofiles/testdata/multitenant-noapp-repl index e5af19772c99..52456f3f6f57 100644 --- a/pkg/configprofiles/testdata/multitenant-noapp-repl +++ b/pkg/configprofiles/testdata/multitenant-noapp-repl @@ -51,6 +51,7 @@ SHOW TENANTS WITH CAPABILITIES 1 system ready shared can_admin_split true 1 system ready shared can_admin_unsplit true 1 system ready shared can_check_consistency true +1 system ready shared can_debug_process true 1 system ready shared can_use_nodelocal_storage true 1 system ready shared can_view_node_info true 1 system ready shared can_view_tsdb_metrics true @@ -61,6 +62,7 @@ SHOW TENANTS WITH CAPABILITIES 2 template ready none can_admin_split true 2 template ready none can_admin_unsplit true 2 template ready none can_check_consistency true +2 template ready none can_debug_process true 2 template ready none can_use_nodelocal_storage true 2 template ready none can_view_node_info true 2 template ready none can_view_tsdb_metrics true diff --git a/pkg/kv/kvserver/tenantrate/limiter_test.go b/pkg/kv/kvserver/tenantrate/limiter_test.go index 5efc33f62409..446fb1c17f36 100644 --- a/pkg/kv/kvserver/tenantrate/limiter_test.go +++ b/pkg/kv/kvserver/tenantrate/limiter_test.go @@ -667,6 +667,16 @@ func (ts *testState) HasCapabilityForBatch( func (ts *testState) BindReader(tenantcapabilities.Reader) {} +var _ tenantcapabilities.Authorizer = &testState{} + +func (ts *testState) HasProcessDebugCapability(ctx context.Context, tenID roachpb.TenantID) error { + if ts.capabilities[tenID].CanDebugProcess { + return nil + } else { + return errors.New("unauthorized") + } +} + func (ts *testState) HasNodeStatusCapability(_ context.Context, tenID roachpb.TenantID) error { if ts.capabilities[tenID].CanViewNodeInfo { return nil @@ -767,6 +777,8 @@ func parseStrings(t *testing.T, d *datadriven.TestData) []string { // is subject to rate limit checks. (For testing in this package.) type fakeAuthorizer struct{} +var _ tenantcapabilities.Authorizer = &fakeAuthorizer{} + func (fakeAuthorizer) HasNodeStatusCapability(_ context.Context, tenID roachpb.TenantID) error { return nil } @@ -787,3 +799,7 @@ func (fakeAuthorizer) HasCapabilityForBatch( return nil } func (fakeAuthorizer) BindReader(tenantcapabilities.Reader) {} + +func (fakeAuthorizer) HasProcessDebugCapability(ctx context.Context, tenID roachpb.TenantID) error { + return nil +} diff --git a/pkg/multitenant/tenantcapabilities/capabilities.go b/pkg/multitenant/tenantcapabilities/capabilities.go index e9869a79bb81..c6b59e442dd4 100644 --- a/pkg/multitenant/tenantcapabilities/capabilities.go +++ b/pkg/multitenant/tenantcapabilities/capabilities.go @@ -84,6 +84,11 @@ const ( // span configs. TenantSpanConfigBounds // span_config_bounds + // CanDebugProcess describes the ability of a tenant to set vmodule on the + // process and run pprof profiles and tools. This can reveal information + // across tenant boundaries. + CanDebugProcess // can_debug_process + MaxCapabilityID ID = iota - 1 ) @@ -114,6 +119,7 @@ var capabilities = [MaxCapabilityID + 1]Capability{ CanViewTSDBMetrics: boolCapability(CanViewTSDBMetrics), ExemptFromRateLimiting: boolCapability(ExemptFromRateLimiting), TenantSpanConfigBounds: spanConfigBoundsCapability(TenantSpanConfigBounds), + CanDebugProcess: boolCapability(CanDebugProcess), } // EnableAll enables maximum access to services. diff --git a/pkg/multitenant/tenantcapabilities/id_string.go b/pkg/multitenant/tenantcapabilities/id_string.go index 4ba2401e8697..da79f2376809 100644 --- a/pkg/multitenant/tenantcapabilities/id_string.go +++ b/pkg/multitenant/tenantcapabilities/id_string.go @@ -18,7 +18,8 @@ func _() { _ = x[CanViewTSDBMetrics-8] _ = x[ExemptFromRateLimiting-9] _ = x[TenantSpanConfigBounds-10] - _ = x[MaxCapabilityID-10] + _ = x[CanDebugProcess-11] + _ = x[MaxCapabilityID-11] } func (i ID) String() string { @@ -43,6 +44,8 @@ func (i ID) String() string { return "exempt_from_rate_limiting" case TenantSpanConfigBounds: return "span_config_bounds" + case CanDebugProcess: + return "can_debug_process" default: return "ID(" + strconv.FormatInt(int64(i), 10) + ")" } @@ -59,7 +62,8 @@ var stringToCapabilityIDMap = map[string]ID{ "can_view_tsdb_metrics": 8, "exempt_from_rate_limiting": 9, "span_config_bounds": 10, - "MaxCapabilityID": 10, + "can_debug_process": 11, + "MaxCapabilityID": 11, } var IDs = []ID{ @@ -68,6 +72,7 @@ var IDs = []ID{ CanAdminSplit, CanAdminUnsplit, CanCheckConsistency, + CanDebugProcess, CanUseNodelocalStorage, CanViewNodeInfo, CanViewTSDBMetrics, diff --git a/pkg/multitenant/tenantcapabilities/interfaces.go b/pkg/multitenant/tenantcapabilities/interfaces.go index 1dd4dfb911a4..498c9f1aaa0d 100644 --- a/pkg/multitenant/tenantcapabilities/interfaces.go +++ b/pkg/multitenant/tenantcapabilities/interfaces.go @@ -70,6 +70,10 @@ type Authorizer interface { // IsExemptFromRateLimiting returns true of the tenant should // not be subject to rate limiting. IsExemptFromRateLimiting(ctx context.Context, tenID roachpb.TenantID) bool + + // HasProcessDebugCapability returns an error if a tenant, referenced by its ID, + // is not allowed to debug the running process. + HasProcessDebugCapability(ctx context.Context, tenID roachpb.TenantID) error } // Entry ties together a tenantID with its capabilities. diff --git a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/allow_everything.go b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/allow_everything.go index 1426b7c58516..e6bc27658a3a 100644 --- a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/allow_everything.go +++ b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/allow_everything.go @@ -66,3 +66,10 @@ func (n *AllowEverythingAuthorizer) IsExemptFromRateLimiting( ) bool { return true } + +// HasProcessDebugCapability implements the tenantcapabilities.Authorizer interface. +func (n *AllowEverythingAuthorizer) HasProcessDebugCapability( + ctx context.Context, tenID roachpb.TenantID, +) error { + return nil +} diff --git a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/allow_nothing.go b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/allow_nothing.go index f759f6ac0e76..02eb5a0597aa 100644 --- a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/allow_nothing.go +++ b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/allow_nothing.go @@ -65,3 +65,10 @@ func (n *AllowNothingAuthorizer) HasNodelocalStorageCapability( func (n *AllowNothingAuthorizer) IsExemptFromRateLimiting(context.Context, roachpb.TenantID) bool { return false } + +// HasProcessDebugCapability implements the tenantcapabilities.Authorizer interface. +func (n *AllowNothingAuthorizer) HasProcessDebugCapability( + ctx context.Context, tenID roachpb.TenantID, +) error { + return errors.New("operation blocked") +} diff --git a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/authorizer.go b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/authorizer.go index 2c4ae8f1b034..9443b77fc2ea 100644 --- a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/authorizer.go +++ b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/authorizer.go @@ -390,3 +390,32 @@ func (a *Authorizer) getMode( } return cp, selectedMode } + +func (a *Authorizer) HasProcessDebugCapability(ctx context.Context, tenID roachpb.TenantID) error { + if tenID.IsSystem() { + return nil + } + errFn := func() error { + return errors.New("client tenant does not have capability to debug the process") + } + cp, mode := a.getMode(ctx, tenID) + switch mode { + case authorizerModeOn: + break // fallthrough to the next check. + case authorizerModeAllowAll: + return nil + case authorizerModeV222: + return errFn() + default: + err := errors.AssertionFailedf("unknown authorizer mode: %d", mode) + logcrash.ReportOrPanic(ctx, &a.settings.SV, "%v", err) + return err + } + + if !tenantcapabilities.MustGetBoolByID( + cp, tenantcapabilities.CanDebugProcess, + ) { + return errFn() + } + return nil +} diff --git a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/testdata/authorizer_enabled b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/testdata/authorizer_enabled index b4e244d89491..0f345632857c 100644 --- a/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/testdata/authorizer_enabled +++ b/pkg/multitenant/tenantcapabilities/tenantcapabilitiesauthorizer/testdata/authorizer_enabled @@ -8,7 +8,7 @@ client tenant does not have capability "can_admin_scatter" (*kvpb.AdminScatterRe has-capability-for-batch ten=10 cmds=(Merge) ---- -client tenant does not have capability "ID(12)" (*kvpb.MergeRequest) +client tenant does not have capability "ID(13)" (*kvpb.MergeRequest) has-node-status-capability ten=10 ---- @@ -34,7 +34,7 @@ ok has-capability-for-batch ten=10 cmds=(Merge) ---- -client tenant does not have capability "ID(12)" (*kvpb.MergeRequest) +client tenant does not have capability "ID(13)" (*kvpb.MergeRequest) has-node-status-capability ten=10 ---- diff --git a/pkg/multitenant/tenantcapabilities/tenantcapabilitiespb/capabilities.proto b/pkg/multitenant/tenantcapabilities/tenantcapabilitiespb/capabilities.proto index 398c2c08458b..173d0dfda0e2 100644 --- a/pkg/multitenant/tenantcapabilities/tenantcapabilitiespb/capabilities.proto +++ b/pkg/multitenant/tenantcapabilities/tenantcapabilitiespb/capabilities.proto @@ -92,6 +92,10 @@ message TenantCapabilities { // CanCheckConsistency if set to true, grants the tenant the ability to run // range consistency checks. bool can_check_consistency = 10; + + // CanDebugProcess, if set to true, grants the tenant the ability to + // set vmodule on the process and run pprof profiles and tools. + bool can_debug_process = 11; }; // SpanConfigBound is used to constrain the possible values a SpanConfig may diff --git a/pkg/multitenant/tenantcapabilities/values.go b/pkg/multitenant/tenantcapabilities/values.go index 9f9d93f3af70..50dc8daab735 100644 --- a/pkg/multitenant/tenantcapabilities/values.go +++ b/pkg/multitenant/tenantcapabilities/values.go @@ -56,6 +56,8 @@ func GetValueByID(t *tenantcapabilitiespb.TenantCapabilities, id ID) (Value, err return (*boolValue)(&t.ExemptFromRateLimiting), nil case TenantSpanConfigBounds: return &spanConfigBoundsValue{b: &t.SpanConfigBounds}, nil + case CanDebugProcess: + return (*boolValue)(&t.CanDebugProcess), nil default: return nil, errors.AssertionFailedf("unknown capability: %q", id.String()) } diff --git a/pkg/rpc/auth_test.go b/pkg/rpc/auth_test.go index 7e7219bc81a5..b0a5d8f6cfbc 100644 --- a/pkg/rpc/auth_test.go +++ b/pkg/rpc/auth_test.go @@ -954,6 +954,12 @@ type mockAuthorizer struct { hasExemptFromRateLimiterCapability bool } +func (m mockAuthorizer) HasProcessDebugCapability( + ctx context.Context, tenID roachpb.TenantID, +) error { + return errors.New("tenant does not have capability") +} + var _ tenantcapabilities.Authorizer = &mockAuthorizer{} // HasCapabilityForBatch implements the tenantcapabilities.Authorizer interface. diff --git a/pkg/server/config.go b/pkg/server/config.go index 096369fac8e2..0a2993eed8ed 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -30,6 +30,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvclient/rangefeed" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" + "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/rpc" "github.com/cockroachdb/cockroach/pkg/server/autoconfig/acprovider" @@ -536,6 +537,10 @@ type LocalKVServerInfo struct { InternalServer kvpb.InternalServer ServerInterceptors rpc.ServerInterceptorInfo Tracer *tracing.Tracer + + // SameProcessCapabilityAuthorizer is the tenant capability authorizer to + // use for servers running in the same process as the KV node. + SameProcessCapabilityAuthorizer tenantcapabilities.Authorizer } // MakeSQLConfig returns a SQLConfig with default values. diff --git a/pkg/server/debug/BUILD.bazel b/pkg/server/debug/BUILD.bazel index 90bc9c8c4609..3c40fcc2a6ea 100644 --- a/pkg/server/debug/BUILD.bazel +++ b/pkg/server/debug/BUILD.bazel @@ -17,6 +17,7 @@ go_library( "//pkg/kv/kvserver", "//pkg/kv/kvserver/closedts/sidetransport", "//pkg/kv/kvserver/kvstorage", + "//pkg/multitenant/tenantcapabilities", "//pkg/roachpb", "//pkg/server/debug/goroutineui", "//pkg/server/debug/pprofui", diff --git a/pkg/server/debug/server.go b/pkg/server/debug/server.go index 284b6cf3f8b5..f4caee78e89c 100644 --- a/pkg/server/debug/server.go +++ b/pkg/server/debug/server.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/sidetransport" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvstorage" + "github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/server/debug/goroutineui" "github.com/cockroachdb/cockroach/pkg/server/debug/pprofui" @@ -72,41 +73,98 @@ type Server struct { type serverTickleFn = func(ctx context.Context, name roachpb.TenantName) error -// NewServer sets up a debug server. -func NewServer( - ambientContext log.AmbientContext, +func setupProcessWideRoutes( + mux *http.ServeMux, st *cluster.Settings, - hbaConfDebugFn http.HandlerFunc, + tenantID roachpb.TenantID, + authorizer tenantcapabilities.Authorizer, + vsrv *vmoduleServer, profiler pprofui.Profiler, - serverTickleFn serverTickleFn, -) *Server { - mux := http.NewServeMux() +) { + authzCheck := func(w http.ResponseWriter, r *http.Request) bool { + if err := authorizer.HasProcessDebugCapability(r.Context(), tenantID); err != nil { + http.Error(w, "tenant does not have capability to debug the running process", http.StatusForbidden) + return false + } + return true + } - // Install a redirect to the UI's collection of debug tools. - mux.HandleFunc(Endpoint, handleLanding) + authzFunc := func(origHandler http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if authzCheck(w, r) { + origHandler(w, r) + } + } + } // Cribbed straight from pprof's `init()` method. See: // https://golang.org/src/net/http/pprof/pprof.go - mux.HandleFunc("/debug/pprof/", pprof.Index) - mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) - mux.HandleFunc("/debug/pprof/profile", func(w http.ResponseWriter, r *http.Request) { + mux.HandleFunc("/debug/pprof/", authzFunc(pprof.Index)) + mux.HandleFunc("/debug/pprof/cmdline", authzFunc(pprof.Cmdline)) + mux.HandleFunc("/debug/pprof/profile", authzFunc(func(w http.ResponseWriter, r *http.Request) { CPUProfileHandler(st, w, r) - }) - mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) - mux.HandleFunc("/debug/pprof/trace", pprof.Trace) + })) + mux.HandleFunc("/debug/pprof/symbol", authzFunc(pprof.Symbol)) + mux.HandleFunc("/debug/pprof/trace", authzFunc(pprof.Trace)) // Cribbed straight from trace's `init()` method. See: // https://github.com/golang/net/blob/master/trace/trace.go - mux.HandleFunc("/debug/requests", trace.Traces) - mux.HandleFunc("/debug/events", trace.Events) + mux.HandleFunc("/debug/requests", authzFunc(trace.Traces)) + mux.HandleFunc("/debug/events", authzFunc(trace.Events)) // This registers a superset of the variables exposed through the // /debug/vars endpoint onto the /debug/metrics endpoint. It includes all // expvars registered globally and all metrics registered on the // DefaultRegistry. - mux.Handle("/debug/metrics", exp.ExpHandler(metrics.DefaultRegistry)) + mux.HandleFunc("/debug/metrics", authzFunc(exp.ExpHandler(metrics.DefaultRegistry).ServeHTTP)) // Also register /debug/vars (even though /debug/metrics is better). - mux.Handle("/debug/vars", expvar.Handler()) + mux.HandleFunc("/debug/vars", authzFunc(expvar.Handler().ServeHTTP)) + + // Register the stopper endpoint, which lists all active tasks. + mux.HandleFunc("/debug/stopper", authzFunc(stop.HandleDebug)) + + // Set up the vmodule endpoint. + mux.HandleFunc("/debug/vmodule", authzFunc(vsrv.vmoduleHandleDebug)) + + ps := pprofui.NewServer(pprofui.NewMemStorage(pprofui.ProfileConcurrency, pprofui.ProfileExpiry), profiler) + mux.Handle("/debug/pprof/ui/", authzFunc(func(w http.ResponseWriter, r *http.Request) { + http.StripPrefix("/debug/pprof/ui", ps).ServeHTTP(w, r) + })) + + mux.HandleFunc("/debug/pprof/goroutineui/", authzFunc(func(w http.ResponseWriter, req *http.Request) { + dump := goroutineui.NewDump() + + _ = req.ParseForm() + switch req.Form.Get("sort") { + case "count": + dump.SortCountDesc() + case "wait": + dump.SortWaitDesc() + default: + } + _ = dump.HTML(w) + })) + +} + +// NewServer sets up a debug server. +func NewServer( + ambientContext log.AmbientContext, + st *cluster.Settings, + hbaConfDebugFn http.HandlerFunc, + profiler pprofui.Profiler, + serverTickleFn serverTickleFn, + tenantID roachpb.TenantID, + authorizer tenantcapabilities.Authorizer, +) *Server { + mux := http.NewServeMux() + + // Install a redirect to the UI's collection of debug tools. + mux.HandleFunc(Endpoint, handleLanding) + + // Debug routes that retrieve process-wide state. + vsrv := &vmoduleServer{} + setupProcessWideRoutes(mux, st, tenantID, authorizer, vsrv, profiler) if hbaConfDebugFn != nil { // Expose the processed HBA configuration through the debug @@ -114,9 +172,6 @@ func NewServer( mux.HandleFunc("/debug/hba_conf", hbaConfDebugFn) } - // Register the stopper endpoint, which lists all active tasks. - mux.HandleFunc("/debug/stopper", stop.HandleDebug) - if serverTickleFn != nil { // Register the server tickling function. // @@ -126,10 +181,6 @@ func NewServer( mux.Handle("/debug/tickle", handleTickle(serverTickleFn)) } - // Set up the vmodule endpoint. - vsrv := &vmoduleServer{} - mux.HandleFunc("/debug/vmodule", vsrv.vmoduleHandleDebug) - // Set up the log spy, a tool that allows inspecting filtered logs at high // verbosity. We require the tenant ID from the ambientCtx to set the logSpy // tenant filter. @@ -149,23 +200,6 @@ func NewServer( mux.HandleFunc("/debug/logspy", spy.handleDebugLogSpy) - ps := pprofui.NewServer(pprofui.NewMemStorage(pprofui.ProfileConcurrency, pprofui.ProfileExpiry), profiler) - mux.Handle("/debug/pprof/ui/", http.StripPrefix("/debug/pprof/ui", ps)) - - mux.HandleFunc("/debug/pprof/goroutineui/", func(w http.ResponseWriter, req *http.Request) { - dump := goroutineui.NewDump() - - _ = req.ParseForm() - switch req.Form.Get("sort") { - case "count": - dump.SortCountDesc() - case "wait": - dump.SortWaitDesc() - default: - } - _ = dump.HTML(w) - }) - return &Server{ ambientCtx: ambientContext, st: st, diff --git a/pkg/server/server.go b/pkg/server/server.go index 4d939a804d06..2e230eacb4e2 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -1242,6 +1242,8 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { } return errors.Newf("server found with type %T", d) }, + roachpb.SystemTenantID, + authorizer, ) recoveryServer := loqrecovery.NewServer( diff --git a/pkg/server/server_controller_new_server.go b/pkg/server/server_controller_new_server.go index e00c64cbae89..7c712a6cddfd 100644 --- a/pkg/server/server_controller_new_server.go +++ b/pkg/server/server_controller_new_server.go @@ -155,8 +155,9 @@ func (s *Server) makeSharedProcessTenantConfig( // Create a configuration for the new tenant. parentCfg := s.cfg localServerInfo := LocalKVServerInfo{ - InternalServer: s.node, - ServerInterceptors: s.grpc.serverInterceptorsInfo, + InternalServer: s.node, + ServerInterceptors: s.grpc.serverInterceptorsInfo, + SameProcessCapabilityAuthorizer: s.rpcContext.TenantRPCAuthorizer, } baseCfg, sqlCfg, err := makeSharedProcessTenantServerConfig(ctx, tenantID, index, parentCfg, localServerInfo, stopper, s.recorder) if err != nil { diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go index 2eff7dd1a8da..a42a1dca5a7e 100644 --- a/pkg/server/tenant.go +++ b/pkg/server/tenant.go @@ -411,6 +411,11 @@ func newTenantServer( sStatus.baseStatusServer.sqlServer = sqlServer sAdmin.sqlServer = sqlServer + var processCapAuthz tenantcapabilities.Authorizer = &tenantcapabilitiesauthorizer.AllowEverythingAuthorizer{} + if lsi := sqlCfg.LocalKVServerInfo; lsi != nil { + processCapAuthz = lsi.SameProcessCapabilityAuthorizer + } + // Create the debug API server. debugServer := debug.NewServer( baseCfg.AmbientCtx, @@ -418,6 +423,8 @@ func newTenantServer( sqlServer.pgServer.HBADebugFn(), sqlServer.execCfg.SQLStatusServer, nil, /* serverTickleFn */ + sqlCfg.TenantID, + processCapAuthz, ) return &SQLServerWrapper{ diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 80ec0922cafd..8c22328fbb75 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -921,6 +921,8 @@ func (ts *TestServer) StartSharedProcessTenant( hts := &httpTestServer{} hts.t.authentication = sqlServerWrapper.authentication hts.t.sqlServer = sqlServer + hts.t.tenantName = args.TenantName + testTenant := &TestTenant{ SQLServer: sqlServer, Cfg: sqlServer.cfg,