Skip to content

Commit

Permalink
server, tenant: gate process debugging behind capability
Browse files Browse the repository at this point in the history
Previously, all tenant servers were started with a debug server
that granted process-manipulating power via pprof and vmodule HTTP
endpoints.

This implementation is fine when servers serve just 1 tenant in
a given process; that tenant then legitimately has access to all
process-level control. However, it becomes a problem in shared-process
multitenancy, when the same process is shared by multiple tenants. In
that case, it is undesirable for 1 tenant to have access to & control
process properties, as it could influence the well-functioning of
other tenants or potentially leak data across tenant boundaries.

This commit gates access to the debug server behind a capability
**only with shared process multitenancy**. Tenant servers running
within their own process will contain a debug server with no
capability gating since they own their process.

The gating is implemented via a tenant authorizer function backed
by the capability store that is injected into the debug server
upon startup. Shared process tenants are provided this authorizer,
while separate process tenants and the system tenant use the no-op
authorizer.

Epic: CRDB-12100
Resolves: #97946

Release note: None

Co-authored-by: Raphael 'kena' Poss <[email protected]>
  • Loading branch information
dhartunian and knz committed Jun 28, 2023
1 parent 125379f commit cc16c03
Show file tree
Hide file tree
Showing 24 changed files with 345 additions and 49 deletions.
15 changes: 15 additions & 0 deletions pkg/ccl/logictestccl/testdata/logic_test/tenant_capability
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -61,6 +62,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -79,6 +81,7 @@ can_admin_scatter true
can_admin_split false
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -104,6 +107,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -129,6 +133,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -154,6 +159,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info true
can_view_tsdb_metrics false
Expand All @@ -172,6 +178,7 @@ can_admin_scatter true
can_admin_split false
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -190,6 +197,7 @@ can_admin_scatter true
can_admin_split false
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -208,6 +216,7 @@ can_admin_scatter true
can_admin_split false
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -227,6 +236,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit true
can_check_consistency true
can_debug_process true
can_use_nodelocal_storage true
can_view_node_info true
can_view_tsdb_metrics true
Expand Down Expand Up @@ -268,6 +278,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand Down Expand Up @@ -297,6 +308,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand Down Expand Up @@ -331,6 +343,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -349,6 +362,7 @@ can_admin_scatter false
can_admin_split false
can_admin_unsplit false
can_check_consistency false
can_debug_process false
can_use_nodelocal_storage false
can_view_node_info false
can_view_tsdb_metrics false
Expand All @@ -367,6 +381,7 @@ can_admin_scatter true
can_admin_split true
can_admin_unsplit true
can_check_consistency true
can_debug_process true
can_use_nodelocal_storage true
can_view_node_info true
can_view_tsdb_metrics true
Expand Down
133 changes: 133 additions & 0 deletions pkg/ccl/serverccl/server_sql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@ import (
"errors"
"fmt"
"io"
"net/http"
"strings"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/ccl"
"github.com/cockroachdb/cockroach/pkg/ccl/utilccl/licenseccl"
"github.com/cockroachdb/cockroach/pkg/multitenant/tenantcapabilities"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server/systemconfigwatcher/systemconfigwatchertest"
Expand Down Expand Up @@ -194,6 +196,137 @@ func TestTenantHTTP(t *testing.T) {

}

// TestTenantProcessDebugging verifies that in-process SQL tenant servers gate
// process debugging behind capabilities.
func TestTenantProcessDebugging(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
ctx := context.Background()

tc := serverutils.StartNewTestCluster(t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{
DefaultTestTenant: base.TestTenantDisabled,
}})
defer tc.Stopper().Stop(ctx)
db := tc.ServerConn(0)
defer db.Close()

s := tc.Server(0)

tenant, _, err := s.StartSharedProcessTenant(ctx,
base.TestSharedProcessTenantArgs{
TenantID: serverutils.TestTenantID(),
TenantName: "processdebug",
})
require.NoError(t, err)
defer tenant.Stopper().Stop(ctx)

t.Run("system tenant pprof", func(t *testing.T) {
httpClient, err := s.GetAdminHTTPClient()
require.NoError(t, err)
defer httpClient.CloseIdleConnections()

url := s.AdminURL().URL
url.Path = url.Path + "/debug/pprof/goroutine"
q := url.Query()
q.Add("debug", "2")
url.RawQuery = q.Encode()

resp, err := httpClient.Get(url.String())
require.NoError(t, err)
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
require.NoError(t, err)

require.Equal(t, http.StatusOK, resp.StatusCode)
require.Contains(t, string(body), "goroutine")
})

t.Run("pprof", func(t *testing.T) {
httpClient, err := tenant.GetAdminHTTPClient()
require.NoError(t, err)
defer httpClient.CloseIdleConnections()

url := tenant.AdminURL().URL
url.Path = url.Path + "/debug/pprof/"
q := url.Query()
q.Add("debug", "2")
url.RawQuery = q.Encode()

resp, err := httpClient.Get(url.String())
require.NoError(t, err)
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
require.NoError(t, err)
require.Equal(t, http.StatusForbidden, resp.StatusCode)
require.Contains(t, string(body), "tenant does not have capability to debug the running process")

_, err = db.Exec(`ALTER TENANT processdebug GRANT CAPABILITY can_debug_process=true`)
require.NoError(t, err)

tc.WaitForTenantCapabilities(t, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{
tenantcapabilities.CanDebugProcess: "true",
})

resp, err = httpClient.Get(url.String())
require.NoError(t, err)
defer resp.Body.Close()
body, err = io.ReadAll(resp.Body)
require.NoError(t, err)
require.Equal(t, http.StatusOK, resp.StatusCode)
require.Contains(t, string(body), "goroutine")

_, err = db.Exec(`ALTER TENANT processdebug REVOKE CAPABILITY can_debug_process`)
require.NoError(t, err)

tc.WaitForTenantCapabilities(t, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{
tenantcapabilities.CanDebugProcess: "false",
})
})

t.Run("vmodule", func(t *testing.T) {
httpClient, err := tenant.GetAdminHTTPClient()
require.NoError(t, err)
defer httpClient.CloseIdleConnections()

url := tenant.AdminURL().URL
url.Path = url.Path + "/debug/vmodule"
q := url.Query()
q.Add("duration", "-1s")
q.Add("vmodule", "exec_log=3")
url.RawQuery = q.Encode()

resp, err := httpClient.Get(url.String())
require.NoError(t, err)
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
require.NoError(t, err)
require.Equal(t, http.StatusForbidden, resp.StatusCode)
require.Contains(t, string(body), "tenant does not have capability to debug the running process")

_, err = db.Exec(`ALTER TENANT processdebug GRANT CAPABILITY can_debug_process=true`)
require.NoError(t, err)

tc.WaitForTenantCapabilities(t, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{
tenantcapabilities.CanDebugProcess: "true",
})

resp, err = httpClient.Get(url.String())
require.NoError(t, err)
defer resp.Body.Close()
body, err = io.ReadAll(resp.Body)
require.NoError(t, err)
require.Equal(t, http.StatusOK, resp.StatusCode)
require.Contains(t, string(body), "previous vmodule configuration: \nnew vmodule configuration: exec_log=3\n")

_, err = db.Exec(`ALTER TENANT processdebug REVOKE CAPABILITY can_debug_process`)
require.NoError(t, err)

tc.WaitForTenantCapabilities(t, serverutils.TestTenantID(), map[tenantcapabilities.ID]string{
tenantcapabilities.CanDebugProcess: "false",
})
})
}

func TestNonExistentTenant(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down
3 changes: 3 additions & 0 deletions pkg/configprofiles/testdata/multitenant-app
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ SHOW TENANTS WITH CAPABILITIES
1 system ready shared can_admin_split true
1 system ready shared can_admin_unsplit true
1 system ready shared can_check_consistency true
1 system ready shared can_debug_process true
1 system ready shared can_use_nodelocal_storage true
1 system ready shared can_view_node_info true
1 system ready shared can_view_tsdb_metrics true
Expand All @@ -68,6 +69,7 @@ SHOW TENANTS WITH CAPABILITIES
2 template ready none can_admin_split true
2 template ready none can_admin_unsplit true
2 template ready none can_check_consistency true
2 template ready none can_debug_process true
2 template ready none can_use_nodelocal_storage true
2 template ready none can_view_node_info true
2 template ready none can_view_tsdb_metrics true
Expand All @@ -78,6 +80,7 @@ SHOW TENANTS WITH CAPABILITIES
3 application ready shared can_admin_split true
3 application ready shared can_admin_unsplit true
3 application ready shared can_check_consistency true
3 application ready shared can_debug_process true
3 application ready shared can_use_nodelocal_storage true
3 application ready shared can_view_node_info true
3 application ready shared can_view_tsdb_metrics true
Expand Down
3 changes: 3 additions & 0 deletions pkg/configprofiles/testdata/multitenant-app-repl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ SHOW TENANTS WITH CAPABILITIES
1 system ready shared can_admin_split true
1 system ready shared can_admin_unsplit true
1 system ready shared can_check_consistency true
1 system ready shared can_debug_process true
1 system ready shared can_use_nodelocal_storage true
1 system ready shared can_view_node_info true
1 system ready shared can_view_tsdb_metrics true
Expand All @@ -68,6 +69,7 @@ SHOW TENANTS WITH CAPABILITIES
2 template ready none can_admin_split true
2 template ready none can_admin_unsplit true
2 template ready none can_check_consistency true
2 template ready none can_debug_process true
2 template ready none can_use_nodelocal_storage true
2 template ready none can_view_node_info true
2 template ready none can_view_tsdb_metrics true
Expand All @@ -78,6 +80,7 @@ SHOW TENANTS WITH CAPABILITIES
3 application ready shared can_admin_split true
3 application ready shared can_admin_unsplit true
3 application ready shared can_check_consistency true
3 application ready shared can_debug_process true
3 application ready shared can_use_nodelocal_storage true
3 application ready shared can_view_node_info true
3 application ready shared can_view_tsdb_metrics true
Expand Down
2 changes: 2 additions & 0 deletions pkg/configprofiles/testdata/multitenant-noapp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ SHOW TENANTS WITH CAPABILITIES
1 system ready shared can_admin_split true
1 system ready shared can_admin_unsplit true
1 system ready shared can_check_consistency true
1 system ready shared can_debug_process true
1 system ready shared can_use_nodelocal_storage true
1 system ready shared can_view_node_info true
1 system ready shared can_view_tsdb_metrics true
Expand All @@ -61,6 +62,7 @@ SHOW TENANTS WITH CAPABILITIES
2 template ready none can_admin_split true
2 template ready none can_admin_unsplit true
2 template ready none can_check_consistency true
2 template ready none can_debug_process true
2 template ready none can_use_nodelocal_storage true
2 template ready none can_view_node_info true
2 template ready none can_view_tsdb_metrics true
Expand Down
2 changes: 2 additions & 0 deletions pkg/configprofiles/testdata/multitenant-noapp-repl
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ SHOW TENANTS WITH CAPABILITIES
1 system ready shared can_admin_split true
1 system ready shared can_admin_unsplit true
1 system ready shared can_check_consistency true
1 system ready shared can_debug_process true
1 system ready shared can_use_nodelocal_storage true
1 system ready shared can_view_node_info true
1 system ready shared can_view_tsdb_metrics true
Expand All @@ -61,6 +62,7 @@ SHOW TENANTS WITH CAPABILITIES
2 template ready none can_admin_split true
2 template ready none can_admin_unsplit true
2 template ready none can_check_consistency true
2 template ready none can_debug_process true
2 template ready none can_use_nodelocal_storage true
2 template ready none can_view_node_info true
2 template ready none can_view_tsdb_metrics true
Expand Down
16 changes: 16 additions & 0 deletions pkg/kv/kvserver/tenantrate/limiter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,16 @@ func (ts *testState) HasCapabilityForBatch(

func (ts *testState) BindReader(tenantcapabilities.Reader) {}

var _ tenantcapabilities.Authorizer = &testState{}

func (ts *testState) HasProcessDebugCapability(ctx context.Context, tenID roachpb.TenantID) error {
if ts.capabilities[tenID].CanDebugProcess {
return nil
} else {
return errors.New("unauthorized")
}
}

func (ts *testState) HasNodeStatusCapability(_ context.Context, tenID roachpb.TenantID) error {
if ts.capabilities[tenID].CanViewNodeInfo {
return nil
Expand Down Expand Up @@ -767,6 +777,8 @@ func parseStrings(t *testing.T, d *datadriven.TestData) []string {
// is subject to rate limit checks. (For testing in this package.)
type fakeAuthorizer struct{}

var _ tenantcapabilities.Authorizer = &fakeAuthorizer{}

func (fakeAuthorizer) HasNodeStatusCapability(_ context.Context, tenID roachpb.TenantID) error {
return nil
}
Expand All @@ -787,3 +799,7 @@ func (fakeAuthorizer) HasCapabilityForBatch(
return nil
}
func (fakeAuthorizer) BindReader(tenantcapabilities.Reader) {}

func (fakeAuthorizer) HasProcessDebugCapability(ctx context.Context, tenID roachpb.TenantID) error {
return nil
}
Loading

0 comments on commit cc16c03

Please sign in to comment.