From 767d9aba36d5f2b674795aab317a06edd2d56b16 Mon Sep 17 00:00:00 2001 From: Alex Barganier Date: Fri, 6 Jan 2023 16:46:29 -0500 Subject: [PATCH] pkg/server: filter _status/logs and _status/logfiles by tenant The first iteration of CockroachDB containing multiple tenants in a single process will have all tenants share the same log files/sinks, where each log entry is tagged with the tenant that it pertains to. When fetching these logs using status server endpoints such as `/_status/logs` and `/_status/logfiles`, we must ensure that non-system tenants do not gain access to logs that don't belong to them. The system tenant on the other hand has sufficient privilege to view *all logs*, so no such filtering is required. This patch updates both of the previously mentioned endpoints to filter logs based on tenant ID in non-system tenant status servers. We use the server's rpcCtx to determine which tenant the server belongs to. If that server belongs to any tenant other than the system tenant, we filter the response's log entries by the tenantID found in the rpcCtx before returning to the caller. Release note: none --- .../serverccl/statusccl/tenant_status_test.go | 12 +- pkg/server/status.go | 27 +++- pkg/server/status_test.go | 118 ++++++++++++++++++ 3 files changed, 155 insertions(+), 2 deletions(-) diff --git a/pkg/ccl/serverccl/statusccl/tenant_status_test.go b/pkg/ccl/serverccl/statusccl/tenant_status_test.go index 37e3c90b0a09..15644534cebf 100644 --- a/pkg/ccl/serverccl/statusccl/tenant_status_test.go +++ b/pkg/ccl/serverccl/statusccl/tenant_status_test.go @@ -47,7 +47,9 @@ import ( func TestTenantStatusAPI(t *testing.T) { defer leaktest.AfterTest(t)() - defer log.Scope(t).Close(t) + s := log.ScopeWithoutShowLogs(t) + defer s.Close(t) + defer s.SetupSingleFileLogging()() // The liveness session might expire before the stress race can finish. skip.UnderStressRace(t, "expensive tests") @@ -156,6 +158,14 @@ func testTenantLogs(ctx context.Context, t *testing.T, helper serverccl.TenantTe }) require.NoError(t, err) require.NotEmpty(t, logsFileResp.Entries) + systemTenantIDStr := fmt.Sprintf("%d", roachpb.SystemTenantID.InternalValue) + for _, resp := range []*serverpb.LogEntriesResponse{logsResp, logsFileResp} { + for _, entry := range resp.Entries { + // Logs belonging to the system tenant ID should never show up in a response + // provided by an app tenant server. Verify this filtering. + require.NotEqual(t, entry.TenantID, systemTenantIDStr) + } + } } func TestTenantCannotSeeNonTenantStats(t *testing.T) { diff --git a/pkg/server/status.go b/pkg/server/status.go index f71b82a3eb41..c85c821a07ba 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -1239,6 +1239,13 @@ func (s *statusServer) LogFile( if err != nil { return nil, serverError(ctx, err) } + // Unless we're the system tenant, clients should only be able + // to view logs that pertain to their own tenant. Set the filter + // accordingly. + tenantIDFilter := "" + if s.rpcCtx.TenantID != roachpb.SystemTenantID { + tenantIDFilter = s.rpcCtx.TenantID.String() + } for { var entry logpb.Entry if err := decoder.Decode(&entry); err != nil { @@ -1247,6 +1254,9 @@ func (s *statusServer) LogFile( } return nil, serverError(ctx, err) } + if tenantIDFilter != "" && entry.TenantID != tenantIDFilter { + continue + } resp.Entries = append(resp.Entries, entry) } @@ -1354,7 +1364,22 @@ func (s *statusServer) Logs( return nil, serverError(ctx, err) } - return &serverpb.LogEntriesResponse{Entries: entries}, nil + out := &serverpb.LogEntriesResponse{} + // Unless we're the system tenant, clients should only be able + // to view logs that pertain to their own tenant. Set the filter + // accordingly. + tenantIDFilter := "" + if s.rpcCtx.TenantID != roachpb.SystemTenantID { + tenantIDFilter = s.rpcCtx.TenantID.String() + } + for _, e := range entries { + if tenantIDFilter != "" && e.TenantID != tenantIDFilter { + continue + } + out.Entries = append(out.Entries, e) + } + + return out, nil } // Stacks returns goroutine or thread stack traces. diff --git a/pkg/server/status_test.go b/pkg/server/status_test.go index ca65b3474dd1..5e2d80bd4c8f 100644 --- a/pkg/server/status_test.go +++ b/pkg/server/status_test.go @@ -655,6 +655,124 @@ func TestStatusLocalLogs(t *testing.T) { } } +// TestStatusLocalLogsTenantFilter checks to ensure that local/logfiles, +// local/logfiles/{filename} and local/log function correctly filter +// logs by tenant ID. +func TestStatusLocalLogsTenantFilter(t *testing.T) { + defer leaktest.AfterTest(t)() + if log.V(3) { + skip.IgnoreLint(t, "Test only works with low verbosity levels") + } + + s := log.ScopeWithoutShowLogs(t) + defer s.Close(t) + + // This test cares about the number of output files. Ensure + // there's just one. + defer s.SetupSingleFileLogging()() + + ts := startServer(t) + defer ts.Stopper().Stop(context.Background()) + + ctxSysTenant := context.Background() + ctxSysTenant = context.WithValue(ctxSysTenant, log.ServerIdentificationContextKey{}, &idProvider{ + tenantID: roachpb.SystemTenantID, + clusterID: &base.ClusterIDContainer{}, + serverID: &base.NodeIDContainer{}, + }) + appTenantID := roachpb.MustMakeTenantID(uint64(2)) + ctxAppTenant := context.Background() + ctxAppTenant = context.WithValue(ctxAppTenant, log.ServerIdentificationContextKey{}, &idProvider{ + tenantID: appTenantID, + clusterID: &base.ClusterIDContainer{}, + serverID: &base.NodeIDContainer{}, + }) + + // Log an error of each main type which we expect to be able to retrieve. + // The resolution of our log timestamps is such that it's possible to get + // two subsequent log messages with the same timestamp. This test will fail + // when that occurs. By adding a small sleep in here after each timestamp to + // ensures this isn't the case and that the log filtering doesn't filter out + // the log entires we're looking for. The value of 20 μs was chosen because + // the log timestamps have a fidelity of 10 μs and thus doubling that should + // be a sufficient buffer. + // See util/log/clog.go formatHeader() for more details. + const sleepBuffer = time.Microsecond * 20 + log.Errorf(ctxSysTenant, "system tenant msg 1") + time.Sleep(sleepBuffer) + log.Errorf(ctxAppTenant, "app tenant msg 1") + time.Sleep(sleepBuffer) + log.Warningf(ctxSysTenant, "system tenant msg 2") + time.Sleep(sleepBuffer) + log.Warningf(ctxAppTenant, "app tenant msg 2") + time.Sleep(sleepBuffer) + log.Infof(ctxSysTenant, "system tenant msg 3") + time.Sleep(sleepBuffer) + log.Infof(ctxAppTenant, "app tenant msg 3") + timestampEnd := timeutil.Now().UnixNano() + + var listFilesResp serverpb.LogFilesListResponse + if err := getStatusJSONProto(ts, "logfiles/local", &listFilesResp); err != nil { + t.Fatal(err) + } + require.Lenf(t, listFilesResp.Files, 1, "expected 1 log files; got %d", len(listFilesResp.Files)) + + testCases := []struct { + name string + tenantID roachpb.TenantID + }{ + { + name: "logs for system tenant does not apply filter", + tenantID: roachpb.SystemTenantID, + }, + { + name: "logs for app tenant applies tenant ID filter", + tenantID: appTenantID, + }, + } + + for _, testCase := range testCases { + // Non-system tenant servers filter to the tenant that they belong to. + // Set the server tenant ID for this test case. + ts.rpcContext.TenantID = testCase.tenantID + + var logfilesResp serverpb.LogEntriesResponse + if err := getStatusJSONProto(ts, "logfiles/local/"+listFilesResp.Files[0].Name, &logfilesResp); err != nil { + t.Fatal(err) + } + var logsResp serverpb.LogEntriesResponse + if err := getStatusJSONProto(ts, fmt.Sprintf("logs/local?end_time=%d", timestampEnd), &logsResp); err != nil { + t.Fatal(err) + } + + // Run the same set of assertions against both responses, as they are both expected + // to contain the log entries we're looking for. + for _, response := range []serverpb.LogEntriesResponse{logfilesResp, logsResp} { + sysTenantFound, appTenantFound := false, false + for _, logEntry := range response.Entries { + if !strings.HasSuffix(logEntry.File, "status_test.go") { + continue + } + + if testCase.tenantID != roachpb.SystemTenantID { + require.Equal(t, logEntry.TenantID, testCase.tenantID.String()) + } else { + // Logs use the literal system tenant ID when tagging. + if logEntry.TenantID == fmt.Sprintf("%d", roachpb.SystemTenantID.InternalValue) { + sysTenantFound = true + } else if logEntry.TenantID == appTenantID.String() { + appTenantFound = true + } + } + } + if testCase.tenantID == roachpb.SystemTenantID { + require.True(t, sysTenantFound) + require.True(t, appTenantFound) + } + } + } +} + // TestStatusLogRedaction checks that the log file retrieval RPCs // honor the redaction flags. func TestStatusLogRedaction(t *testing.T) {