From ebbc531f217fa306822dea8e59db04f76dbd6bf3 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Mon, 11 Jan 2021 16:13:51 +0100 Subject: [PATCH] util/log: conditionally include the server IDs on every line For CC security logging we want the ability to route the logging events from the files where they are written into a centralized logging collector. However this routing is done line-by-line. To enable log aggregation across multiple clusters, or multiple nodes, we need to disambiguate which log entries come from which cluster and which node. This patch accommodates this requirement by adding the cluster ID and, for tenant servers, the tenant and SQL instance ID, on every output line when the env var `COCKROACH_ALWAYS_LOG_SERVER_IDS` is set to a true-ish value. Note: this feature is unneeded in v21.1 because in that version JSON logging is available and that already includes the server identity bits. Release note: None --- pkg/ccl/backupccl/backup_test.go | 21 +++++ .../backupccl/create_scheduled_backup_test.go | 11 +++ .../interactive_tests/test_force_auth_log.tcl | 47 ++++++++++ pkg/server/testserver.go | 7 ++ pkg/sql/logictest/logic.go | 11 ++- pkg/sql/telemetry_test.go | 3 + pkg/util/log/clog.go | 92 ++++++++++++++++--- pkg/util/log/flags.go | 10 ++ pkg/util/log/sync_buffer.go | 6 +- pkg/util/log/test_log_scope.go | 4 + 10 files changed, 193 insertions(+), 19 deletions(-) create mode 100644 pkg/cli/interactive_tests/test_force_auth_log.tcl diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index a449d824c891..43db93a09452 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -5808,6 +5808,8 @@ func TestProtectedTimestampsFailDueToLimits(t *testing.T) { // Ensure that backing up and restoring tenants succeeds. func TestBackupRestoreTenant(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + defer jobs.TestingSetAdoptAndCancelIntervals(100*time.Millisecond, 100*time.Millisecond)() const numAccounts = 1 @@ -5824,11 +5826,16 @@ func TestBackupRestoreTenant(t *testing.T) { tenant10 := sqlutils.MakeSQLRunner(conn10) tenant10.Exec(t, `CREATE DATABASE foo; CREATE TABLE foo.bar(i int primary key); INSERT INTO foo.bar VALUES (110), (210)`) + // Prevent a logging assertion that the server ID is initialized multiple times. + log.TestingClearServerIdentifiers() + _, conn11 := serverutils.StartTenant(t, srv, base.TestTenantArgs{TenantID: roachpb.MakeTenantID(11)}) defer conn11.Close() tenant11 := sqlutils.MakeSQLRunner(conn11) tenant11.Exec(t, `CREATE DATABASE foo; CREATE TABLE foo.baz(i int primary key); INSERT INTO foo.baz VALUES (111), (211)`) + log.TestingClearServerIdentifiers() + _, conn20 := serverutils.StartTenant(t, srv, base.TestTenantArgs{TenantID: roachpb.MakeTenantID(20)}) defer conn20.Close() tenant20 := sqlutils.MakeSQLRunner(conn20) @@ -5880,6 +5887,8 @@ func TestBackupRestoreTenant(t *testing.T) { [][]string{{`10`, `true`, `{"id": "10", "state": "ACTIVE"}`}}, ) + log.TestingClearServerIdentifiers() + ten10Stopper := stop.NewStopper() _, restoreConn10 := serverutils.StartTenant( t, restoreTC.Server(0), base.TestTenantArgs{ @@ -5914,6 +5923,8 @@ func TestBackupRestoreTenant(t *testing.T) { [][]string{{`10`, `true`, `{"id": "10", "state": "ACTIVE"}`}}, ) + log.TestingClearServerIdentifiers() + _, restoreConn10 = serverutils.StartTenant( t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10), Existing: true}, ) @@ -5938,6 +5949,8 @@ func TestBackupRestoreTenant(t *testing.T) { [][]string{{`10`, `true`, `{"id": "10", "state": "ACTIVE"}`}}, ) + log.TestingClearServerIdentifiers() + _, restoreConn10 := serverutils.StartTenant( t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10), Existing: true}, ) @@ -5967,6 +5980,8 @@ func TestBackupRestoreTenant(t *testing.T) { }, ) + log.TestingClearServerIdentifiers() + _, restoreConn10 := serverutils.StartTenant( t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10), Existing: true}, ) @@ -5976,6 +5991,8 @@ func TestBackupRestoreTenant(t *testing.T) { restoreTenant10.CheckQueryResults(t, `select * from foo.bar`, tenant10.QueryStr(t, `select * from foo.bar`)) restoreTenant10.CheckQueryResults(t, `select * from foo.bar2`, tenant10.QueryStr(t, `select * from foo.bar2`)) + log.TestingClearServerIdentifiers() + _, restoreConn11 := serverutils.StartTenant( t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(11), Existing: true}, ) @@ -5994,6 +6011,8 @@ func TestBackupRestoreTenant(t *testing.T) { restoreDB.Exec(t, `RESTORE TENANT 10 FROM 'nodelocal://1/t10' AS OF SYSTEM TIME `+ts1) + log.TestingClearServerIdentifiers() + _, restoreConn10 := serverutils.StartTenant( t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10), Existing: true}, ) @@ -6012,6 +6031,8 @@ func TestBackupRestoreTenant(t *testing.T) { restoreDB.Exec(t, `RESTORE TENANT 20 FROM 'nodelocal://1/t20'`) + log.TestingClearServerIdentifiers() + _, restoreConn20 := serverutils.StartTenant( t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(20), Existing: true}, ) diff --git a/pkg/ccl/backupccl/create_scheduled_backup_test.go b/pkg/ccl/backupccl/create_scheduled_backup_test.go index 7cc89e056cca..b854bf1001f2 100644 --- a/pkg/ccl/backupccl/create_scheduled_backup_test.go +++ b/pkg/ccl/backupccl/create_scheduled_backup_test.go @@ -35,6 +35,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/timeutil" pbtypes "github.com/gogo/protobuf/types" "github.com/stretchr/testify/require" @@ -172,6 +173,8 @@ func (t userType) String() string { // itself with the actual scheduling and the execution of those backups. func TestSerializesScheduledBackupExecutionArgs(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + th, cleanup := newTestHelper(t) defer cleanup() @@ -429,6 +432,8 @@ func TestSerializesScheduledBackupExecutionArgs(t *testing.T) { func TestScheduleBackup(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + th, cleanup := newTestHelper(t) defer cleanup() @@ -570,6 +575,8 @@ INSERT INTO t1 values (-1), (10), (-100); func TestCreateBackupScheduleRequiresAdminRole(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + th, cleanup := newTestHelper(t) defer cleanup() @@ -591,6 +598,8 @@ func TestCreateBackupScheduleRequiresAdminRole(t *testing.T) { func TestCreateBackupScheduleCollectionOverwrite(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + th, cleanup := newTestHelper(t) defer cleanup() @@ -611,6 +620,8 @@ func TestCreateBackupScheduleCollectionOverwrite(t *testing.T) { func TestCreateBackupScheduleInExplicitTxnRollback(t *testing.T) { defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + th, cleanup := newTestHelper(t) defer cleanup() diff --git a/pkg/cli/interactive_tests/test_force_auth_log.tcl b/pkg/cli/interactive_tests/test_force_auth_log.tcl new file mode 100644 index 000000000000..c23d46c8260c --- /dev/null +++ b/pkg/cli/interactive_tests/test_force_auth_log.tcl @@ -0,0 +1,47 @@ +#! /usr/bin/env expect -f + +source [file join [file dirname $argv0] common.tcl] + +set ::env(COCKROACH_INSECURE) "false" +set ::env(COCKROACH_HOST) "localhost" +set certs_dir "/certs" + +set ::env(COCKROACH_ALWAYS_LOG_SERVER_IDS) 1 +set ::env(COCKROACH_ALWAYS_LOG_AUTHN_EVENTS) 1 + +proc start_secure_server {argv certs_dir extra} { + report "BEGIN START SECURE SERVER" + system "$argv start-single-node --host=localhost --socket-dir=. --certs-dir=$certs_dir --pid-file=server_pid -s=path=logs/db --background $extra >>expect-cmd.log 2>&1; + $argv sql --certs-dir=$certs_dir -e 'select 1'" + report "END START SECURE SERVER" +} + +proc stop_secure_server {argv certs_dir} { + report "BEGIN STOP SECURE SERVER" + system "$argv quit --certs-dir=$certs_dir" + report "END STOP SECURE SERVER" +} + + +start_secure_server $argv $certs_dir "" + +set logfile logs/db/logs/cockroach-auth.log + +# run a client command, so we have at least one authn event in the log. +system "$argv sql -e 'create user someuser' --certs-dir=$certs_dir" +system "$argv sql -e 'select 1' --user someuser --certs-dir=$certs_dir 0 { + if buf.Len() > 0 { + buf.WriteByte(',') + } + buf.WriteString("clusterID=") + buf.WriteString(clusterID) + } + if tenantID := logging.tenantID.Get(); len(tenantID) > 0 { + if buf.Len() > 0 { + buf.WriteByte(',') + } + buf.WriteString("tenantID=") + buf.WriteString(tenantID) + } + if sqlInstanceID := atomic.LoadInt32(&logging.sqlInstanceID); sqlInstanceID != 0 { + if buf.Len() > 0 { + buf.WriteByte(',') + } + buf.WriteString("instanceID=") + buf.WriteString(strconv.Itoa(int(sqlInstanceID))) + } + entry.Tags = buf.String() + } + // Mark the logger as active, so that further configuration changes // are disabled. See IsActive() and its callers for details. setActive() diff --git a/pkg/util/log/flags.go b/pkg/util/log/flags.go index afcd72139e8f..aef46add5852 100644 --- a/pkg/util/log/flags.go +++ b/pkg/util/log/flags.go @@ -13,6 +13,7 @@ package log import ( "context" "flag" + "sync/atomic" "github.com/cockroachdb/cockroach/pkg/cli/cliflags" "github.com/cockroachdb/cockroach/pkg/util/log/logflags" @@ -36,6 +37,15 @@ func init() { logflags.LogFileVerbosityThresholdName, "minimum verbosity of messages written to the log file") } +// TestingClearServerIdentifiers clears the server identity from the +// logging system. This is for use in tests that start multiple +// servers with conflicting identities subsequently. +func TestingClearServerIdentifiers() { + logging.clusterID.Set("") + logging.tenantID.Set("") + atomic.StoreInt32(&logging.sqlInstanceID, 0) +} + // IsActive returns true iff the main logger already has some events // logged, or some secondary logger was created with configuration // taken from the main logger. diff --git a/pkg/util/log/sync_buffer.go b/pkg/util/log/sync_buffer.go index c8fcdb1b773b..1e0f7bc19dec 100644 --- a/pkg/util/log/sync_buffer.go +++ b/pkg/util/log/sync_buffer.go @@ -253,11 +253,9 @@ func (l *loggerT) initializeNewOutputFile( l.makeStartLine("arguments: %s", os.Args), ) - logging.mu.Lock() - if logging.mu.clusterID != "" { - messages = append(messages, l.makeStartLine("clusterID: %s", logging.mu.clusterID)) + if clusterID := logging.clusterID.Get(); len(clusterID) > 0 { + messages = append(messages, l.makeStartLine("clusterID: %s", clusterID)) } - logging.mu.Unlock() // Including a non-ascii character in the first 1024 bytes of the log helps // viewers that attempt to guess the character encoding. diff --git a/pkg/util/log/test_log_scope.go b/pkg/util/log/test_log_scope.go index 139600b8ee7c..b251bc20bc4f 100644 --- a/pkg/util/log/test_log_scope.go +++ b/pkg/util/log/test_log_scope.go @@ -151,6 +151,10 @@ func ScopeWithoutShowLogs(t tShim) (sc *TestLogScope) { // they only go to files. mainLog.stderrThreshold.set(Severity_NONE) + // Clear the server identifiers to prevent double initialization + // panics. + TestingClearServerIdentifiers() + t.Logf("test logs captured to: %s", tempDir) return sc }