From f0c6f0a37d2c7120dd3b61bf4092774ca408ed1c Mon Sep 17 00:00:00 2001 From: kousiknath Date: Mon, 1 Apr 2024 16:14:02 +0530 Subject: [PATCH] cli: adding column redaction to selected system and crdb_internal tables DB dump tables contain un-redacted information which might include crucial customer data such as hostnames and IP addresses. Some of this table data is served to debug zip through APIs, but many self-hosted customers would not like to pass this information out of their network. This PR solves this issue by introducing redacted query support for a few such tables. Epic: https://cockroachlabs.atlassian.net/browse/CRDB-19369 Release note: None --- pkg/cli/zip_table_registry.go | 139 +++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 52 deletions(-) diff --git a/pkg/cli/zip_table_registry.go b/pkg/cli/zip_table_registry.go index 14abe9c18ced..2c8e68878943 100644 --- a/pkg/cli/zip_table_registry.go +++ b/pkg/cli/zip_table_registry.go @@ -500,28 +500,32 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{ "crdb_internal.kv_node_status": { // `env` column can contain sensitive node environment variable values, // such as AWS_ACCESS_KEY. 
- nonSensitiveCols: NonSensitiveColumns{ - "node_id", - "network", - "address", - "attrs", - "locality", - "server_version", - "go_version", - "tag", - "time", - "revision", - "cgo_compiler", - "platform", - "distribution", - "type", - "dependencies", - "started_at", - "updated_at", - "metrics", - "args", - "activity", - }, + // Some fields are marked as `` because we want to redact hostname, ip address and other sensitive fields + // in the db dump files contained in debugzip + customQueryRedacted: `SELECT + "node_id", + "network", + '' as address, + "attrs", + "locality", + "server_version", + "go_version", + "tag", + "time", + "revision", + "cgo_compiler", + "platform", + "distribution", + "type", + "dependencies", + "started_at", + "updated_at", + "metrics", + '' as args, + '' as env, + "activity" + FROM crdb_internal.kv_node_status + `, }, "crdb_internal.kv_store_status": { nonSensitiveCols: NonSensitiveColumns{ @@ -718,10 +722,25 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{ "crdb_internal.gossip_nodes": { // `cluster_name` is hashed as we only care to see whether values are // identical across nodes. 
+ // Some fields are marked as `` because we want to redact hostname, ip address and other sensitive fields + // in the db dump files contained in debugzip customQueryRedacted: `SELECT - node_id, network, address, advertise_address, sql_network, sql_address, - advertise_sql_address, attrs, locality, fnv32(cluster_name) as cluster_name, - server_version, build_tag, started_at, is_live, ranges, leases + node_id, + network, + '' as address, + '' as advertise_address, + sql_network, + '' as sql_address, + '' as advertise_sql_address, + attrs, + '' as locality, + fnv32(cluster_name) as cluster_name, + server_version, + build_tag, + started_at, + is_live, + ranges, + leases FROM crdb_internal.gossip_nodes`, }, "crdb_internal.leases": { @@ -848,12 +867,24 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{ }, }, "crdb_internal.node_runtime_info": { - nonSensitiveCols: NonSensitiveColumns{ - "node_id", - "component", - "field", - "value", - }, + // Some fields are marked as `` because we want to redact hostname, ip address and other sensitive fields + // in the db dump files contained in debugzip + customQueryRedacted: `SELECT * FROM ( + SELECT + "node_id", + "component", + "field", + "value" + FROM crdb_internal.node_runtime_info + WHERE field NOT IN ('URL', 'Host', 'URI') UNION + SELECT + "node_id", + "component", + "field", + '' AS value + FROM crdb_internal.node_runtime_info + WHERE field IN ('URL', 'Host', 'URI') + ) ORDER BY node_id`, }, "crdb_internal.node_sessions": { // `client_address` contains unredacted client IP addresses. @@ -1074,17 +1105,17 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{ /** * NB: The following system tables explicitly forbidden: - * - system.users: avoid downloading passwords. - * - system.web_sessions: avoid downloading active session tokens. - * - system.join_tokens: avoid downloading secret join keys. - * - system.comments: avoid downloading noise from SQL schema. 
- * - system.ui: avoid downloading noise from UI customizations. - * - system.zones: the contents of crdb_internal.zones is easier to use. - * - system.statement_bundle_chunks: avoid downloading a large table that's + * - system.users: avoid downloading passwords. + * - system.web_sessions: avoid downloading active session tokens. + * - system.join_tokens: avoid downloading secret join keys. + * - system.comments: avoid downloading noise from SQL schema. + * - system.ui: avoid downloading noise from UI customizations. + * - system.zones: the contents of crdb_internal.zones is easier to use. + * - system.statement_bundle_chunks: avoid downloading a large table that's * hard to interpret currently. - * - system.statement_statistics: historical data, usually too much to + * - system.statement_statistics: historical data, usually too much to * download. - * - system.transaction_statistics: ditto + * - system.transaction_statistics: ditto * - system.statement_activity: ditto * - system.transaction_activity: ditto * @@ -1304,15 +1335,15 @@ var zipSystemTables = DebugZipTableRegistry{ "system.settings": { customQueryUnredacted: `SELECT * FROM system.settings`, customQueryRedacted: `SELECT * FROM ( - SELECT * - FROM system.settings - WHERE "valueType" <> 's' + SELECT * + FROM system.settings + WHERE "valueType" <> 's' ) UNION ( - SELECT name, '' as value, - "lastUpdated", - "valueType" - FROM system.settings - WHERE "valueType" = 's' + SELECT name, '' as value, + "lastUpdated", + "valueType" + FROM system.settings + WHERE "valueType" = 's' )`, }, "system.span_configurations": { @@ -1329,12 +1360,16 @@ var zipSystemTables = DebugZipTableRegistry{ }, }, "system.sql_instances": { - nonSensitiveCols: NonSensitiveColumns{ + // Some fields are marked as `` because we want to redact hostname, ip address and other sensitive fields + // in the db dump files contained in debugzip + customQueryRedacted: `SELECT "id", - "addr", + '' as addr, "session_id", - "locality", - }, + '' as 
locality, + '' as sql_addr + FROM system.sql_instances + `, }, // system.sql_stats_cardinality shows row counts for all of the system tables related to the SQL Stats // system, grouped by aggregated timestamp. None of this information is sensitive. It aids in escalations