From ab5c60d625e42d38bdf5ff922a65d32de43de45c Mon Sep 17 00:00:00 2001 From: kousiknath Date: Mon, 1 Apr 2024 16:14:02 +0530 Subject: [PATCH] cli: adding column redaction to selected system and crdb_internal tables DB dump tables contain un-redacted information that may include sensitive customer data such as hostnames and IP addresses. Some of this table data is served to debug zip through APIs, but many self-hosted customers do not want this information to leave their network. This PR addresses the issue by introducing redacted custom queries for a few such tables. Epic: https://cockroachlabs.atlassian.net/browse/CRDB-19369 Release note: None --- pkg/cli/zip_table_registry.go | 120 ++++++++++++++++++++++------------ 1 file changed, 77 insertions(+), 43 deletions(-) diff --git a/pkg/cli/zip_table_registry.go b/pkg/cli/zip_table_registry.go index 4d1c0f5af53f..97401fa66d63 100644 --- a/pkg/cli/zip_table_registry.go +++ b/pkg/cli/zip_table_registry.go @@ -454,28 +454,32 @@ var zipInternalTablesPerCluster = DebugZipTableRegistry{ "crdb_internal.kv_node_status": { // `env` column can contain sensitive node environment variable values, // such as AWS_ACCESS_KEY.
- nonSensitiveCols: NonSensitiveColumns{ - "node_id", - "network", - "address", - "attrs", - "locality", - "server_version", - "go_version", - "tag", - "time", - "revision", - "cgo_compiler", - "platform", - "distribution", - "type", - "dependencies", - "started_at", - "updated_at", - "metrics", - "args", - "activity", - }, + // Some fields are marked as `` because we want to redact hostname, ip address and other sensitive fields + // in the db dump files contained in debugzip + customQueryRedacted: `SELECT + "node_id", + "network", + '' as address, + "attrs", + "locality", + "server_version", + "go_version", + "tag", + "time", + "revision", + "cgo_compiler", + "platform", + "distribution", + "type", + "dependencies", + "started_at", + "updated_at", + "metrics", + '' as args, + '' as env, + "activity" + FROM crdb_internal.kv_node_status + `, }, "crdb_internal.kv_store_status": { nonSensitiveCols: NonSensitiveColumns{ @@ -672,10 +676,24 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{ "crdb_internal.gossip_nodes": { // `cluster_name` is hashed as we only care to see whether values are // identical across nodes. 
+ // Some fields are marked as `` because we want to redact hostname, ip address and other sensitive fields + // in the db dump files contained in debugzip customQueryRedacted: `SELECT - node_id, network, address, advertise_address, sql_network, sql_address, - advertise_sql_address, attrs, locality, fnv32(cluster_name) as cluster_name, - server_version, build_tag, started_at, is_live, ranges, leases + node_id, + network, + '' as address, + '' as advertise_address, + sql_network, + '' as sql_address, + '' as advertise_sql_address, + attrs, + '' as locality, + fnv32(cluster_name) as cluster_name, + server_version, + build_tag, + started_at, + is_live, + ranges, leases FROM crdb_internal.gossip_nodes`, }, "crdb_internal.leases": { @@ -801,12 +819,24 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{ }, }, "crdb_internal.node_runtime_info": { - nonSensitiveCols: NonSensitiveColumns{ - "node_id", - "component", - "field", - "value", - }, + // Some fields are marked as `` because we want to redact hostname, ip address and other sensitive fields + // in the db dump files contained in debugzip + customQueryRedacted: `SELECT * FROM ( + SELECT + "node_id", + "component", + "field", + "value" + FROM crdb_internal.node_runtime_info + WHERE field NOT IN ('URL', 'Host', 'URI') UNION + SELECT + "node_id", + "component", + "field", + '' AS value + FROM crdb_internal.node_runtime_info + WHERE field IN ('URL', 'Host', 'URI') + ) ORDER BY node_id`, }, "crdb_internal.node_sessions": { // `client_address` contains unredacted client IP addresses. 
@@ -1253,15 +1283,15 @@ var zipSystemTables = DebugZipTableRegistry{ "system.settings": { customQueryUnredacted: `SELECT * FROM system.settings`, customQueryRedacted: `SELECT * FROM ( - SELECT * - FROM system.settings - WHERE "valueType" <> 's' + SELECT * + FROM system.settings + WHERE "valueType" <> 's' ) UNION ( - SELECT name, '' as value, - "lastUpdated", - "valueType" - FROM system.settings - WHERE "valueType" = 's' + SELECT name, '' as value, + "lastUpdated", + "valueType" + FROM system.settings + WHERE "valueType" = 's' )`, }, "system.span_configurations": { @@ -1278,12 +1308,16 @@ var zipSystemTables = DebugZipTableRegistry{ }, }, "system.sql_instances": { - nonSensitiveCols: NonSensitiveColumns{ + // Some fields are marked as `` because we want to redact hostname, ip address and other sensitive fields + // in the db dump files contained in debugzip + customQueryRedacted: `SELECT "id", - "addr", + '' as addr, "session_id", - "locality", - }, + '' as locality, + '' as sql_addr + FROM system.sql_instances + `, }, // system.sql_stats_cardinality shows row counts for all of the system tables related to the SQL Stats // system, grouped by aggregated timestamp. None of this information is sensitive. It aids in escalations