diff --git a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant index eed665b125a6..5f8952e72b4c 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant +++ b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant @@ -194,10 +194,10 @@ SELECT * FROM crdb_internal.leases WHERE node_id < 0 ---- node_id table_id name parent_id expiration deleted -query ITTTTTIIITRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTT colnames +query ITTTTTIIITRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTTRRRRR colnames SELECT * FROM crdb_internal.node_statement_statistics WHERE node_id < 0 ---- -node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations +node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var implicit_txn 
full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations latency_seconds_min latency_seconds_max latency_seconds_p50 latency_seconds_p90 latency_seconds_p99 query ITTTIIRRRRRRRRRRRRRRRRRRRRRR colnames SELECT * FROM crdb_internal.node_transaction_statistics WHERE node_id < 0 diff --git a/pkg/cli/zip_table_registry.go b/pkg/cli/zip_table_registry.go index 4b28bef56bc9..efcff136c442 100644 --- a/pkg/cli/zip_table_registry.go +++ b/pkg/cli/zip_table_registry.go @@ -755,6 +755,11 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{ "exec_node_ids", "txn_fingerprint_id", "index_recommendations", + "latency_seconds_min", + "latency_seconds_max", + "latency_seconds_p50", + "latency_seconds_p90", + "latency_seconds_p99", }, }, "crdb_internal.node_transaction_statistics": { diff --git a/pkg/sql/appstatspb/app_stats.go b/pkg/sql/appstatspb/app_stats.go index 6c9a277d80b4..74a1a058d6e7 100644 --- a/pkg/sql/appstatspb/app_stats.go +++ b/pkg/sql/appstatspb/app_stats.go @@ -161,6 +161,7 @@ func (s *StatementStatistics) Add(other *StatementStatistics) { s.Indexes = util.CombineUniqueString(s.Indexes, other.Indexes) s.ExecStats.Add(other.ExecStats) + s.LatencyInfo.Add(other.LatencyInfo) if other.SensitiveInfo.LastErr != "" { s.SensitiveInfo.LastErr = other.SensitiveInfo.LastErr @@ -217,3 +218,20 @@ func (s *ExecStats) Add(other ExecStats) { s.Count += other.Count } + +// Add combines other into this LatencyInfo. +func (s *LatencyInfo) Add(other LatencyInfo) { + // Use the latest non-zero value. 
+ if other.P50 != 0 { + s.P50 = other.P50 + s.P90 = other.P90 + s.P99 = other.P99 + } + + if s.Min == 0 || other.Min < s.Min { + s.Min = other.Min + } + if other.Max > s.Max { + s.Max = other.Max + } +} diff --git a/pkg/sql/appstatspb/app_stats.proto b/pkg/sql/appstatspb/app_stats.proto index d83cac1691c6..90d766e9587a 100644 --- a/pkg/sql/appstatspb/app_stats.proto +++ b/pkg/sql/appstatspb/app_stats.proto @@ -108,18 +108,21 @@ message StatementStatistics { // Nodes is the ordered list of nodes ids on which the statement was executed. repeated int64 nodes = 24; - // plan_gists is the list of a compressed version of plan that can be converted (lossily) + // PlanGists is the list of a compressed version of plan that can be converted (lossily) // back into a logical plan. // Each statement contain only one plan gist, but the same statement fingerprint id // can contain more than one value. repeated string plan_gists = 26; - // index_recommendations is the list of index recommendations generated for the statement fingerprint. + // IndexRecommendations is the list of index recommendations generated for the statement fingerprint. repeated string index_recommendations = 27; - // indexes is the list of indexes used by the particular plan when executing the statement. + // Indexes is the list of indexes used by the particular plan when executing the statement. repeated string indexes = 30; + // LatencyInfo is the information about latency, such as min, max, p50, p90 and p99. + optional LatencyInfo latency_info = 31 [(gogoproto.nullable) = false]; + // Note: be sure to update `sql/app_stats.go` when adding/removing fields here! reserved 13, 14, 17, 18, 19, 20; @@ -338,3 +341,21 @@ message ExecStats { // Note: be sure to update `sql/app_stats.go` when adding/removing fields // here! } + +// LatencyInfo contains more details about the latency. +message LatencyInfo { + // Min is the minimum time in seconds spent executing the fingerprint. 
+ optional double min = 1 [(gogoproto.nullable) = false]; + + // Max is the maximum time in seconds spent executing the fingerprint. + optional double max = 2 [(gogoproto.nullable) = false]; + + // P50 is the 50th percentile in seconds for the fingerprint. + optional double p50 = 3 [(gogoproto.nullable) = false]; + + // P90 is the 90th percentile in seconds for the fingerprint. + optional double p90 = 4 [(gogoproto.nullable) = false]; + + // P99 is the 99th percentile in seconds for the fingerprint. + optional double p99 = 5 [(gogoproto.nullable) = false]; +} diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index 07c0086d9b93..37eccd6f72c6 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -374,6 +374,7 @@ func NewServer(cfg *ExecutorConfig, pool *mon.BytesMonitor) *Server { pool, nil, /* reportedProvider */ cfg.SQLStatsTestingKnobs, + insightsProvider.LatencyInformation(), ) reportedSQLStatsController := reportedSQLStats.GetController(cfg.SQLStatusServer) memSQLStats := sslocal.New( @@ -386,6 +387,7 @@ func NewServer(cfg *ExecutorConfig, pool *mon.BytesMonitor) *Server { pool, reportedSQLStats, cfg.SQLStatsTestingKnobs, + insightsProvider.LatencyInformation(), ) s := &Server{ cfg: cfg, diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index ff7f68bea527..e1a1133e3486 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -1375,7 +1375,12 @@ CREATE TABLE crdb_internal.node_statement_statistics ( database_name STRING NOT NULL, exec_node_ids INT[] NOT NULL, txn_fingerprint_id STRING, - index_recommendations STRING[] NOT NULL + index_recommendations STRING[] NOT NULL, + latency_seconds_min FLOAT, + latency_seconds_max FLOAT, + latency_seconds_p50 FLOAT, + latency_seconds_p90 FLOAT, + latency_seconds_p99 FLOAT )`, populate: func(ctx context.Context, p *planner, _ catalog.DatabaseDescriptor, addRow func(...tree.Datum) error) error { hasViewActivityOrViewActivityRedacted, err := 
p.HasViewActivityOrViewActivityRedactedRole(ctx) @@ -1485,6 +1490,11 @@ CREATE TABLE crdb_internal.node_statement_statistics ( execNodeIDs, // exec_node_ids txnFingerprintID, // txn_fingerprint_id indexRecommendations, // index_recommendations + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.Min)), // latency_seconds_min + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.Max)), // latency_seconds_max + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.P50)), // latency_seconds_p50 + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.P90)), // latency_seconds_p90 + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.P99)), // latency_seconds_p99 ) if err != nil { return err diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal b/pkg/sql/logictest/testdata/logic_test/crdb_internal index 546042dd8944..2f1f61b243d3 100644 --- a/pkg/sql/logictest/testdata/logic_test/crdb_internal +++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal @@ -326,10 +326,10 @@ SELECT * FROM crdb_internal.leases WHERE node_id < 0 ---- node_id table_id name parent_id expiration deleted -query ITTTTTIIITRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTT colnames +query ITTTTTIIITRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTTRRRRR colnames SELECT * FROM crdb_internal.node_statement_statistics WHERE node_id < 0 ---- -node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id 
index_recommendations +node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations latency_seconds_min latency_seconds_max latency_seconds_p50 latency_seconds_p90 latency_seconds_p99 query ITTTIIRRRRRRRRRRRRRRRRRRRRRR colnames SELECT * FROM crdb_internal.node_transaction_statistics WHERE node_id < 0 diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog b/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog index fb019cc5c330..57ebdd6b558c 100644 --- a/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog +++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog @@ -415,7 +415,7 @@ SELECT id, strip_volatile(descriptor) FROM crdb_internal.kv_catalog_descriptor 4294967240 {"table": {"columns": [{"id": 1, "name": "range_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "start_key", "type": {"family": "BytesFamily", "oid": 17}}, {"id": 3, "name": "start_pretty", "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "end_key", "type": {"family": "BytesFamily", "oid": 17}}, {"id": 5, "name": "end_pretty", "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "replicas", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 7, 
"name": "replica_localities", "type": {"arrayContents": {"family": "StringFamily", "oid": 25}, "arrayElemType": "StringFamily", "family": "ArrayFamily", "oid": 1009}}, {"id": 8, "name": "voting_replicas", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 9, "name": "non_voting_replicas", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 10, "name": "learner_replicas", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 11, "name": "split_enforced_until", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}], "formatVersion": 3, "id": 4294967240, "name": "ranges_no_leases", "nextColumnId": 12, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967241 {"table": {"columns": [{"id": 1, "name": "table_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "index_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 3, "name": "parent_name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "columns", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 6, "name": "column_names", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "list_value", "nullable": true, "type": {"family": 
"StringFamily", "oid": 25}}, {"id": 8, "name": "range_value", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 9, "name": "zone_id", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 10, "name": "subzone_id", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}], "formatVersion": 3, "id": 4294967241, "name": "partitions", "nextColumnId": 11, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967242 {"table": {"columns": [{"id": 1, "name": "node_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "application_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "txn_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 4, "name": "txn_time_avg_sec", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 5, "name": "txn_time_var_sec", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 6, "name": "committed_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 7, "name": "implicit_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}], "formatVersion": 3, "id": 4294967242, "name": "node_txn_stats", "nextColumnId": 8, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} -4294967243 {"table": 
{"columns": [{"id": 1, "name": "node_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "application_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "flags", "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "statement_id", "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "key", "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "anonymized", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 8, "name": "first_attempt_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 9, "name": "max_retries", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 10, "name": "last_error", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 11, "name": "rows_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 12, "name": "rows_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 13, "name": "idle_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 14, "name": "idle_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 15, "name": "parse_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 16, "name": "parse_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 17, "name": "plan_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 18, "name": "plan_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 19, "name": "run_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 20, "name": "run_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 21, "name": "service_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 22, "name": "service_lat_var", "type": {"family": 
"FloatFamily", "oid": 701, "width": 64}}, {"id": 23, "name": "overhead_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 24, "name": "overhead_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 25, "name": "bytes_read_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 26, "name": "bytes_read_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 27, "name": "rows_read_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 28, "name": "rows_read_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 29, "name": "rows_written_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 30, "name": "rows_written_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 31, "name": "network_bytes_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 32, "name": "network_bytes_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 33, "name": "network_msgs_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 34, "name": "network_msgs_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 35, "name": "max_mem_usage_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 36, "name": "max_mem_usage_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 37, "name": "max_disk_usage_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 38, "name": "max_disk_usage_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 39, "name": "contention_time_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 40, "name": "contention_time_var", "nullable": true, "type": {"family": "FloatFamily", 
"oid": 701, "width": 64}}, {"id": 41, "name": "cpu_sql_nanos_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 42, "name": "cpu_sql_nanos_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 43, "name": "implicit_txn", "type": {"oid": 16}}, {"id": 44, "name": "full_scan", "type": {"oid": 16}}, {"id": 45, "name": "sample_plan", "nullable": true, "type": {"family": "JsonFamily", "oid": 3802}}, {"id": 46, "name": "database_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 47, "name": "exec_node_ids", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 48, "name": "txn_fingerprint_id", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 49, "name": "index_recommendations", "type": {"arrayContents": {"family": "StringFamily", "oid": 25}, "arrayElemType": "StringFamily", "family": "ArrayFamily", "oid": 1009}}], "formatVersion": 3, "id": 4294967243, "name": "node_statement_statistics", "nextColumnId": 50, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} +4294967243 {"table": {"columns": [{"id": 1, "name": "node_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "application_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "flags", "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "statement_id", "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "key", "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "anonymized", 
"nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 8, "name": "first_attempt_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 9, "name": "max_retries", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 10, "name": "last_error", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 11, "name": "rows_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 12, "name": "rows_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 13, "name": "idle_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 14, "name": "idle_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 15, "name": "parse_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 16, "name": "parse_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 17, "name": "plan_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 18, "name": "plan_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 19, "name": "run_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 20, "name": "run_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 21, "name": "service_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 22, "name": "service_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 23, "name": "overhead_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 24, "name": "overhead_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 25, "name": "bytes_read_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 26, "name": "bytes_read_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 27, 
"name": "rows_read_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 28, "name": "rows_read_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 29, "name": "rows_written_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 30, "name": "rows_written_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 31, "name": "network_bytes_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 32, "name": "network_bytes_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 33, "name": "network_msgs_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 34, "name": "network_msgs_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 35, "name": "max_mem_usage_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 36, "name": "max_mem_usage_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 37, "name": "max_disk_usage_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 38, "name": "max_disk_usage_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 39, "name": "contention_time_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 40, "name": "contention_time_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 41, "name": "cpu_sql_nanos_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 42, "name": "cpu_sql_nanos_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 43, "name": "implicit_txn", "type": {"oid": 16}}, {"id": 44, "name": "full_scan", "type": {"oid": 16}}, {"id": 45, "name": "sample_plan", "nullable": true, "type": {"family": 
"JsonFamily", "oid": 3802}}, {"id": 46, "name": "database_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 47, "name": "exec_node_ids", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 48, "name": "txn_fingerprint_id", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 49, "name": "index_recommendations", "type": {"arrayContents": {"family": "StringFamily", "oid": 25}, "arrayElemType": "StringFamily", "family": "ArrayFamily", "oid": 1009}}, {"id": 50, "name": "latency_seconds_min", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 51, "name": "latency_seconds_max", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 52, "name": "latency_seconds_p50", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 53, "name": "latency_seconds_p90", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 54, "name": "latency_seconds_p99", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}], "formatVersion": 3, "id": 4294967243, "name": "node_statement_statistics", "nextColumnId": 55, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967244 {"table": {"columns": [{"id": 1, "name": "store_id", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "value", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}], 
"formatVersion": 3, "id": 4294967244, "name": "node_metrics", "nextColumnId": 4, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967245 {"table": {"columns": [{"id": 1, "name": "node_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "session_id", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "user_name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "client_address", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "application_name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "active_queries", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "last_active_query", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 8, "name": "num_txns_executed", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 9, "name": "session_start", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}, {"id": 10, "name": "active_query_start", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}, {"id": 11, "name": "kv_txn", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 12, "name": "alloc_bytes", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 13, "name": "max_alloc_bytes", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 14, "name": "status", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 15, "name": "session_end", "nullable": true, 
"type": {"family": "TimestampFamily", "oid": 1114}}], "formatVersion": 3, "id": 4294967245, "name": "node_sessions", "nextColumnId": 16, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967246 {"table": {"columns": [{"id": 1, "name": "id", "nullable": true, "type": {"family": "UuidFamily", "oid": 2950}}, {"id": 2, "name": "node_id", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 3, "name": "session_id", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "start", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}, {"id": 5, "name": "txn_string", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "application_name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "num_stmts", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 8, "name": "num_retries", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 9, "name": "num_auto_retries", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 10, "name": "last_auto_retry_reason", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}], "formatVersion": 3, "id": 4294967246, "name": "node_transactions", "nextColumnId": 11, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, 
"replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} diff --git a/pkg/sql/sqlstats/insights/detector.go b/pkg/sql/sqlstats/insights/detector.go index 1e11904b31fe..d2d4dcbceff9 100644 --- a/pkg/sql/sqlstats/insights/detector.go +++ b/pkg/sql/sqlstats/insights/detector.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/appstatspb" "github.com/cockroachdb/cockroach/pkg/util/quantile" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" ) type detector interface { @@ -50,13 +51,17 @@ func (d *compositeDetector) isSlow(statement *Statement) bool { return result } -var desiredQuantiles = map[float64]float64{0.5: 0.05, 0.99: 0.001} +var desiredQuantiles = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001} type anomalyDetector struct { settings *cluster.Settings metrics Metrics store *list.List - index map[appstatspb.StmtFingerprintID]*list.Element + mu struct { + syncutil.RWMutex + + index map[appstatspb.StmtFingerprintID]*list.Element + } } type latencySummaryEntry struct { @@ -85,12 +90,29 @@ func (d *anomalyDetector) isSlow(stmt *Statement) (decision bool) { return } +func (d *anomalyDetector) GetPercentileValues(id appstatspb.StmtFingerprintID) PercentileValues { + d.mu.RLock() + defer d.mu.RUnlock() + latencies := PercentileValues{} + if entry, ok := d.mu.index[id]; ok { + latencySummary := entry.Value.(latencySummaryEntry).value + // If more percentiles are added, update the value of `desiredQuantiles` above + // to include the new keys. 
+ latencies.P50 = latencySummary.Query(0.5) + latencies.P90 = latencySummary.Query(0.9) + latencies.P99 = latencySummary.Query(0.99) + } + return latencies +} + func (d *anomalyDetector) withFingerprintLatencySummary( stmt *Statement, consumer func(latencySummary *quantile.Stream), ) { + d.mu.Lock() + defer d.mu.Unlock() var latencySummary *quantile.Stream - if element, ok := d.index[stmt.FingerprintID]; ok { + if element, ok := d.mu.index[stmt.FingerprintID]; ok { // We are already tracking latencies for this fingerprint. latencySummary = element.Value.(latencySummaryEntry).value d.store.MoveToFront(element) // Mark this latency summary as recently used. @@ -98,7 +120,7 @@ func (d *anomalyDetector) withFingerprintLatencySummary( // We want to start tracking latencies for this fingerprint. latencySummary = quantile.NewTargeted(desiredQuantiles) entry := latencySummaryEntry{key: stmt.FingerprintID, value: latencySummary} - d.index[stmt.FingerprintID] = d.store.PushFront(entry) + d.mu.index[stmt.FingerprintID] = d.store.PushFront(entry) d.metrics.Fingerprints.Inc(1) d.metrics.Memory.Inc(latencySummary.ByteSize()) } else { @@ -114,7 +136,7 @@ func (d *anomalyDetector) withFingerprintLatencySummary( if d.metrics.Memory.Value() > AnomalyDetectionMemoryLimit.Get(&d.settings.SV) { element := d.store.Back() entry := d.store.Remove(element).(latencySummaryEntry) - delete(d.index, entry.key) + delete(d.mu.index, entry.key) d.metrics.Evictions.Inc(1) d.metrics.Fingerprints.Dec(1) d.metrics.Memory.Dec(entry.value.ByteSize()) @@ -122,12 +144,14 @@ func (d *anomalyDetector) withFingerprintLatencySummary( } func newAnomalyDetector(settings *cluster.Settings, metrics Metrics) *anomalyDetector { - return &anomalyDetector{ + anomaly := &anomalyDetector{ settings: settings, metrics: metrics, store: list.New(), - index: make(map[appstatspb.StmtFingerprintID]*list.Element), } + anomaly.mu.index = make(map[appstatspb.StmtFingerprintID]*list.Element) + + return anomaly } type 
latencyThresholdDetector struct { diff --git a/pkg/sql/sqlstats/insights/insights.go b/pkg/sql/sqlstats/insights/insights.go index eeb8b2ffe7aa..3d047468ef56 100644 --- a/pkg/sql/sqlstats/insights/insights.go +++ b/pkg/sql/sqlstats/insights/insights.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/sql/appstatspb" "github.com/cockroachdb/cockroach/pkg/sql/clusterunique" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/stop" @@ -153,6 +154,16 @@ type Reader interface { IterateInsights(context.Context, func(context.Context, *Insight)) } +type LatencyInformation interface { + GetPercentileValues(fingerprintID appstatspb.StmtFingerprintID) PercentileValues +} + +type PercentileValues struct { + P50 float64 + P90 float64 + P99 float64 +} + // Provider offers access to the insights subsystem. type Provider interface { // Start launches the background tasks necessary for processing insights. @@ -164,21 +175,27 @@ type Provider interface { // Reader returns an object that offers read access to any detected insights. Reader() Reader + + // LatencyInformation returns an object that offers read access to latency information, + // such as percentiles. + LatencyInformation() LatencyInformation } // New builds a new Provider. 
func New(st *cluster.Settings, metrics Metrics) Provider { store := newStore(st) + anomalyDetector := newAnomalyDetector(st, metrics) return &defaultProvider{ store: store, ingester: newConcurrentBufferIngester( newRegistry(st, &compositeDetector{detectors: []detector{ &latencyThresholdDetector{st: st}, - newAnomalyDetector(st, metrics), + anomalyDetector, }}, &compositeSink{sinks: []sink{ store, }}), ), + anomalyDetector: anomalyDetector, } } diff --git a/pkg/sql/sqlstats/insights/provider.go b/pkg/sql/sqlstats/insights/provider.go index 8f9ca766f40c..9ea5b944c861 100644 --- a/pkg/sql/sqlstats/insights/provider.go +++ b/pkg/sql/sqlstats/insights/provider.go @@ -18,8 +18,9 @@ import ( ) type defaultProvider struct { - store *lockingStore - ingester *concurrentBufferIngester + store *lockingStore + ingester *concurrentBufferIngester + anomalyDetector *anomalyDetector } var _ Provider = &defaultProvider{} @@ -40,6 +41,10 @@ func (p *defaultProvider) Reader() Reader { return p.store } +func (p *defaultProvider) LatencyInformation() LatencyInformation { + return p.anomalyDetector +} + type nullWriter struct{} func (n *nullWriter) ObserveStatement(_ clusterunique.ID, _ *Statement) { diff --git a/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go b/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go index 36fb56c7b4e2..9ebe5e2fc94c 100644 --- a/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go +++ b/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go @@ -102,7 +102,14 @@ func TestSQLStatsJsonEncoding(t *testing.T) { }, "nodes": [{{joinInts .IntArray}}], "planGists": [{{joinStrings .StringArray}}], - "indexes": [{{joinStrings .StringArray}}] + "indexes": [{{joinStrings .StringArray}}], + "latencyInfo": { + "min": {{.Float}}, + "max": {{.Float}}, + "p50": {{.Float}}, + "p90": {{.Float}}, + "p99": {{.Float}} + } }, "execution_statistics": { "cnt": {{.Int64}}, @@ -224,8 +231,15 @@ func 
TestSQLStatsJsonEncoding(t *testing.T) { "mean": {{.Float}}, "sqDiff": {{.Float}} }, - "nodes": [{{joinInts .IntArray}}] - "planGists": [{{joinStrings .StringArray}}] + "nodes": [{{joinInts .IntArray}}], + "planGists": [{{joinStrings .StringArray}}], + "latencyInfo": { + "min": {{.Float}}, + "max": {{.Float}}, + "p50": {{.Float}}, + "p90": {{.Float}}, + "p99": {{.Float}}, + } }, "execution_statistics": { "cnt": {{.Int64}}, diff --git a/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_impl.go b/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_impl.go index bcd184d3ffdb..247766d13b25 100644 --- a/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_impl.go +++ b/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_impl.go @@ -50,6 +50,7 @@ var ( _ jsonMarshaler = (*jsonInt)(nil) _ jsonMarshaler = (*stmtFingerprintID)(nil) _ jsonMarshaler = (*int64Array)(nil) + _ jsonMarshaler = &latencyInfo{} ) type txnStats appstatspb.TransactionStatistics @@ -339,6 +340,7 @@ func (s *innerStmtStats) jsonFields() jsonFields { {"nodes", (*int64Array)(&s.Nodes)}, {"planGists", (*stringArray)(&s.PlanGists)}, {"indexes", (*stringArray)(&s.Indexes)}, + {"latencyInfo", (*latencyInfo)(&s.LatencyInfo)}, } } @@ -389,6 +391,26 @@ func (n *numericStats) encodeJSON() (json.JSON, error) { return n.jsonFields().encodeJSON() } +type latencyInfo appstatspb.LatencyInfo + +func (l *latencyInfo) jsonFields() jsonFields { + return jsonFields{ + {"min", (*jsonFloat)(&l.Min)}, + {"max", (*jsonFloat)(&l.Max)}, + {"p50", (*jsonFloat)(&l.P50)}, + {"p90", (*jsonFloat)(&l.P90)}, + {"p99", (*jsonFloat)(&l.P99)}, + } +} + +func (l *latencyInfo) decodeJSON(js json.JSON) error { + return l.jsonFields().decodeJSON(js) +} + +func (l *latencyInfo) encodeJSON() (json.JSON, error) { + return l.jsonFields().encodeJSON() +} + type jsonFields []jsonField func (jf jsonFields) decodeJSON(js json.JSON) (err error) { diff --git a/pkg/sql/sqlstats/sslocal/sql_stats.go b/pkg/sql/sqlstats/sslocal/sql_stats.go index 
92fe6dcf3904..9866d03b5f10 100644 --- a/pkg/sql/sqlstats/sslocal/sql_stats.go +++ b/pkg/sql/sqlstats/sslocal/sql_stats.go @@ -67,7 +67,8 @@ type SQLStats struct { knobs *sqlstats.TestingKnobs - insights insights.WriterProvider + insights insights.WriterProvider + latencyInformation insights.LatencyInformation } func newSQLStats( @@ -80,6 +81,7 @@ func newSQLStats( parentMon *mon.BytesMonitor, flushTarget Sink, knobs *sqlstats.TestingKnobs, + latencyInformation insights.LatencyInformation, ) *SQLStats { monitor := mon.NewMonitor( "SQLStats", @@ -97,6 +99,7 @@ func newSQLStats( flushTarget: flushTarget, knobs: knobs, insights: insightsWriter, + latencyInformation: latencyInformation, } s.mu.apps = make(map[string]*ssmemstorage.Container) s.mu.mon = monitor @@ -135,6 +138,7 @@ func (s *SQLStats) getStatsForApplication(appName string) *ssmemstorage.Containe appName, s.knobs, s.insights(false /* internal */), + s.latencyInformation, ) s.mu.apps[appName] = a return a diff --git a/pkg/sql/sqlstats/sslocal/sql_stats_test.go b/pkg/sql/sqlstats/sslocal/sql_stats_test.go index dc73e5eb7a19..3d5daa4cb528 100644 --- a/pkg/sql/sqlstats/sslocal/sql_stats_test.go +++ b/pkg/sql/sqlstats/sslocal/sql_stats_test.go @@ -446,16 +446,18 @@ func TestExplicitTxnFingerprintAccounting(t *testing.T) { nil /* curCount */, nil /* maxHist */, math.MaxInt64, st, ) + insightsProvider := insights.New(st, insights.NewMetrics()) sqlStats := sslocal.New( st, sqlstats.MaxMemSQLStatsStmtFingerprints, sqlstats.MaxMemSQLStatsTxnFingerprints, nil, /* curMemoryBytesCount */ nil, /* maxMemoryBytesHist */ - insights.New(st, insights.NewMetrics()).Writer, + insightsProvider.Writer, monitor, nil, /* reportingSink */ nil, /* knobs */ + insightsProvider.LatencyInformation(), ) appStats := sqlStats.GetApplicationStats("" /* appName */, false /* internal */) @@ -564,16 +566,18 @@ func TestAssociatingStmtStatsWithTxnFingerprint(t *testing.T) { require.NoError(t, err) // Construct the SQL Stats machinery. 
+ insightsProvider := insights.New(st, insights.NewMetrics()) sqlStats := sslocal.New( st, sqlstats.MaxMemSQLStatsStmtFingerprints, sqlstats.MaxMemSQLStatsTxnFingerprints, nil, nil, - insights.New(st, insights.NewMetrics()).Writer, + insightsProvider.Writer, monitor, nil, nil, + insightsProvider.LatencyInformation(), ) appStats := sqlStats.GetApplicationStats("" /* appName */, false /* internal */) statsCollector := sslocal.NewStatsCollector( @@ -1441,3 +1445,80 @@ func convertIDsToNames(t *testing.T, testConn *sqlutils.SQLRunner, indexes []str }) return indexesInfo } + +func TestSQLStatsLatencyInfo(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + params, _ := tests.CreateTestServerParams() + testServer, sqlConn, _ := serverutils.StartServer(t, params) + defer func() { + require.NoError(t, sqlConn.Close()) + testServer.Stopper().Stop(ctx) + }() + testConn := sqlutils.MakeSQLRunner(sqlConn) + appName := "latency-info" + testConn.Exec(t, "SET application_name = $1", appName) + testConn.Exec(t, "CREATE TABLE t1 (k INT)") + + testCases := []struct { + name string + statement string + fingerprint string + latencyMax float64 + }{ + { + name: "select on table", + statement: "SELECT * FROM t1", + fingerprint: "SELECT * FROM t1", + latencyMax: 1, + }, + { + name: "select sleep", + statement: "SELECT pg_sleep(0.06)", + fingerprint: "SELECT pg_sleep(_)", + latencyMax: 0.2, + }, + { + name: "select sleep", + statement: "SELECT pg_sleep(0.1)", + fingerprint: "SELECT pg_sleep(_)", + latencyMax: 0.2, + }, + { + name: "select sleep", + statement: "SELECT pg_sleep(0.07)", + fingerprint: "SELECT pg_sleep(_)", + latencyMax: 0.2, + }, + } + + var min float64 + var max float64 + var p50 float64 + var p90 float64 + var p99 float64 + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + testConn.Exec(t, tc.statement) + + rows := testConn.QueryRow(t, "SELECT statistics -> 'statistics' -> 'latencyInfo' ->> 
'min',"+ + "statistics -> 'statistics' -> 'latencyInfo' ->> 'max',"+ + "statistics -> 'statistics' -> 'latencyInfo' ->> 'p50',"+ + "statistics -> 'statistics' -> 'latencyInfo' ->> 'p90',"+ + "statistics -> 'statistics' -> 'latencyInfo' ->> 'p99' "+ + "FROM CRDB_INTERNAL.STATEMENT_STATISTICS WHERE app_name = $1 "+ + "AND metadata ->> 'query'=$2", appName, "SELECT * FROM t1") + rows.Scan(&min, &max, &p50, &p90, &p99) + + require.Positive(t, min) + require.Positive(t, max) + require.GreaterOrEqual(t, max, min) + require.LessOrEqual(t, max, tc.latencyMax) + require.GreaterOrEqual(t, p99, p90) + require.GreaterOrEqual(t, p90, p50) + require.LessOrEqual(t, p99, max) + }) + } +} diff --git a/pkg/sql/sqlstats/sslocal/sslocal_provider.go b/pkg/sql/sqlstats/sslocal/sslocal_provider.go index e375c9c00e27..a81e03c9b37c 100644 --- a/pkg/sql/sqlstats/sslocal/sslocal_provider.go +++ b/pkg/sql/sqlstats/sslocal/sslocal_provider.go @@ -40,10 +40,20 @@ func New( pool *mon.BytesMonitor, reportingSink Sink, knobs *sqlstats.TestingKnobs, + latencyInformation insights.LatencyInformation, ) *SQLStats { - return newSQLStats(settings, maxStmtFingerprints, maxTxnFingerprints, - curMemoryBytesCount, maxMemoryBytesHist, insightsWriter, pool, - reportingSink, knobs) + return newSQLStats( + settings, + maxStmtFingerprints, + maxTxnFingerprints, + curMemoryBytesCount, + maxMemoryBytesHist, + insightsWriter, + pool, + reportingSink, + knobs, + latencyInformation, + ) } var _ sqlstats.Provider = &SQLStats{} @@ -105,6 +115,7 @@ func (s *SQLStats) GetApplicationStats(appName string, internal bool) sqlstats.A appName, s.knobs, s.insights(internal), + s.latencyInformation, ) s.mu.apps[appName] = a return a diff --git a/pkg/sql/sqlstats/ssmemstorage/ss_mem_storage.go b/pkg/sql/sqlstats/ssmemstorage/ss_mem_storage.go index 4a701b10ea28..ca24f4f2cca8 100644 --- a/pkg/sql/sqlstats/ssmemstorage/ss_mem_storage.go +++ b/pkg/sql/sqlstats/ssmemstorage/ss_mem_storage.go @@ -119,8 +119,9 @@ type Container struct 
{ txnCounts transactionCounts mon *mon.BytesMonitor - knobs *sqlstats.TestingKnobs - insights insights.Writer + knobs *sqlstats.TestingKnobs + insights insights.Writer + latencyInformation insights.LatencyInformation } var _ sqlstats.ApplicationStats = &Container{} @@ -136,6 +137,7 @@ func New( appName string, knobs *sqlstats.TestingKnobs, insightsWriter insights.Writer, + latencyInformation insights.LatencyInformation, ) *Container { s := &Container{ st: st, @@ -145,6 +147,7 @@ func New( mon: mon, knobs: knobs, insights: insightsWriter, + latencyInformation: latencyInformation, } if mon != nil { @@ -251,6 +254,7 @@ func NewTempContainerFromExistingStmtStats( appName, nil, /* knobs */ nil, /* insights */ + nil, /*latencyInformation */ ) for i := range statistics { @@ -324,6 +328,7 @@ func NewTempContainerFromExistingTxnStats( appName, nil, /* knobs */ nil, /* insights */ + nil, /* latencyInformation */ ) for i := range statistics { @@ -358,13 +363,14 @@ func (s *Container) NewApplicationStatsWithInheritedOptions() sqlstats.Applicati sqlstats.MaxSQLStatsStmtFingerprintsPerExplicitTxn, // There is no need to constraint txn fingerprint limit since in temporary // container, there will never be more than one transaction fingerprint. 
- nil, // uniqueTxnFingerprintLimit, + nil, // uniqueTxnFingerprintLimit &uniqueStmtFingerprintCount, &uniqueTxnFingerprintCount, s.mon, s.appName, s.knobs, s.insights, + s.latencyInformation, ) } diff --git a/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go b/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go index 603c65b479e9..2301d8ba4dc7 100644 --- a/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go +++ b/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go @@ -140,6 +140,18 @@ func (s *Container) RecordStatement( stats.mu.data.IndexRecommendations = value.IndexRecommendations stats.mu.data.Indexes = util.CombineUniqueString(stats.mu.data.Indexes, value.Indexes) + // Percentile latencies are only being sampled if the latency was above the + // AnomalyDetectionLatencyThreshold. + latencies := s.latencyInformation.GetPercentileValues(stmtFingerprintID) + latencyInfo := appstatspb.LatencyInfo{ + Min: value.ServiceLatency, + Max: value.ServiceLatency, + P50: latencies.P50, + P90: latencies.P90, + P99: latencies.P99, + } + stats.mu.data.LatencyInfo.Add(latencyInfo) + // Note that some fields derived from tracing statements (such as // BytesSentOverNetwork) are not updated here because they are collected // on-demand. @@ -155,7 +167,7 @@ func (s *Container) RecordStatement( // stats size + stmtKey size + hash of the statementKey estimatedMemoryAllocBytes := stats.sizeUnsafe() + statementKey.size() + 8 - // We also accounts for the memory used for s.sampledPlanMetadataCache. + // We also account for the memory used for s.sampledPlanMetadataCache. // timestamp size + key size + hash. 
estimatedMemoryAllocBytes += timestampSize + statementKey.sampledPlanKey.size() + 8 s.mu.Lock() diff --git a/pkg/ui/workspaces/cluster-ui/src/statementsPage/statementsPage.fixture.ts b/pkg/ui/workspaces/cluster-ui/src/statementsPage/statementsPage.fixture.ts index 5f32d03adf43..77380a65f342 100644 --- a/pkg/ui/workspaces/cluster-ui/src/statementsPage/statementsPage.fixture.ts +++ b/pkg/ui/workspaces/cluster-ui/src/statementsPage/statementsPage.fixture.ts @@ -17,6 +17,8 @@ import { noop } from "lodash"; import * as protos from "@cockroachlabs/crdb-protobuf-client"; import { RequestError } from "src/util"; import { StatementDiagnosticsReport } from "../api"; +import { cockroach } from "@cockroachlabs/crdb-protobuf-client"; +import ILatencyInfo = cockroach.sql.ILatencyInfo; type IStatementStatistics = protos.cockroach.sql.IStatementStatistics; type IExecStats = protos.cockroach.sql.IExecStats; @@ -51,6 +53,14 @@ const execStats: Required = { }, }; +const latencyInfo: Required = { + min: 0.00008, + max: 0.00028, + p50: 0.00015, + p90: 0.00016, + p99: 0.00018, +} + const statementStats: Required = { count: Long.fromNumber(180000), first_attempt_count: Long.fromNumber(50000), @@ -103,6 +113,7 @@ const statementStats: Required = { index_recommendations: [""], indexes: ["123@456"], exec_stats: execStats, + latency_info: latencyInfo, last_exec_timestamp: { seconds: Long.fromInt(1599670292), nanos: 111613000, diff --git a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.fixture.ts b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.fixture.ts index 386490979fbc..befa55330e67 100644 --- a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.fixture.ts +++ b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.fixture.ts @@ -112,6 +112,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 
1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -223,6 +230,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -337,6 +351,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -442,6 +463,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -547,6 +575,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -661,6 +696,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -775,6 +817,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + 
}, }, id: new Long(8717981371097536892), }, @@ -892,6 +941,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -1006,6 +1062,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, diff --git a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.spec.ts b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.spec.ts index 0dc48ebde9af..d74354d83063 100644 --- a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.spec.ts +++ b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.spec.ts @@ -277,6 +277,13 @@ function randomStats( plan_gists: ["Ais="], index_recommendations: [""], indexes: ["123@456"], + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }; } diff --git a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.ts b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.ts index 1e63f07e2816..22980865f2a0 100644 --- a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.ts +++ b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.ts @@ -61,6 +61,41 @@ export function aggregateNumericStats( }; } +export function aggregateLatencyInfo( + a: StatementStatistics, + b: StatementStatistics, +): protos.cockroach.sql.ILatencyInfo { + const min = + a.latency_info?.min == 0 || a.latency_info?.min > b.latency_info?.min + ? b.latency_info?.min + : a.latency_info?.min; + const max = + a.latency_info?.max > b.latency_info?.max + ? 
a.latency_info?.max + : b.latency_info?.max; + + let p50 = b.latency_info?.p50; + let p90 = b.latency_info?.p90; + let p99 = b.latency_info?.p99; + // Use the latest value we have that is not zero. + if ( + b.last_exec_timestamp < a.last_exec_timestamp && + b.latency_info?.p50 != 0 + ) { + p50 = a.latency_info?.p50; + p90 = a.latency_info?.p90; + p99 = a.latency_info?.p99; + } + + return { + min, + max, + p50, + p90, + p99, + }; +} + export function coalesceSensitiveInfo( a: protos.cockroach.sql.ISensitiveInfo, b: protos.cockroach.sql.ISensitiveInfo, @@ -214,6 +249,7 @@ export function addStatementStats( plan_gists: planGists, index_recommendations: indexRec, indexes: indexes, + latency_info: aggregateLatencyInfo(a, b), }; } diff --git a/pkg/ui/workspaces/db-console/src/views/statements/statements.spec.tsx b/pkg/ui/workspaces/db-console/src/views/statements/statements.spec.tsx index 741c962c5b89..98933f14001a 100644 --- a/pkg/ui/workspaces/db-console/src/views/statements/statements.spec.tsx +++ b/pkg/ui/workspaces/db-console/src/views/statements/statements.spec.tsx @@ -530,6 +530,13 @@ function makeStats(): Required { plan_gists: ["Ais="], index_recommendations: [], indexes: ["123@456"], + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }; }