From 87156d254046961eef02856bbf419533557303b3 Mon Sep 17 00:00:00 2001 From: maryliag Date: Wed, 1 Feb 2023 11:15:20 -0500 Subject: [PATCH] roachpb, sql: add latency info to statement statistics This commit adds latency info in seconds to the statement statistics on `crdb_internal.statement_statistics`, `system.statement_statistics` and `crdb_internal.cluster_statement_statistics`, with information about: min, max, p50, p90 and p99. It also adds to `crdb_internal.node_statement_statistics` the columns: `latency_seconds_min`, `latency_seconds_max`, `latency_seconds_p50`, `latency_seconds_p90` and `latency_seconds_p99`. Part Of #72954 This initial version is leveraging the latency summary that already existed on Insights. Since we were already collecting that information for all statements that executed for at least `AnomalyDetectionLatencyThreshold`, this commit makes that mapping available so SQLStats can use it to retrieve the values. This value is an estimate, since we're missing the executions that took less than the default 50ms threshold. We were also collecting just for p50, p90 and p99, so the function that returns the values adds that limitation, in case someone else was trying to use that function to retrieve other percentiles. A follow-up task will focus on making this information more complete. Release note (sql change): Add latency info (min, max, p50, p90, p99) to crdb_internal.statement_statistics, system.statement_statistics and crdb_internal.cluster_statement_statistics. Also adds `latency_seconds_min`, `latency_seconds_max`, `latency_seconds_p50`, `latency_seconds_p90` and `latency_seconds_p99` to crdb_internal.node_statement_statistics. 
--- .../testdata/logic_test/crdb_internal_tenant | 4 +- pkg/cli/zip_table_registry.go | 5 ++ pkg/sql/appstatspb/app_stats.go | 18 ++++ pkg/sql/appstatspb/app_stats.proto | 27 +++++- pkg/sql/conn_executor.go | 2 + pkg/sql/crdb_internal.go | 12 ++- .../testdata/logic_test/crdb_internal | 4 +- .../testdata/logic_test/crdb_internal_catalog | 2 +- pkg/sql/sqlstats/insights/detector.go | 38 +++++++-- pkg/sql/sqlstats/insights/insights.go | 19 ++++- pkg/sql/sqlstats/insights/provider.go | 9 +- .../sqlstatsutil/json_encoding_test.go | 20 ++++- .../sqlstatsutil/json_impl.go | 22 +++++ pkg/sql/sqlstats/sslocal/sql_stats.go | 6 +- pkg/sql/sqlstats/sslocal/sql_stats_test.go | 85 ++++++++++++++++++- pkg/sql/sqlstats/sslocal/sslocal_provider.go | 17 +++- .../sqlstats/ssmemstorage/ss_mem_storage.go | 12 ++- .../sqlstats/ssmemstorage/ss_mem_writer.go | 14 ++- .../statementsPage/statementsPage.fixture.ts | 11 +++ .../src/util/appStats/appStats.fixture.ts | 63 ++++++++++++++ .../src/util/appStats/appStats.spec.ts | 7 ++ .../cluster-ui/src/util/appStats/appStats.ts | 36 ++++++++ .../src/views/statements/statements.spec.tsx | 7 ++ 23 files changed, 408 insertions(+), 32 deletions(-) diff --git a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant index eed665b125a6..5f8952e72b4c 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant +++ b/pkg/ccl/logictestccl/testdata/logic_test/crdb_internal_tenant @@ -194,10 +194,10 @@ SELECT * FROM crdb_internal.leases WHERE node_id < 0 ---- node_id table_id name parent_id expiration deleted -query ITTTTTIIITRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTT colnames +query ITTTTTIIITRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTTRRRRR colnames SELECT * FROM crdb_internal.node_statement_statistics WHERE node_id < 0 ---- -node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error rows_avg rows_var idle_lat_avg idle_lat_var 
parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations +node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations latency_seconds_min latency_seconds_max latency_seconds_p50 latency_seconds_p90 latency_seconds_p99 query ITTTIIRRRRRRRRRRRRRRRRRRRRRR colnames SELECT * FROM crdb_internal.node_transaction_statistics WHERE node_id < 0 diff --git a/pkg/cli/zip_table_registry.go b/pkg/cli/zip_table_registry.go index 4b28bef56bc9..efcff136c442 100644 --- a/pkg/cli/zip_table_registry.go +++ b/pkg/cli/zip_table_registry.go @@ -755,6 +755,11 @@ var zipInternalTablesPerNode = DebugZipTableRegistry{ "exec_node_ids", "txn_fingerprint_id", "index_recommendations", + "latency_seconds_min", + "latency_seconds_max", + "latency_seconds_p50", + "latency_seconds_p90", + "latency_seconds_p99", }, }, "crdb_internal.node_transaction_statistics": { diff 
--git a/pkg/sql/appstatspb/app_stats.go b/pkg/sql/appstatspb/app_stats.go index 6c9a277d80b4..74a1a058d6e7 100644 --- a/pkg/sql/appstatspb/app_stats.go +++ b/pkg/sql/appstatspb/app_stats.go @@ -161,6 +161,7 @@ func (s *StatementStatistics) Add(other *StatementStatistics) { s.Indexes = util.CombineUniqueString(s.Indexes, other.Indexes) s.ExecStats.Add(other.ExecStats) + s.LatencyInfo.Add(other.LatencyInfo) if other.SensitiveInfo.LastErr != "" { s.SensitiveInfo.LastErr = other.SensitiveInfo.LastErr @@ -217,3 +218,20 @@ func (s *ExecStats) Add(other ExecStats) { s.Count += other.Count } + +// Add combines other into this LatencyInfo. +func (s *LatencyInfo) Add(other LatencyInfo) { + // Use the latest non-zero value. + if other.P50 != 0 { + s.P50 = other.P50 + s.P90 = other.P90 + s.P99 = other.P99 + } + + if s.Min == 0 || other.Min < s.Min { + s.Min = other.Min + } + if other.Max > s.Max { + s.Max = other.Max + } +} diff --git a/pkg/sql/appstatspb/app_stats.proto b/pkg/sql/appstatspb/app_stats.proto index d83cac1691c6..90d766e9587a 100644 --- a/pkg/sql/appstatspb/app_stats.proto +++ b/pkg/sql/appstatspb/app_stats.proto @@ -108,18 +108,21 @@ message StatementStatistics { // Nodes is the ordered list of nodes ids on which the statement was executed. repeated int64 nodes = 24; - // plan_gists is the list of a compressed version of plan that can be converted (lossily) + // PlanGists is the list of a compressed version of plan that can be converted (lossily) // back into a logical plan. // Each statement contain only one plan gist, but the same statement fingerprint id // can contain more than one value. repeated string plan_gists = 26; - // index_recommendations is the list of index recommendations generated for the statement fingerprint. + // IndexRecommendations is the list of index recommendations generated for the statement fingerprint. 
repeated string index_recommendations = 27; - // indexes is the list of indexes used by the particular plan when executing the statement. + // Indexes is the list of indexes used by the particular plan when executing the statement. repeated string indexes = 30; + // LatencyInfo is the information about latency, such as min, max, p50, p90 and p99. + optional LatencyInfo latency_info = 31 [(gogoproto.nullable) = false]; + // Note: be sure to update `sql/app_stats.go` when adding/removing fields here! reserved 13, 14, 17, 18, 19, 20; @@ -338,3 +341,21 @@ message ExecStats { // Note: be sure to update `sql/app_stats.go` when adding/removing fields // here! } + +// LatencyInfo contains more details about the latency. +message LatencyInfo { + // Min is the minimum time in seconds spent executing the fingerprint. + optional double min = 1 [(gogoproto.nullable) = false]; + + // Max is the maximum time in seconds spent executing the fingerprint. + optional double max = 2 [(gogoproto.nullable) = false]; + + // P50 is the 50th percentile in seconds for the fingerprint. + optional double p50 = 3 [(gogoproto.nullable) = false]; + + // P90 is the 90th percentile in seconds for the fingerprint. + optional double p90 = 4 [(gogoproto.nullable) = false]; + + // P99 is the 99th percentile in seconds for the fingerprint. 
+ optional double p99 = 5 [(gogoproto.nullable) = false]; +} diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index ae19ca9ffcd6..d4242b31bc84 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -374,6 +374,7 @@ func NewServer(cfg *ExecutorConfig, pool *mon.BytesMonitor) *Server { pool, nil, /* reportedProvider */ cfg.SQLStatsTestingKnobs, + insightsProvider.LatencyInformation(), ) reportedSQLStatsController := reportedSQLStats.GetController(cfg.SQLStatusServer) memSQLStats := sslocal.New( @@ -386,6 +387,7 @@ func NewServer(cfg *ExecutorConfig, pool *mon.BytesMonitor) *Server { pool, reportedSQLStats, cfg.SQLStatsTestingKnobs, + insightsProvider.LatencyInformation(), ) s := &Server{ cfg: cfg, diff --git a/pkg/sql/crdb_internal.go b/pkg/sql/crdb_internal.go index 75f5b910ca94..591e1c66f40b 100644 --- a/pkg/sql/crdb_internal.go +++ b/pkg/sql/crdb_internal.go @@ -1375,7 +1375,12 @@ CREATE TABLE crdb_internal.node_statement_statistics ( database_name STRING NOT NULL, exec_node_ids INT[] NOT NULL, txn_fingerprint_id STRING, - index_recommendations STRING[] NOT NULL + index_recommendations STRING[] NOT NULL, + latency_seconds_min FLOAT, + latency_seconds_max FLOAT, + latency_seconds_p50 FLOAT, + latency_seconds_p90 FLOAT, + latency_seconds_p99 FLOAT )`, populate: func(ctx context.Context, p *planner, _ catalog.DatabaseDescriptor, addRow func(...tree.Datum) error) error { hasViewActivityOrViewActivityRedacted, err := p.HasViewActivityOrViewActivityRedactedRole(ctx) @@ -1485,6 +1490,11 @@ CREATE TABLE crdb_internal.node_statement_statistics ( execNodeIDs, // exec_node_ids txnFingerprintID, // txn_fingerprint_id indexRecommendations, // index_recommendations + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.Min)), // latency_seconds_min + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.Max)), // latency_seconds_max + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.P50)), // latency_seconds_p50 + 
tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.P90)), // latency_seconds_p90 + tree.NewDFloat(tree.DFloat(stats.Stats.LatencyInfo.P99)), // latency_seconds_p99 ) if err != nil { return err diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal b/pkg/sql/logictest/testdata/logic_test/crdb_internal index a09c041aa448..65a7d3eda4f0 100644 --- a/pkg/sql/logictest/testdata/logic_test/crdb_internal +++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal @@ -326,10 +326,10 @@ SELECT * FROM crdb_internal.leases WHERE node_id < 0 ---- node_id table_id name parent_id expiration deleted -query ITTTTTIIITRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTT colnames +query ITTTTTIIITRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRBBTTTTTRRRRR colnames SELECT * FROM crdb_internal.node_statement_statistics WHERE node_id < 0 ---- -node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations +node_id application_name flags statement_id key anonymized count first_attempt_count max_retries last_error rows_avg rows_var idle_lat_avg idle_lat_var parse_lat_avg parse_lat_var plan_lat_avg plan_lat_var run_lat_avg run_lat_var service_lat_avg service_lat_var overhead_lat_avg overhead_lat_var bytes_read_avg bytes_read_var rows_read_avg rows_read_var rows_written_avg rows_written_var network_bytes_avg network_bytes_var network_msgs_avg network_msgs_var 
max_mem_usage_avg max_mem_usage_var max_disk_usage_avg max_disk_usage_var contention_time_avg contention_time_var cpu_sql_nanos_avg cpu_sql_nanos_var implicit_txn full_scan sample_plan database_name exec_node_ids txn_fingerprint_id index_recommendations latency_seconds_min latency_seconds_max latency_seconds_p50 latency_seconds_p90 latency_seconds_p99 query ITTTIIRRRRRRRRRRRRRRRRRRRRRR colnames SELECT * FROM crdb_internal.node_transaction_statistics WHERE node_id < 0 diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog b/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog index a1c256b3910e..e5a3b1f382eb 100644 --- a/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog +++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog @@ -415,7 +415,7 @@ SELECT id, strip_volatile(descriptor) FROM crdb_internal.kv_catalog_descriptor 4294967240 {"table": {"columns": [{"id": 1, "name": "range_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "start_key", "type": {"family": "BytesFamily", "oid": 17}}, {"id": 3, "name": "start_pretty", "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "end_key", "type": {"family": "BytesFamily", "oid": 17}}, {"id": 5, "name": "end_pretty", "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "replicas", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 7, "name": "replica_localities", "type": {"arrayContents": {"family": "StringFamily", "oid": 25}, "arrayElemType": "StringFamily", "family": "ArrayFamily", "oid": 1009}}, {"id": 8, "name": "voting_replicas", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 9, "name": "non_voting_replicas", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, 
"arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 10, "name": "learner_replicas", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 11, "name": "split_enforced_until", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}], "formatVersion": 3, "id": 4294967240, "name": "ranges_no_leases", "nextColumnId": 12, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967241 {"table": {"columns": [{"id": 1, "name": "table_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "index_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 3, "name": "parent_name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "columns", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 6, "name": "column_names", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "list_value", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 8, "name": "range_value", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 9, "name": "zone_id", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 10, "name": "subzone_id", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}], "formatVersion": 3, "id": 4294967241, "name": "partitions", "nextColumnId": 11, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, 
"primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967242 {"table": {"columns": [{"id": 1, "name": "node_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "application_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "txn_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 4, "name": "txn_time_avg_sec", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 5, "name": "txn_time_var_sec", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 6, "name": "committed_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 7, "name": "implicit_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}], "formatVersion": 3, "id": 4294967242, "name": "node_txn_stats", "nextColumnId": 8, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} -4294967243 {"table": {"columns": [{"id": 1, "name": "node_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "application_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "flags", "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "statement_id", "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "key", "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "anonymized", "nullable": true, "type": {"family": 
"StringFamily", "oid": 25}}, {"id": 7, "name": "count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 8, "name": "first_attempt_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 9, "name": "max_retries", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 10, "name": "last_error", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 11, "name": "rows_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 12, "name": "rows_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 13, "name": "idle_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 14, "name": "idle_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 15, "name": "parse_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 16, "name": "parse_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 17, "name": "plan_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 18, "name": "plan_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 19, "name": "run_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 20, "name": "run_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 21, "name": "service_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 22, "name": "service_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 23, "name": "overhead_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 24, "name": "overhead_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 25, "name": "bytes_read_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 26, "name": "bytes_read_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 27, "name": "rows_read_avg", "type": 
{"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 28, "name": "rows_read_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 29, "name": "rows_written_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 30, "name": "rows_written_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 31, "name": "network_bytes_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 32, "name": "network_bytes_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 33, "name": "network_msgs_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 34, "name": "network_msgs_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 35, "name": "max_mem_usage_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 36, "name": "max_mem_usage_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 37, "name": "max_disk_usage_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 38, "name": "max_disk_usage_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 39, "name": "contention_time_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 40, "name": "contention_time_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 41, "name": "cpu_sql_nanos_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 42, "name": "cpu_sql_nanos_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 43, "name": "implicit_txn", "type": {"oid": 16}}, {"id": 44, "name": "full_scan", "type": {"oid": 16}}, {"id": 45, "name": "sample_plan", "nullable": true, "type": {"family": "JsonFamily", "oid": 3802}}, {"id": 
46, "name": "database_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 47, "name": "exec_node_ids", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 48, "name": "txn_fingerprint_id", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 49, "name": "index_recommendations", "type": {"arrayContents": {"family": "StringFamily", "oid": 25}, "arrayElemType": "StringFamily", "family": "ArrayFamily", "oid": 1009}}], "formatVersion": 3, "id": 4294967243, "name": "node_statement_statistics", "nextColumnId": 50, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} +4294967243 {"table": {"columns": [{"id": 1, "name": "node_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "application_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "flags", "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "statement_id", "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "key", "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "anonymized", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 8, "name": "first_attempt_count", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 9, "name": "max_retries", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 10, "name": "last_error", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 11, "name": "rows_avg", "type": 
{"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 12, "name": "rows_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 13, "name": "idle_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 14, "name": "idle_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 15, "name": "parse_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 16, "name": "parse_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 17, "name": "plan_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 18, "name": "plan_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 19, "name": "run_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 20, "name": "run_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 21, "name": "service_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 22, "name": "service_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 23, "name": "overhead_lat_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 24, "name": "overhead_lat_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 25, "name": "bytes_read_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 26, "name": "bytes_read_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 27, "name": "rows_read_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 28, "name": "rows_read_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 29, "name": "rows_written_avg", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 30, "name": "rows_written_var", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 31, "name": "network_bytes_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 
701, "width": 64}}, {"id": 32, "name": "network_bytes_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 33, "name": "network_msgs_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 34, "name": "network_msgs_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 35, "name": "max_mem_usage_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 36, "name": "max_mem_usage_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 37, "name": "max_disk_usage_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 38, "name": "max_disk_usage_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 39, "name": "contention_time_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 40, "name": "contention_time_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 41, "name": "cpu_sql_nanos_avg", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 42, "name": "cpu_sql_nanos_var", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 43, "name": "implicit_txn", "type": {"oid": 16}}, {"id": 44, "name": "full_scan", "type": {"oid": 16}}, {"id": 45, "name": "sample_plan", "nullable": true, "type": {"family": "JsonFamily", "oid": 3802}}, {"id": 46, "name": "database_name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 47, "name": "exec_node_ids", "type": {"arrayContents": {"family": "IntFamily", "oid": 20, "width": 64}, "arrayElemType": "IntFamily", "family": "ArrayFamily", "oid": 1016, "width": 64}}, {"id": 48, "name": "txn_fingerprint_id", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 49, "name": "index_recommendations", "type": {"arrayContents": 
{"family": "StringFamily", "oid": 25}, "arrayElemType": "StringFamily", "family": "ArrayFamily", "oid": 1009}}, {"id": 50, "name": "latency_seconds_min", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 51, "name": "latency_seconds_max", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 52, "name": "latency_seconds_p50", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 53, "name": "latency_seconds_p90", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}, {"id": 54, "name": "latency_seconds_p99", "nullable": true, "type": {"family": "FloatFamily", "oid": 701, "width": 64}}], "formatVersion": 3, "id": 4294967243, "name": "node_statement_statistics", "nextColumnId": 55, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967244 {"table": {"columns": [{"id": 1, "name": "store_id", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "name", "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "value", "type": {"family": "FloatFamily", "oid": 701, "width": 64}}], "formatVersion": 3, "id": 4294967244, "name": "node_metrics", "nextColumnId": 4, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967245 {"table": 
{"columns": [{"id": 1, "name": "node_id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "session_id", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "user_name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "client_address", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "application_name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "active_queries", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "last_active_query", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 8, "name": "num_txns_executed", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 9, "name": "session_start", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}, {"id": 10, "name": "active_query_start", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}, {"id": 11, "name": "kv_txn", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 12, "name": "alloc_bytes", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 13, "name": "max_alloc_bytes", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 14, "name": "status", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 15, "name": "session_end", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}], "formatVersion": 3, "id": 4294967245, "name": "node_sessions", "nextColumnId": 16, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, 
"unexposedParentSchemaId": 4294967295, "version": "1"}} 4294967246 {"table": {"columns": [{"id": 1, "name": "id", "nullable": true, "type": {"family": "UuidFamily", "oid": 2950}}, {"id": 2, "name": "node_id", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 3, "name": "session_id", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 4, "name": "start", "nullable": true, "type": {"family": "TimestampFamily", "oid": 1114}}, {"id": 5, "name": "txn_string", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 6, "name": "application_name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 7, "name": "num_stmts", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 8, "name": "num_retries", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 9, "name": "num_auto_retries", "nullable": true, "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 10, "name": "last_auto_retry_reason", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}], "formatVersion": 3, "id": 4294967246, "name": "node_transactions", "nextColumnId": 11, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967295, "version": "1"}} diff --git a/pkg/sql/sqlstats/insights/detector.go b/pkg/sql/sqlstats/insights/detector.go index 1e11904b31fe..d2d4dcbceff9 100644 --- a/pkg/sql/sqlstats/insights/detector.go +++ b/pkg/sql/sqlstats/insights/detector.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/appstatspb" 
"github.com/cockroachdb/cockroach/pkg/util/quantile" + "github.com/cockroachdb/cockroach/pkg/util/syncutil" ) type detector interface { @@ -50,13 +51,17 @@ func (d *compositeDetector) isSlow(statement *Statement) bool { return result } -var desiredQuantiles = map[float64]float64{0.5: 0.05, 0.99: 0.001} +var desiredQuantiles = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001} type anomalyDetector struct { settings *cluster.Settings metrics Metrics store *list.List - index map[appstatspb.StmtFingerprintID]*list.Element + mu struct { + syncutil.RWMutex + + index map[appstatspb.StmtFingerprintID]*list.Element + } } type latencySummaryEntry struct { @@ -85,12 +90,29 @@ func (d *anomalyDetector) isSlow(stmt *Statement) (decision bool) { return } +func (d *anomalyDetector) GetPercentileValues(id appstatspb.StmtFingerprintID) PercentileValues { + d.mu.Lock() + defer d.mu.Unlock() + latencies := PercentileValues{} + if entry, ok := d.mu.index[id]; ok { + latencySummary := entry.Value.(latencySummaryEntry).value + // If more percentiles are added, update the value of `desiredQuantiles` above + // to include the new keys. + latencies.P50 = latencySummary.Query(0.5) + latencies.P90 = latencySummary.Query(0.9) + latencies.P99 = latencySummary.Query(0.99) + } + return latencies +} + func (d *anomalyDetector) withFingerprintLatencySummary( stmt *Statement, consumer func(latencySummary *quantile.Stream), ) { + d.mu.Lock() + defer d.mu.Unlock() var latencySummary *quantile.Stream - if element, ok := d.index[stmt.FingerprintID]; ok { + if element, ok := d.mu.index[stmt.FingerprintID]; ok { // We are already tracking latencies for this fingerprint. latencySummary = element.Value.(latencySummaryEntry).value d.store.MoveToFront(element) // Mark this latency summary as recently used. @@ -98,7 +120,7 @@ func (d *anomalyDetector) withFingerprintLatencySummary( // We want to start tracking latencies for this fingerprint. 
latencySummary = quantile.NewTargeted(desiredQuantiles) entry := latencySummaryEntry{key: stmt.FingerprintID, value: latencySummary} - d.index[stmt.FingerprintID] = d.store.PushFront(entry) + d.mu.index[stmt.FingerprintID] = d.store.PushFront(entry) d.metrics.Fingerprints.Inc(1) d.metrics.Memory.Inc(latencySummary.ByteSize()) } else { @@ -114,7 +136,7 @@ func (d *anomalyDetector) withFingerprintLatencySummary( if d.metrics.Memory.Value() > AnomalyDetectionMemoryLimit.Get(&d.settings.SV) { element := d.store.Back() entry := d.store.Remove(element).(latencySummaryEntry) - delete(d.index, entry.key) + delete(d.mu.index, entry.key) d.metrics.Evictions.Inc(1) d.metrics.Fingerprints.Dec(1) d.metrics.Memory.Dec(entry.value.ByteSize()) @@ -122,12 +144,14 @@ func (d *anomalyDetector) withFingerprintLatencySummary( } func newAnomalyDetector(settings *cluster.Settings, metrics Metrics) *anomalyDetector { - return &anomalyDetector{ + anomaly := &anomalyDetector{ settings: settings, metrics: metrics, store: list.New(), - index: make(map[appstatspb.StmtFingerprintID]*list.Element), } + anomaly.mu.index = make(map[appstatspb.StmtFingerprintID]*list.Element) + + return anomaly } type latencyThresholdDetector struct { diff --git a/pkg/sql/sqlstats/insights/insights.go b/pkg/sql/sqlstats/insights/insights.go index eeb8b2ffe7aa..3d047468ef56 100644 --- a/pkg/sql/sqlstats/insights/insights.go +++ b/pkg/sql/sqlstats/insights/insights.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/sql/appstatspb" "github.com/cockroachdb/cockroach/pkg/sql/clusterunique" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/cockroach/pkg/util/stop" @@ -153,6 +154,16 @@ type Reader interface { IterateInsights(context.Context, func(context.Context, *Insight)) } +type LatencyInformation interface { + GetPercentileValues(fingerprintID appstatspb.StmtFingerprintID) 
PercentileValues +} + +type PercentileValues struct { + P50 float64 + P90 float64 + P99 float64 +} + // Provider offers access to the insights subsystem. type Provider interface { // Start launches the background tasks necessary for processing insights. @@ -164,21 +175,27 @@ type Provider interface { // Reader returns an object that offers read access to any detected insights. Reader() Reader + + // LatencyInformation returns an object that offers read access to latency information, + // such as percentiles. + LatencyInformation() LatencyInformation } // New builds a new Provider. func New(st *cluster.Settings, metrics Metrics) Provider { store := newStore(st) + anomalyDetector := newAnomalyDetector(st, metrics) return &defaultProvider{ store: store, ingester: newConcurrentBufferIngester( newRegistry(st, &compositeDetector{detectors: []detector{ &latencyThresholdDetector{st: st}, - newAnomalyDetector(st, metrics), + anomalyDetector, }}, &compositeSink{sinks: []sink{ store, }}), ), + anomalyDetector: anomalyDetector, } } diff --git a/pkg/sql/sqlstats/insights/provider.go b/pkg/sql/sqlstats/insights/provider.go index 8f9ca766f40c..9ea5b944c861 100644 --- a/pkg/sql/sqlstats/insights/provider.go +++ b/pkg/sql/sqlstats/insights/provider.go @@ -18,8 +18,9 @@ import ( ) type defaultProvider struct { - store *lockingStore - ingester *concurrentBufferIngester + store *lockingStore + ingester *concurrentBufferIngester + anomalyDetector *anomalyDetector } var _ Provider = &defaultProvider{} @@ -40,6 +41,10 @@ func (p *defaultProvider) Reader() Reader { return p.store } +func (p *defaultProvider) LatencyInformation() LatencyInformation { + return p.anomalyDetector +} + type nullWriter struct{} func (n *nullWriter) ObserveStatement(_ clusterunique.ID, _ *Statement) { diff --git a/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go b/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go index 36fb56c7b4e2..9ebe5e2fc94c 100644 --- 
a/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go +++ b/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_encoding_test.go @@ -102,7 +102,14 @@ func TestSQLStatsJsonEncoding(t *testing.T) { }, "nodes": [{{joinInts .IntArray}}], "planGists": [{{joinStrings .StringArray}}], - "indexes": [{{joinStrings .StringArray}}] + "indexes": [{{joinStrings .StringArray}}], + "latencyInfo": { + "min": {{.Float}}, + "max": {{.Float}}, + "p50": {{.Float}}, + "p90": {{.Float}}, + "p99": {{.Float}} + } }, "execution_statistics": { "cnt": {{.Int64}}, @@ -224,8 +231,15 @@ func TestSQLStatsJsonEncoding(t *testing.T) { "mean": {{.Float}}, "sqDiff": {{.Float}} }, - "nodes": [{{joinInts .IntArray}}] - "planGists": [{{joinStrings .StringArray}}] + "nodes": [{{joinInts .IntArray}}], + "planGists": [{{joinStrings .StringArray}}], + "latencyInfo": { + "min": {{.Float}}, + "max": {{.Float}}, + "p50": {{.Float}}, + "p90": {{.Float}}, + "p99": {{.Float}}, + } }, "execution_statistics": { "cnt": {{.Int64}}, diff --git a/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_impl.go b/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_impl.go index bcd184d3ffdb..247766d13b25 100644 --- a/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_impl.go +++ b/pkg/sql/sqlstats/persistedsqlstats/sqlstatsutil/json_impl.go @@ -50,6 +50,7 @@ var ( _ jsonMarshaler = (*jsonInt)(nil) _ jsonMarshaler = (*stmtFingerprintID)(nil) _ jsonMarshaler = (*int64Array)(nil) + _ jsonMarshaler = &latencyInfo{} ) type txnStats appstatspb.TransactionStatistics @@ -339,6 +340,7 @@ func (s *innerStmtStats) jsonFields() jsonFields { {"nodes", (*int64Array)(&s.Nodes)}, {"planGists", (*stringArray)(&s.PlanGists)}, {"indexes", (*stringArray)(&s.Indexes)}, + {"latencyInfo", (*latencyInfo)(&s.LatencyInfo)}, } } @@ -389,6 +391,26 @@ func (n *numericStats) encodeJSON() (json.JSON, error) { return n.jsonFields().encodeJSON() } +type latencyInfo appstatspb.LatencyInfo + +func (l *latencyInfo) jsonFields() 
jsonFields { + return jsonFields{ + {"min", (*jsonFloat)(&l.Min)}, + {"max", (*jsonFloat)(&l.Max)}, + {"p50", (*jsonFloat)(&l.P50)}, + {"p90", (*jsonFloat)(&l.P90)}, + {"p99", (*jsonFloat)(&l.P99)}, + } +} + +func (l *latencyInfo) decodeJSON(js json.JSON) error { + return l.jsonFields().decodeJSON(js) +} + +func (l *latencyInfo) encodeJSON() (json.JSON, error) { + return l.jsonFields().encodeJSON() +} + type jsonFields []jsonField func (jf jsonFields) decodeJSON(js json.JSON) (err error) { diff --git a/pkg/sql/sqlstats/sslocal/sql_stats.go b/pkg/sql/sqlstats/sslocal/sql_stats.go index 90c3350961eb..037376727272 100644 --- a/pkg/sql/sqlstats/sslocal/sql_stats.go +++ b/pkg/sql/sqlstats/sslocal/sql_stats.go @@ -67,7 +67,8 @@ type SQLStats struct { knobs *sqlstats.TestingKnobs - insights insights.WriterProvider + insights insights.WriterProvider + latencyInformation insights.LatencyInformation } func newSQLStats( @@ -80,6 +81,7 @@ func newSQLStats( parentMon *mon.BytesMonitor, flushTarget Sink, knobs *sqlstats.TestingKnobs, + latencyInformation insights.LatencyInformation, ) *SQLStats { monitor := mon.NewMonitor( "SQLStats", @@ -97,6 +99,7 @@ func newSQLStats( flushTarget: flushTarget, knobs: knobs, insights: insightsWriter, + latencyInformation: latencyInformation, } s.mu.apps = make(map[string]*ssmemstorage.Container) s.mu.mon = monitor @@ -135,6 +138,7 @@ func (s *SQLStats) getStatsForApplication(appName string) *ssmemstorage.Containe appName, s.knobs, s.insights(false /* internal */), + s.latencyInformation, ) s.mu.apps[appName] = a return a diff --git a/pkg/sql/sqlstats/sslocal/sql_stats_test.go b/pkg/sql/sqlstats/sslocal/sql_stats_test.go index dc73e5eb7a19..3d5daa4cb528 100644 --- a/pkg/sql/sqlstats/sslocal/sql_stats_test.go +++ b/pkg/sql/sqlstats/sslocal/sql_stats_test.go @@ -446,16 +446,18 @@ func TestExplicitTxnFingerprintAccounting(t *testing.T) { nil /* curCount */, nil /* maxHist */, math.MaxInt64, st, ) + insightsProvider := insights.New(st, 
insights.NewMetrics()) sqlStats := sslocal.New( st, sqlstats.MaxMemSQLStatsStmtFingerprints, sqlstats.MaxMemSQLStatsTxnFingerprints, nil, /* curMemoryBytesCount */ nil, /* maxMemoryBytesHist */ - insights.New(st, insights.NewMetrics()).Writer, + insightsProvider.Writer, monitor, nil, /* reportingSink */ nil, /* knobs */ + insightsProvider.LatencyInformation(), ) appStats := sqlStats.GetApplicationStats("" /* appName */, false /* internal */) @@ -564,16 +566,18 @@ func TestAssociatingStmtStatsWithTxnFingerprint(t *testing.T) { require.NoError(t, err) // Construct the SQL Stats machinery. + insightsProvider := insights.New(st, insights.NewMetrics()) sqlStats := sslocal.New( st, sqlstats.MaxMemSQLStatsStmtFingerprints, sqlstats.MaxMemSQLStatsTxnFingerprints, nil, nil, - insights.New(st, insights.NewMetrics()).Writer, + insightsProvider.Writer, monitor, nil, nil, + insightsProvider.LatencyInformation(), ) appStats := sqlStats.GetApplicationStats("" /* appName */, false /* internal */) statsCollector := sslocal.NewStatsCollector( @@ -1441,3 +1445,80 @@ func convertIDsToNames(t *testing.T, testConn *sqlutils.SQLRunner, indexes []str }) return indexesInfo } + +func TestSQLStatsLatencyInfo(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + params, _ := tests.CreateTestServerParams() + testServer, sqlConn, _ := serverutils.StartServer(t, params) + defer func() { + require.NoError(t, sqlConn.Close()) + testServer.Stopper().Stop(ctx) + }() + testConn := sqlutils.MakeSQLRunner(sqlConn) + appName := "latency-info" + testConn.Exec(t, "SET application_name = $1", appName) + testConn.Exec(t, "CREATE TABLE t1 (k INT)") + + testCases := []struct { + name string + statement string + fingerprint string + latencyMax float64 + }{ + { + name: "select on table", + statement: "SELECT * FROM t1", + fingerprint: "SELECT * FROM t1", + latencyMax: 1, + }, + { + name: "select sleep", + statement: "SELECT pg_sleep(0.06)", + 
fingerprint: "SELECT pg_sleep(_)", + latencyMax: 0.2, + }, + { + name: "select sleep", + statement: "SELECT pg_sleep(0.1)", + fingerprint: "SELECT pg_sleep(_)", + latencyMax: 0.2, + }, + { + name: "select sleep", + statement: "SELECT pg_sleep(0.07)", + fingerprint: "SELECT pg_sleep(_)", + latencyMax: 0.2, + }, + } + + var min float64 + var max float64 + var p50 float64 + var p90 float64 + var p99 float64 + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + testConn.Exec(t, tc.statement) + + rows := testConn.QueryRow(t, "SELECT statistics -> 'statistics' -> 'latencyInfo' ->> 'min',"+ + "statistics -> 'statistics' -> 'latencyInfo' ->> 'max',"+ + "statistics -> 'statistics' -> 'latencyInfo' ->> 'p50',"+ + "statistics -> 'statistics' -> 'latencyInfo' ->> 'p90',"+ + "statistics -> 'statistics' -> 'latencyInfo' ->> 'p99' "+ + "FROM CRDB_INTERNAL.STATEMENT_STATISTICS WHERE app_name = $1 "+ + "AND metadata ->> 'query'=$2", appName, tc.fingerprint) + rows.Scan(&min, &max, &p50, &p90, &p99) + + require.Positive(t, min) + require.Positive(t, max) + require.GreaterOrEqual(t, max, min) + require.LessOrEqual(t, max, tc.latencyMax) + require.GreaterOrEqual(t, p99, p90) + require.GreaterOrEqual(t, p90, p50) + require.LessOrEqual(t, p99, max) + }) + } +} diff --git a/pkg/sql/sqlstats/sslocal/sslocal_provider.go b/pkg/sql/sqlstats/sslocal/sslocal_provider.go index b508533ada09..df298949e28a 100644 --- a/pkg/sql/sqlstats/sslocal/sslocal_provider.go +++ b/pkg/sql/sqlstats/sslocal/sslocal_provider.go @@ -40,10 +40,20 @@ func New( pool *mon.BytesMonitor, reportingSink Sink, knobs *sqlstats.TestingKnobs, + latencyInformation insights.LatencyInformation, ) *SQLStats { - return newSQLStats(settings, maxStmtFingerprints, maxTxnFingerprints, - curMemoryBytesCount, maxMemoryBytesHist, insightsWriter, pool, - reportingSink, knobs) + return newSQLStats( + settings, + maxStmtFingerprints, + maxTxnFingerprints, + curMemoryBytesCount, + maxMemoryBytesHist, + insightsWriter, 
+ pool, + reportingSink, + knobs, + latencyInformation, + ) } var _ sqlstats.Provider = &SQLStats{} @@ -105,6 +115,7 @@ func (s *SQLStats) GetApplicationStats(appName string, internal bool) sqlstats.A appName, s.knobs, s.insights(internal), + s.latencyInformation, ) s.mu.apps[appName] = a return a diff --git a/pkg/sql/sqlstats/ssmemstorage/ss_mem_storage.go b/pkg/sql/sqlstats/ssmemstorage/ss_mem_storage.go index 4a701b10ea28..ca24f4f2cca8 100644 --- a/pkg/sql/sqlstats/ssmemstorage/ss_mem_storage.go +++ b/pkg/sql/sqlstats/ssmemstorage/ss_mem_storage.go @@ -119,8 +119,9 @@ type Container struct { txnCounts transactionCounts mon *mon.BytesMonitor - knobs *sqlstats.TestingKnobs - insights insights.Writer + knobs *sqlstats.TestingKnobs + insights insights.Writer + latencyInformation insights.LatencyInformation } var _ sqlstats.ApplicationStats = &Container{} @@ -136,6 +137,7 @@ func New( appName string, knobs *sqlstats.TestingKnobs, insightsWriter insights.Writer, + latencyInformation insights.LatencyInformation, ) *Container { s := &Container{ st: st, @@ -145,6 +147,7 @@ func New( mon: mon, knobs: knobs, insights: insightsWriter, + latencyInformation: latencyInformation, } if mon != nil { @@ -251,6 +254,7 @@ func NewTempContainerFromExistingStmtStats( appName, nil, /* knobs */ nil, /* insights */ + nil, /*latencyInformation */ ) for i := range statistics { @@ -324,6 +328,7 @@ func NewTempContainerFromExistingTxnStats( appName, nil, /* knobs */ nil, /* insights */ + nil, /* latencyInformation */ ) for i := range statistics { @@ -358,13 +363,14 @@ func (s *Container) NewApplicationStatsWithInheritedOptions() sqlstats.Applicati sqlstats.MaxSQLStatsStmtFingerprintsPerExplicitTxn, // There is no need to constraint txn fingerprint limit since in temporary // container, there will never be more than one transaction fingerprint. 
- nil, // uniqueTxnFingerprintLimit, + nil, // uniqueTxnFingerprintLimit &uniqueStmtFingerprintCount, &uniqueTxnFingerprintCount, s.mon, s.appName, s.knobs, s.insights, + s.latencyInformation, ) } diff --git a/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go b/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go index 603c65b479e9..2301d8ba4dc7 100644 --- a/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go +++ b/pkg/sql/sqlstats/ssmemstorage/ss_mem_writer.go @@ -140,6 +140,18 @@ func (s *Container) RecordStatement( stats.mu.data.IndexRecommendations = value.IndexRecommendations stats.mu.data.Indexes = util.CombineUniqueString(stats.mu.data.Indexes, value.Indexes) + // Percentile latencies are only being sampled if the latency was above the + // AnomalyDetectionLatencyThreshold. + latencies := s.latencyInformation.GetPercentileValues(stmtFingerprintID) + latencyInfo := appstatspb.LatencyInfo{ + Min: value.ServiceLatency, + Max: value.ServiceLatency, + P50: latencies.P50, + P90: latencies.P90, + P99: latencies.P99, + } + stats.mu.data.LatencyInfo.Add(latencyInfo) + // Note that some fields derived from tracing statements (such as // BytesSentOverNetwork) are not updated here because they are collected // on-demand. @@ -155,7 +167,7 @@ func (s *Container) RecordStatement( // stats size + stmtKey size + hash of the statementKey estimatedMemoryAllocBytes := stats.sizeUnsafe() + statementKey.size() + 8 - // We also accounts for the memory used for s.sampledPlanMetadataCache. + // We also account for the memory used for s.sampledPlanMetadataCache. // timestamp size + key size + hash. 
estimatedMemoryAllocBytes += timestampSize + statementKey.sampledPlanKey.size() + 8 s.mu.Lock() diff --git a/pkg/ui/workspaces/cluster-ui/src/statementsPage/statementsPage.fixture.ts b/pkg/ui/workspaces/cluster-ui/src/statementsPage/statementsPage.fixture.ts index 5f32d03adf43..77380a65f342 100644 --- a/pkg/ui/workspaces/cluster-ui/src/statementsPage/statementsPage.fixture.ts +++ b/pkg/ui/workspaces/cluster-ui/src/statementsPage/statementsPage.fixture.ts @@ -17,6 +17,8 @@ import { noop } from "lodash"; import * as protos from "@cockroachlabs/crdb-protobuf-client"; import { RequestError } from "src/util"; import { StatementDiagnosticsReport } from "../api"; +import { cockroach } from "@cockroachlabs/crdb-protobuf-client"; +import ILatencyInfo = cockroach.sql.ILatencyInfo; type IStatementStatistics = protos.cockroach.sql.IStatementStatistics; type IExecStats = protos.cockroach.sql.IExecStats; @@ -51,6 +53,14 @@ const execStats: Required<IExecStats> = { }, }; +const latencyInfo: Required<ILatencyInfo> = { + min: 0.00008, + max: 0.00028, + p50: 0.00015, + p90: 0.00016, + p99: 0.00018, +}; + const statementStats: Required<IStatementStatistics> = { count: Long.fromNumber(180000), first_attempt_count: Long.fromNumber(50000), @@ -103,6 +113,7 @@ const statementStats: Required<IStatementStatistics> = { index_recommendations: [""], indexes: ["123@456"], exec_stats: execStats, + latency_info: latencyInfo, last_exec_timestamp: { seconds: Long.fromInt(1599670292), nanos: 111613000, diff --git a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.fixture.ts b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.fixture.ts index 386490979fbc..befa55330e67 100644 --- a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.fixture.ts +++ b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.fixture.ts @@ -112,6 +112,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 
1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -223,6 +230,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -337,6 +351,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -442,6 +463,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -547,6 +575,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -661,6 +696,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -775,6 +817,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + 
}, }, id: new Long(8717981371097536892), }, @@ -892,6 +941,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, @@ -1006,6 +1062,13 @@ export const statementsWithSameIdButDifferentNodeId: CollectedStatementStatistic network_messages: { mean: 0, squared_diffs: 0 }, max_disk_usage: { mean: 0, squared_diffs: 0 }, }, + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }, id: new Long(8717981371097536892), }, diff --git a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.spec.ts b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.spec.ts index 0dc48ebde9af..d74354d83063 100644 --- a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.spec.ts +++ b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.spec.ts @@ -277,6 +277,13 @@ function randomStats( plan_gists: ["Ais="], index_recommendations: [""], indexes: ["123@456"], + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }; } diff --git a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.ts b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.ts index 1e63f07e2816..22980865f2a0 100644 --- a/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.ts +++ b/pkg/ui/workspaces/cluster-ui/src/util/appStats/appStats.ts @@ -61,6 +61,41 @@ export function aggregateNumericStats( }; } +export function aggregateLatencyInfo( + a: StatementStatistics, + b: StatementStatistics, +): protos.cockroach.sql.ILatencyInfo { + const min = + a.latency_info?.min == 0 || a.latency_info?.min > b.latency_info?.min + ? b.latency_info?.min + : a.latency_info?.min; + const max = + a.latency_info?.max > b.latency_info?.max + ? 
a.latency_info?.max + : b.latency_info?.max; + + let p50 = b.latency_info?.p50; + let p90 = b.latency_info?.p90; + let p99 = b.latency_info?.p99; + // Use the latest value we have that is not zero. + if ( + b.last_exec_timestamp < a.last_exec_timestamp && + a.latency_info?.p50 != 0 + ) { + p50 = a.latency_info?.p50; + p90 = a.latency_info?.p90; + p99 = a.latency_info?.p99; + } + + return { + min, + max, + p50, + p90, + p99, + }; +} + export function coalesceSensitiveInfo( a: protos.cockroach.sql.ISensitiveInfo, b: protos.cockroach.sql.ISensitiveInfo, @@ -214,6 +249,7 @@ export function addStatementStats( plan_gists: planGists, index_recommendations: indexRec, indexes: indexes, + latency_info: aggregateLatencyInfo(a, b), }; } diff --git a/pkg/ui/workspaces/db-console/src/views/statements/statements.spec.tsx b/pkg/ui/workspaces/db-console/src/views/statements/statements.spec.tsx index 741c962c5b89..98933f14001a 100644 --- a/pkg/ui/workspaces/db-console/src/views/statements/statements.spec.tsx +++ b/pkg/ui/workspaces/db-console/src/views/statements/statements.spec.tsx @@ -530,6 +530,13 @@ function makeStats(): Required { plan_gists: ["Ais="], index_recommendations: [], indexes: ["123@456"], + latency_info: { + min: 0.01, + max: 1.2, + p50: 0.4, + p90: 0.7, + p99: 1.1, + }, }; }