From b8eb3a7b6d6453075295bdebd952083d182cc2fe Mon Sep 17 00:00:00 2001 From: "blathers-crl[bot]" <63125349+blathers-crl[bot]@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:48:09 +0000 Subject: [PATCH] tmp --- .github/workflows/update_releases.yaml | 1 - DEPS.bzl | 6 +- build/bazelutil/distdir_files.bzl | 2 +- docs/generated/metrics/metrics.html | 13 +- .../settings/settings-for-tenants.txt | 2 +- docs/generated/settings/settings.html | 2 +- go.mod | 2 +- go.sum | 4 +- pkg/BUILD.bazel | 3 - pkg/build/version.txt | 2 +- pkg/ccl/backupccl/backup_tenant_test.go | 1 + pkg/ccl/backupccl/backup_test.go | 96 ---- pkg/ccl/backupccl/show_test.go | 23 - pkg/ccl/crosscluster/BUILD.bazel | 11 + .../crosscluster_type_resolver.go} | 24 +- pkg/ccl/crosscluster/logical/BUILD.bazel | 1 - .../create_logical_replication_stmt.go | 12 +- .../logical/logical_replication_job.go | 6 +- .../logical/logical_replication_job_test.go | 139 ++++- .../crosscluster/logical/lww_row_processor.go | 13 + .../streamclient/partitioned_stream_client.go | 4 +- .../logictestccl/testdata/logic_test/generic | 27 - pkg/cli/testdata/declarative-rules/deprules | 2 +- .../declarative-rules/invalid_version | 4 +- pkg/clusterversion/cockroach_versions.go | 2 +- pkg/cmd/drtprod/configs/drt_chaos.yaml | 4 - pkg/cmd/drtprod/configs/drt_large.yaml | 4 - pkg/cmd/drtprod/configs/drt_scale.yaml | 12 +- .../drtprod/configs/drt_scale_operations.yaml | 2 +- pkg/cmd/drtprod/scripts/tpcc_init.sh | 32 +- pkg/cmd/roachtest/operations/add_column.go | 18 - pkg/cmd/roachtest/operations/add_index.go | 22 +- pkg/cmd/roachtest/operations/utils.go | 24 - pkg/cmd/roachtest/tests/activerecord.go | 2 +- .../tests/admission_control_latency.go | 5 - pkg/cmd/roachtest/tests/follower_reads.go | 7 + pkg/cmd/roachtest/tests/pgjdbc_blocklist.go | 1 - pkg/cmd/roachtest/tests/ruby_pg.go | 2 +- pkg/kv/kvserver/BUILD.bazel | 3 +- pkg/kv/kvserver/client_merge_test.go | 40 +- pkg/kv/kvserver/client_raft_log_queue_test.go | 62 --- .../kvserver/flow_control_integration_test.go | 65 +++ pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go | 28 + .../kvflowcontrol/rac2/range_controller.go | 43 +- .../rac2/range_controller_test.go | 70 ++- .../kvflowcontrol/rac2/store_stream.go | 4 +- .../kvflowcontrol/rac2/token_counter.go | 86 +++- .../kvflowcontrol/rac2/token_counter_test.go | 26 +- .../kvflowcontrol/replica_rac2/processor.go | 5 +- .../replica_rac2/processor_test.go | 2 +- .../kvflowcontrol/replica_rac2/raft_node.go | 9 + pkg/kv/kvserver/kvserverpb/raft.proto | 16 - pkg/kv/kvserver/raft.go | 2 +- pkg/kv/kvserver/rafttrace/BUILD.bazel | 38 -- pkg/kv/kvserver/rafttrace/rafttrace.go | 477 ------------------ pkg/kv/kvserver/rafttrace/rafttrace_test.go | 344 ------------- pkg/kv/kvserver/replica.go | 5 - .../kvserver/replica_application_decoder.go | 2 +- pkg/kv/kvserver/replica_application_result.go | 6 +- .../replica_application_result_test.go | 16 +- pkg/kv/kvserver/replica_destroy.go | 1 - pkg/kv/kvserver/replica_init.go | 4 +- pkg/kv/kvserver/replica_proposal.go | 22 +- pkg/kv/kvserver/replica_proposal_buf.go | 49 +- pkg/kv/kvserver/replica_proposal_buf_test.go | 5 +- pkg/kv/kvserver/replica_raft.go | 44 +- pkg/kv/kvserver/replica_store_liveness.go | 15 +- pkg/kv/kvserver/replica_test.go | 10 +- pkg/kv/kvserver/store.go | 5 - pkg/kv/kvserver/store_snapshot.go | 9 +- .../testdata/replica_unavailable_error.txt | 2 +- pkg/raft/BUILD.bazel | 1 + pkg/raft/node_test.go | 10 +- pkg/raft/quorum/joint.go | 14 + pkg/raft/quorum/quorum_test.go | 19 + pkg/raft/raft.go | 38 +- 
pkg/raft/raft_test.go | 45 +- pkg/raft/raftpb/raft.go | 6 - pkg/raft/rawnode_test.go | 31 +- pkg/raft/status.go | 4 +- pkg/raft/storage.go | 169 ++++--- pkg/raft/storage_test.go | 101 ++-- pkg/raft/tracker/fortificationtracker.go | 27 +- pkg/raft/types.go | 13 +- pkg/raft/util.go | 4 - .../install/files/cockroachdb-logging.yaml | 6 +- pkg/server/BUILD.bazel | 4 + pkg/server/api_v2.go | 2 + pkg/server/http_metrics.go | 114 +++++ pkg/server/http_metrics_test.go | 262 ++++++++++ pkg/sql/alter_default_privileges.go | 12 +- pkg/sql/alter_table.go | 13 +- pkg/sql/catalog/bootstrap/testdata/testdata | 8 +- .../testdata/bootstrap_system | 2 +- .../testdata/bootstrap_tenant | 2 +- .../tabledesc/logical_replication_helpers.go | 94 +++- pkg/sql/conn_executor.go | 10 + pkg/sql/conn_executor_exec.go | 2 +- pkg/sql/exec_log.go | 102 ++-- pkg/sql/exec_util.go | 16 +- pkg/sql/executor_statement_metrics.go | 6 +- pkg/sql/importer/BUILD.bazel | 3 +- pkg/sql/importer/read_import_base.go | 3 +- pkg/sql/instrumentation.go | 3 +- pkg/sql/logictest/REPOSITORIES.bzl | 20 +- .../alter_default_privileges_for_schema | 4 - .../alter_default_privileges_for_sequence | 3 - .../alter_default_privileges_for_table | 24 +- .../alter_default_privileges_for_type | 3 - .../alter_default_privileges_in_schema | 3 - .../testdata/logic_test/crdb_internal_catalog | 2 +- .../testdata/logic_test/reassign_owned_by | 11 + .../logic_test/show_default_privileges | 1 - pkg/sql/opt/exec/execbuilder/BUILD.bazel | 1 + pkg/sql/opt/exec/execbuilder/builder.go | 38 +- pkg/sql/opt/exec/execbuilder/relational.go | 15 +- .../exec/execbuilder/testdata/inverted_index | 57 ++- .../opt/memo/testdata/stats/inverted-array | 8 +- pkg/sql/opt/memo/testdata/stats/inverted-json | 132 ++++- pkg/sql/opt/props/histogram.go | 60 --- pkg/sql/opt/props/histogram_test.go | 49 -- pkg/sql/opt/xform/rules/select.opt | 16 - pkg/sql/opt/xform/select_funcs.go | 148 +----- pkg/sql/opt/xform/testdata/rules/select | 471 +---------------- pkg/sql/plan_opt.go | 210 ++++---- pkg/sql/prepared_stmt.go | 17 +- pkg/sql/reassign_owned_by.go | 36 +- .../scbuild/internal/scbuildstmt/helpers.go | 22 +- pkg/sql/sem/tree/schema_helpers.go | 45 +- pkg/sql/sem/tree/schema_helpers_test.go | 20 +- pkg/storage/pebble.go | 20 +- pkg/storage/pebble_key_schema.go | 42 +- pkg/storage/pebble_key_schema_test.go | 17 +- pkg/storage/pebble_test.go | 52 +- .../lint/passes/fmtsafe/functions.go | 2 - .../lint/passes/redactcheck/redactcheck.go | 12 +- pkg/testutils/release/cockroach_releases.yaml | 6 +- .../cluster-ui/src/api/databaseDetailsApi.ts | 4 - .../src/databasesPage/databasesPage.tsx | 23 - .../databaseDetailsSpanStats.saga.spec.ts | 1 - .../db-console/src/util/api.spec.ts | 2 - .../nodeGraphs/dashboards/overview.tsx | 9 +- .../containers/nodeGraphs/dashboards/sql.tsx | 9 +- .../containers/nodeGraphs/summaryBar.tsx | 27 +- pkg/util/admission/snapshot_queue.go | 20 +- pkg/util/admission/snapshot_queue_test.go | 5 +- pkg/util/metric/histogram_buckets.go | 11 +- pkg/util/metric/metric.go | 81 +++ pkg/util/metric/metric_test.go | 112 ++++ .../metric/testdata/ResponseTime30sBuckets | 27 + pkg/util/tracing/tracingpb/recorded_span.go | 4 - pkg/workload/BUILD.bazel | 3 - pkg/workload/cli/BUILD.bazel | 1 - pkg/workload/cli/check.go | 23 +- pkg/workload/datadog.go | 69 --- .../schemachange/operation_generator.go | 7 +- pkg/workload/schemachange/schemachange.go | 22 +- scripts/bump-pebble.sh | 2 +- 158 files changed, 2199 insertions(+), 3028 deletions(-) rename pkg/{sql/importer/import_type_resolver.go 
=> ccl/crosscluster/crosscluster_type_resolver.go} (80%) delete mode 100644 pkg/kv/kvserver/rafttrace/BUILD.bazel delete mode 100644 pkg/kv/kvserver/rafttrace/rafttrace.go delete mode 100644 pkg/kv/kvserver/rafttrace/rafttrace_test.go create mode 100644 pkg/server/http_metrics.go create mode 100644 pkg/server/http_metrics_test.go create mode 100644 pkg/util/metric/testdata/ResponseTime30sBuckets delete mode 100644 pkg/workload/datadog.go diff --git a/.github/workflows/update_releases.yaml b/.github/workflows/update_releases.yaml index 86427d57d0f7..cb65bf0eab4c 100644 --- a/.github/workflows/update_releases.yaml +++ b/.github/workflows/update_releases.yaml @@ -31,7 +31,6 @@ jobs: - "release-23.2" - "release-24.1" - "release-24.2" - - "release-24.3" name: Update pkg/testutils/release/cockroach_releases.yaml on ${{ matrix.branch }} runs-on: ubuntu-latest steps: diff --git a/DEPS.bzl b/DEPS.bzl index 6ab96672742c..5448be130659 100644 --- a/DEPS.bzl +++ b/DEPS.bzl @@ -1818,10 +1818,10 @@ def go_deps(): patches = [ "@com_github_cockroachdb_cockroach//build/patches:com_github_cockroachdb_pebble.patch", ], - sha256 = "8c165990dc3d4d67618b19e45e2c79f5f48ab9df4e19f881ee1cfa82cdd009df", - strip_prefix = "github.com/cockroachdb/pebble@v0.0.0-20241017195839-1d2e9e829b92", + sha256 = "a72c365ccf143d2bdb7c9619bab0a577568bb205b5d298711f32297098747b7c", + strip_prefix = "github.com/cockroachdb/pebble@v0.0.0-20241023221932-8bf23da79c5c", urls = [ - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20241017195839-1d2e9e829b92.zip", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20241023221932-8bf23da79c5c.zip", ], ) go_repository( diff --git a/build/bazelutil/distdir_files.bzl b/build/bazelutil/distdir_files.bzl index bf391b59f57f..b02be4e1ab55 100644 --- a/build/bazelutil/distdir_files.bzl +++ b/build/bazelutil/distdir_files.bzl @@ -345,7 +345,7 @@ DISTDIR_FILES = { "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/gostdlib/com_github_cockroachdb_gostdlib-v1.19.0.zip": "c4d516bcfe8c07b6fc09b8a9a07a95065b36c2855627cb3514e40c98f872b69e", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/logtags/com_github_cockroachdb_logtags-v0.0.0-20230118201751-21c54148d20b.zip": "ca7776f47e5fecb4c495490a679036bfc29d95bd7625290cfdb9abb0baf97476", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/metamorphic/com_github_cockroachdb_metamorphic-v0.0.0-20231108215700-4ba948b56895.zip": "28c8cf42192951b69378cf537be5a9a43f2aeb35542908cc4fe5f689505853ea", - "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20241017195839-1d2e9e829b92.zip": "8c165990dc3d4d67618b19e45e2c79f5f48ab9df4e19f881ee1cfa82cdd009df", + "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/pebble/com_github_cockroachdb_pebble-v0.0.0-20241023221932-8bf23da79c5c.zip": "a72c365ccf143d2bdb7c9619bab0a577568bb205b5d298711f32297098747b7c", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/redact/com_github_cockroachdb_redact-v1.1.5.zip": "11b30528eb0dafc8bc1a5ba39d81277c257cbe6946a7564402f588357c164560", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/returncheck/com_github_cockroachdb_returncheck-v0.0.0-20200612231554-92cdbca611dd.zip": 
"ce92ba4352deec995b1f2eecf16eba7f5d51f5aa245a1c362dfe24c83d31f82b", "https://storage.googleapis.com/cockroach-godeps/gomod/github.com/cockroachdb/stress/com_github_cockroachdb_stress-v0.0.0-20220803192808-1806698b1b7b.zip": "3fda531795c600daf25532a4f98be2a1335cd1e5e182c72789bca79f5f69fcc1", diff --git a/docs/generated/metrics/metrics.html b/docs/generated/metrics/metrics.html index 4bbbd213e189..4d8f7d88dc8f 100644 --- a/docs/generated/metrics/metrics.html +++ b/docs/generated/metrics/metrics.html @@ -1541,6 +1541,7 @@ APPLICATIONschedules.scheduled-sql-stats-compaction-executor.failedNumber of scheduled-sql-stats-compaction-executor jobs failedJobsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONschedules.scheduled-sql-stats-compaction-executor.startedNumber of scheduled-sql-stats-compaction-executor jobs startedJobsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONschedules.scheduled-sql-stats-compaction-executor.succeededNumber of scheduled-sql-stats-compaction-executor jobs succeededJobsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONserver.http.request.duration.nanosDuration of an HTTP request in nanoseconds.DurationHISTOGRAMNANOSECONDSAVGNONE APPLICATIONsql.bytesinNumber of SQL bytes receivedSQL BytesCOUNTERBYTESAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.bytesoutNumber of SQL bytes sentSQL BytesCOUNTERBYTESAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.conn.failuresNumber of SQL connection failuresConnectionsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE @@ -1560,6 +1561,10 @@ APPLICATIONsql.copy.nonatomic.started.count.internalNumber of non-atomic COPY SQL statements started (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.copy.started.countNumber of COPY SQL statements startedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.copy.started.count.internalNumber of COPY SQL statements started (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONsql.crud_query.countNumber of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONsql.crud_query.count.internalNumber of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONsql.crud_query.started.countNumber of SQL SELECT, INSERT, UPDATE, DELETE statements startedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONsql.crud_query.started.count.internalNumber of SQL SELECT, INSERT, UPDATE, DELETE statements started (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.ddl.countNumber of SQL DDL statements successfully executedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.ddl.count.internalNumber of SQL DDL statements successfully executed (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.ddl.started.countNumber of SQL DDL statements startedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE @@ -1675,10 +1680,10 @@ APPLICATIONsql.pre_serve.mem.curCurrent memory usage for SQL connections prior to routing the connection to the target SQL serverMemoryGAUGEBYTESAVGNONE APPLICATIONsql.pre_serve.mem.maxMemory usage for SQL connections prior to routing the connection to the target SQL serverMemoryHISTOGRAMBYTESAVGNONE APPLICATIONsql.pre_serve.new_connsNumber of SQL connections created prior to routing the connection to the target SQL 
serverConnectionsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE -APPLICATIONsql.query.countNumber of SQL queries executedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE -APPLICATIONsql.query.count.internalNumber of SQL queries executed (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE -APPLICATIONsql.query.started.countNumber of SQL queries startedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE -APPLICATIONsql.query.started.count.internalNumber of SQL queries started (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONsql.query.countNumber of SQL operations started including queries, and transaction control statementsSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONsql.query.count.internalNumber of SQL operations started including queries, and transaction control statements (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONsql.query.started.countNumber of SQL operations started including queries, and transaction control statementsSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE +APPLICATIONsql.query.started.count.internalNumber of SQL operations started including queries, and transaction control statements (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.restart_savepoint.countNumber of `SAVEPOINT cockroach_restart` statements successfully executedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.restart_savepoint.count.internalNumber of `SAVEPOINT cockroach_restart` statements successfully executed (internal queries)SQL Internal StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE APPLICATIONsql.restart_savepoint.release.countNumber of `RELEASE SAVEPOINT cockroach_restart` statements successfully executedSQL StatementsCOUNTERCOUNTAVGNON_NEGATIVE_DERIVATIVE diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index bbe69ae53f43..7134c0d894a0 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -401,4 +401,4 @@ trace.snapshot.rate duration 0s if non-zero, interval at which background trace trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https:///#/debug/tracez application trace.zipkin.collector string the address of a Zipkin instance to receive traces, as :. If no port is specified, 9411 will be used. application ui.display_timezone enumeration etc/utc the timezone used to format timestamps in the ui [etc/utc = 0, america/new_york = 1] application -version version 1000024.2-upgrading-to-1000024.3-step-022 set the active cluster version in the format '.' application +version version 24.2-upgrading-to-24.3-step-022 set the active cluster version in the format '.' application diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 525c0e5e827d..ab5c41a3ada6 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -359,6 +359,6 @@
trace.span_registry.enabled
booleantrueif set, ongoing traces can be seen at https://<ui>/#/debug/tracezServerless/Dedicated/Self-Hosted
trace.zipkin.collector
stringthe address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.Serverless/Dedicated/Self-Hosted
ui.display_timezone
enumerationetc/utcthe timezone used to format timestamps in the ui [etc/utc = 0, america/new_york = 1]Serverless/Dedicated/Self-Hosted
-version
-version1000024.2-upgrading-to-1000024.3-step-022set the active cluster version in the format '<major>.<minor>'Serverless/Dedicated/Self-Hosted
+version
version24.2-upgrading-to-24.3-step-022set the active cluster version in the format '<major>.<minor>'Serverless/Dedicated/Self-Hosted diff --git a/go.mod b/go.mod index 40921e4081a8..8226a5e00c85 100644 --- a/go.mod +++ b/go.mod @@ -135,7 +135,7 @@ require ( github.com/cockroachdb/go-test-teamcity v0.0.0-20191211140407-cff980ad0a55 github.com/cockroachdb/gostdlib v1.19.0 github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b - github.com/cockroachdb/pebble v0.0.0-20241017195839-1d2e9e829b92 + github.com/cockroachdb/pebble v0.0.0-20241023221932-8bf23da79c5c github.com/cockroachdb/redact v1.1.5 github.com/cockroachdb/returncheck v0.0.0-20200612231554-92cdbca611dd github.com/cockroachdb/stress v0.0.0-20220803192808-1806698b1b7b diff --git a/go.sum b/go.sum index e1a96f46f20d..20658cb6efe5 100644 --- a/go.sum +++ b/go.sum @@ -536,8 +536,8 @@ github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZe github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA= github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA= -github.com/cockroachdb/pebble v0.0.0-20241017195839-1d2e9e829b92 h1:AEWpYdO8k0gpPWZtpP8CyTr901vv7yxKVrzkXz5Vte8= -github.com/cockroachdb/pebble v0.0.0-20241017195839-1d2e9e829b92/go.mod h1:XmS8uVDd9YFw/1R7J0J/CmTUANwT7iGnBRxH9AyDA90= +github.com/cockroachdb/pebble v0.0.0-20241023221932-8bf23da79c5c h1:KxaJAPo1rdkJdghI6y4GhHUDNIBMsvTz8fW6nThzWLg= +github.com/cockroachdb/pebble v0.0.0-20241023221932-8bf23da79c5c/go.mod h1:XmS8uVDd9YFw/1R7J0J/CmTUANwT7iGnBRxH9AyDA90= github.com/cockroachdb/redact v1.1.3/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index f3bc4fc7a43d..bdf953d5406c 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -269,7 +269,6 @@ ALL_TESTS = [ "//pkg/kv/kvserver/protectedts:protectedts_test", "//pkg/kv/kvserver/raftentry:raftentry_test", "//pkg/kv/kvserver/raftlog:raftlog_test", - "//pkg/kv/kvserver/rafttrace:rafttrace_test", "//pkg/kv/kvserver/raftutil:raftutil_test", "//pkg/kv/kvserver/rangefeed:rangefeed_test", "//pkg/kv/kvserver/rangelog:rangelog_test", @@ -1514,8 +1513,6 @@ GO_TARGETS = [ "//pkg/kv/kvserver/raftentry:raftentry_test", "//pkg/kv/kvserver/raftlog:raftlog", "//pkg/kv/kvserver/raftlog:raftlog_test", - "//pkg/kv/kvserver/rafttrace:rafttrace", - "//pkg/kv/kvserver/rafttrace:rafttrace_test", "//pkg/kv/kvserver/raftutil:raftutil", "//pkg/kv/kvserver/raftutil:raftutil_test", "//pkg/kv/kvserver/rangefeed:rangefeed", diff --git a/pkg/build/version.txt b/pkg/build/version.txt index a6f0b11222a8..20e1b7af7c9d 100644 --- a/pkg/build/version.txt +++ b/pkg/build/version.txt @@ -1 +1 @@ -v24.3.0-alpha.3 +v24.3.0-beta.2 diff --git a/pkg/ccl/backupccl/backup_tenant_test.go b/pkg/ccl/backupccl/backup_tenant_test.go index 47962069654e..6643e8e58b01 100644 --- a/pkg/ccl/backupccl/backup_tenant_test.go +++ b/pkg/ccl/backupccl/backup_tenant_test.go @@ -42,6 +42,7 @@ func TestBackupSharedProcessTenantNodeDown(t *testing.T) { ctx := context.Background() skip.UnderRace(t, "multi-node, multi-tenant test too slow under race") + skip.UnderDeadlock(t, "too slow under 
deadlock detector") params := base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ DefaultTestTenant: base.TestControlsTenantsExplicitly, diff --git a/pkg/ccl/backupccl/backup_test.go b/pkg/ccl/backupccl/backup_test.go index 54c006f43ae8..6d85a48b18d9 100644 --- a/pkg/ccl/backupccl/backup_test.go +++ b/pkg/ccl/backupccl/backup_test.go @@ -4105,102 +4105,6 @@ func TestBackupRestoreChecksum(t *testing.T) { sqlDB.ExpectErr(t, "checksum mismatch", `RESTORE data.* FROM $1`, localFoo) } -// TestNonLinearChain observes the effect of a non-linear chain of backups, for -// example if two inc backups run concurrently, where the second starts before -// the first finishes and thus does not use the first's end time when picking a -// start time. In such a chain this first backup is made redundant by the second -// and should be ignored by restore rather than restored. -func TestNonLinearChain(t *testing.T) { - defer leaktest.AfterTest(t)() - defer log.Scope(t).Close(t) - - dir, cleanup := testutils.TempDir(t) - defer cleanup() - - tc := testcluster.NewTestCluster(t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ - DefaultTestTenant: base.TODOTestTenantDisabled, ExternalIODir: dir, Knobs: base.TestingKnobs{ - JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals(), - }, - }}) - - tc.Start(t) - defer tc.Stopper().Stop(context.Background()) - - sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) - - // Make a table with a row in it and make a full backup of it. - sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY)`) - sqlDB.Exec(t, `INSERT INTO t VALUES (0)`) - sqlDB.Exec(t, `BACKUP TABLE defaultdb.t INTO $1`, localFoo) - require.Len(t, sqlDB.QueryStr(t, `SELECT DISTINCT end_time FROM [SHOW BACKUP LATEST IN $1]`, localFoo), 1) - - // Write a row and note the time that includes that row. - var ts1, ts2 string - sqlDB.Exec(t, `INSERT INTO t VALUES (1)`) - sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts1) - - // Start *but pause rather than finish* an inc backup to ts1 of our new row. - var j jobspb.JobID - sqlDB.Exec(t, `SET CLUSTER SETTING jobs.debug.pausepoints = 'backup.before.flow'`) - sqlDB.QueryRow(t, fmt.Sprintf(`BACKUP TABLE defaultdb.t INTO LATEST IN $1 AS OF SYSTEM TIME %s WITH DETACHED`, ts1), localFoo).Scan(&j) - jobutils.WaitForJobToPause(t, sqlDB, j) - sqlDB.Exec(t, `RESET CLUSTER SETTING jobs.debug.pausepoints`) - - // Add another row and record the time that includes it. - sqlDB.Exec(t, `INSERT INTO t VALUES (2)`) - sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts2) - - // Run -- and finish -- an inc backup to ts2. Since the first inc has not yet - // finished, this will find the full as its parent and use its end, rather - // than the paused inc, as its start time. - sqlDB.Exec(t, fmt.Sprintf(`BACKUP TABLE defaultdb.t INTO LATEST IN $1 AS OF SYSTEM TIME %s`, ts2), localFoo) - - // We should see two end times now in the shown backup -- the full and this - // (second) inc. - require.Len(t, sqlDB.QueryStr(t, `SELECT DISTINCT end_time FROM [SHOW BACKUP LATEST IN $1]`, localFoo), 2) - - // Now we have a full ending at t0, an incomplete inc from t0 to t1, and a - // complete inc also from t0 but to t2. We will move `t` out of our way and - // run a restore of the chain, i.e. to t2 to see what happens, noting how many - // files we open to do so. 
- sqlDB.Exec(t, `DROP TABLE t`) - openedBefore := tc.Servers[0].MustGetSQLCounter("cloud.readers_opened") - sqlDB.Exec(t, `RESTORE TABLE defaultdb.t FROM LATEST IN $1`, localFoo) - sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"0"}, {"1"}, {"2"}}) - - // Note how many files the restore opened. - openedA := tc.Servers[0].MustGetSQLCounter("cloud.readers_opened") - openedBefore - - // Now let's let the paused backup finish, adding a bonus "spur" to the chian. - sqlDB.Exec(t, `RESUME JOB $1`, j) - jobutils.WaitForJobToSucceed(t, sqlDB, j) - - // We should see three end times now in the shown backup -- the full, the 2nd - // inc we saw before, but now also this first inc as well. - require.Len(t, sqlDB.QueryStr(t, `SELECT DISTINCT end_time FROM [SHOW BACKUP LATEST IN $1]`, localFoo), 3) - - // Restore the same thing -- t2 -- we did before but now with the extra inc - // spur hanging out in the chain. This should produce the same result, and we - // would like it to only open one extra file to do so -- the manifest that - // includes the timestamps that then show it is not needed by the restore. - sqlDB.Exec(t, `DROP TABLE t`) - sqlDB.Exec(t, `RESTORE TABLE defaultdb.t FROM LATEST IN $1`, localFoo) - sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"0"}, {"1"}, {"2"}}) - openedB := tc.Servers[0].MustGetSQLCounter("cloud.readers_opened") - openedA - openedBefore - // TODO(dt): enable this assertion once it holds. - if false { - require.Equal(t, openedA+1, openedB) - } else { - require.Less(t, openedA+1, openedB) - } - - // Finally, make sure we can restore from the tip of the spur, not just the - // tip of the chain. - sqlDB.Exec(t, `DROP TABLE t`) - sqlDB.Exec(t, fmt.Sprintf(`RESTORE TABLE defaultdb.t FROM LATEST IN $1 AS OF SYSTEM TIME %s`, ts1), localFoo) - sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"0"}, {"1"}}) -} - func TestTimestampMismatch(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) diff --git a/pkg/ccl/backupccl/show_test.go b/pkg/ccl/backupccl/show_test.go index 30a55bb21364..7331fb351677 100644 --- a/pkg/ccl/backupccl/show_test.go +++ b/pkg/ccl/backupccl/show_test.go @@ -698,29 +698,6 @@ func TestShowBackupWithDebugIDs(t *testing.T) { require.Greater(t, dbID, 0) require.Greater(t, publicID, 0) - - res := sqlDB.QueryStr(t, ` - SELECT database_name, database_id, parent_schema_name, parent_schema_id, object_name, object_id, object_type - FROM [SHOW BACKUP FROM LATEST IN $1 WITH debug_ids] - ORDER BY object_id`, full) - - dbIDStr := strconv.Itoa(dbID) - publicIDStr := strconv.Itoa(publicID) - schemaIDStr := strconv.Itoa(dbID + 5) - - expectedObjects := [][]string{ - {"NULL", "NULL", "NULL", "NULL", "data", dbIDStr, "database"}, - {"data", dbIDStr, "NULL", "NULL", "public", strconv.Itoa(dbID + 1), "schema"}, - {"data", dbIDStr, "public", publicIDStr, "bank", strconv.Itoa(dbID + 2), "table"}, - {"data", dbIDStr, "public", publicIDStr, "welcome", strconv.Itoa(dbID + 3), "type"}, - {"data", dbIDStr, "public", publicIDStr, "_welcome", strconv.Itoa(dbID + 4), "type"}, - {"data", dbIDStr, "NULL", "NULL", "sc", schemaIDStr, "schema"}, - {"data", dbIDStr, "sc", schemaIDStr, "t1", strconv.Itoa(dbID + 6), "table"}, - {"data", dbIDStr, "sc", schemaIDStr, "t2", strconv.Itoa(dbID + 7), "table"}, - } - - require.Equal(t, expectedObjects, res) - } func TestShowBackupPathIsCollectionRoot(t *testing.T) { diff --git a/pkg/ccl/crosscluster/BUILD.bazel b/pkg/ccl/crosscluster/BUILD.bazel index b669e5b4bada..1da9bee6c2c3 100644 --- 
a/pkg/ccl/crosscluster/BUILD.bazel +++ b/pkg/ccl/crosscluster/BUILD.bazel @@ -4,6 +4,7 @@ go_library( name = "crosscluster", srcs = [ "addresses.go", + "crosscluster_type_resolver.go", "errors.go", "event.go", "settings.go", @@ -15,5 +16,15 @@ go_library( "//pkg/repstream/streampb", "//pkg/roachpb", "//pkg/settings", + "//pkg/sql/catalog", + "//pkg/sql/catalog/descpb", + "//pkg/sql/catalog/typedesc", + "//pkg/sql/pgwire/pgcode", + "//pkg/sql/pgwire/pgerror", + "//pkg/sql/sem/tree", + "//pkg/sql/sqlerrors", + "//pkg/sql/types", + "@com_github_cockroachdb_errors//:errors", + "@com_github_lib_pq//oid", ], ) diff --git a/pkg/sql/importer/import_type_resolver.go b/pkg/ccl/crosscluster/crosscluster_type_resolver.go similarity index 80% rename from pkg/sql/importer/import_type_resolver.go rename to pkg/ccl/crosscluster/crosscluster_type_resolver.go index 061bf464c009..840f8618389c 100644 --- a/pkg/sql/importer/import_type_resolver.go +++ b/pkg/ccl/crosscluster/crosscluster_type_resolver.go @@ -1,9 +1,9 @@ -// Copyright 2017 The Cockroach Authors. +// Copyright 2024 The Cockroach Authors. // // Use of this software is governed by the CockroachDB Software License // included in the /LICENSE file. -package importer +package crosscluster import ( "context" @@ -20,16 +20,18 @@ import ( "github.com/lib/pq/oid" ) -type ImportTypeResolver struct { +// CrossClusterTypeResolver is meant to be used to resolve types using type +// descriptors that originate from a different cluster. +type CrossClusterTypeResolver struct { typeIDToDesc map[descpb.ID]*descpb.TypeDescriptor typeNameToDesc map[string][]*descpb.TypeDescriptor } -var _ tree.TypeReferenceResolver = ImportTypeResolver{} -var _ catalog.TypeDescriptorResolver = ImportTypeResolver{} +var _ tree.TypeReferenceResolver = CrossClusterTypeResolver{} +var _ catalog.TypeDescriptorResolver = CrossClusterTypeResolver{} -func MakeImportTypeResolver(typeDescs []*descpb.TypeDescriptor) ImportTypeResolver { - itr := ImportTypeResolver{ +func MakeCrossClusterTypeResolver(typeDescs []*descpb.TypeDescriptor) CrossClusterTypeResolver { + itr := CrossClusterTypeResolver{ typeIDToDesc: make(map[descpb.ID]*descpb.TypeDescriptor), typeNameToDesc: make(map[string][]*descpb.TypeDescriptor), } @@ -52,7 +54,7 @@ func MakeImportTypeResolver(typeDescs []*descpb.TypeDescriptor) ImportTypeResolv // Note that if a table happens to have multiple types with the same name (but // different schemas), this implementation will return a "feature unsupported" // error. -func (i ImportTypeResolver) ResolveType( +func (i CrossClusterTypeResolver) ResolveType( ctx context.Context, name *tree.UnresolvedObjectName, ) (*types.T, error) { var descs []*descpb.TypeDescriptor @@ -75,12 +77,14 @@ func (i ImportTypeResolver) ResolveType( } // ResolveTypeByOID implements the tree.TypeReferenceResolver interface. -func (i ImportTypeResolver) ResolveTypeByOID(ctx context.Context, oid oid.Oid) (*types.T, error) { +func (i CrossClusterTypeResolver) ResolveTypeByOID( + ctx context.Context, oid oid.Oid, +) (*types.T, error) { return typedesc.ResolveHydratedTByOID(ctx, oid, i) } // GetTypeDescriptor implements the catalog.TypeDescriptorResolver interface. 
-func (i ImportTypeResolver) GetTypeDescriptor( +func (i CrossClusterTypeResolver) GetTypeDescriptor( _ context.Context, id descpb.ID, ) (tree.TypeName, catalog.TypeDescriptor, error) { var desc *descpb.TypeDescriptor diff --git a/pkg/ccl/crosscluster/logical/BUILD.bazel b/pkg/ccl/crosscluster/logical/BUILD.bazel index 42b6731903a4..e9b64205f66a 100644 --- a/pkg/ccl/crosscluster/logical/BUILD.bazel +++ b/pkg/ccl/crosscluster/logical/BUILD.bazel @@ -50,7 +50,6 @@ go_library( "//pkg/sql/execinfra", "//pkg/sql/execinfrapb", "//pkg/sql/exprutil", - "//pkg/sql/importer", "//pkg/sql/isql", "//pkg/sql/lexbase", "//pkg/sql/parser", diff --git a/pkg/ccl/crosscluster/logical/create_logical_replication_stmt.go b/pkg/ccl/crosscluster/logical/create_logical_replication_stmt.go index 81713260cb3c..a98570a46b29 100644 --- a/pkg/ccl/crosscluster/logical/create_logical_replication_stmt.go +++ b/pkg/ccl/crosscluster/logical/create_logical_replication_stmt.go @@ -208,6 +208,12 @@ func createLogicalReplicationStreamPlanHook( return err } + sourceTypes := make([]*descpb.TypeDescriptor, len(spec.TypeDescriptors)) + for i, desc := range spec.TypeDescriptors { + sourceTypes[i] = &desc + } + crossClusterResolver := crosscluster.MakeCrossClusterTypeResolver(sourceTypes) + // If the user asked to ignore "ttl-deletes", make sure that at least one of // the source tables actually has a TTL job which sets the omit bit that // is used for filtering; if not, they probably forgot that step. @@ -215,7 +221,11 @@ func createLogicalReplicationStreamPlanHook( for i, name := range srcTableNames { td := spec.TableDescriptors[name] - srcTableDescs[i] = &td + cpy := tabledesc.NewBuilder(&td).BuildCreatedMutableTable() + if err := typedesc.HydrateTypesInDescriptor(ctx, cpy, crossClusterResolver); err != nil { + return err + } + srcTableDescs[i] = cpy.TableDesc() repPairs[i].SrcDescriptorID = int32(td.ID) if td.RowLevelTTL != nil && td.RowLevelTTL.DisableChangefeedReplication { throwNoTTLWithCDCIgnoreError = false diff --git a/pkg/ccl/crosscluster/logical/logical_replication_job.go b/pkg/ccl/crosscluster/logical/logical_replication_job.go index 9f29c615ff1a..08ccfede3ef8 100644 --- a/pkg/ccl/crosscluster/logical/logical_replication_job.go +++ b/pkg/ccl/crosscluster/logical/logical_replication_job.go @@ -29,7 +29,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc" "github.com/cockroachdb/cockroach/pkg/sql/catalog/typedesc" "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" - "github.com/cockroachdb/cockroach/pkg/sql/importer" "github.com/cockroachdb/cockroach/pkg/sql/isql" "github.com/cockroachdb/cockroach/pkg/sql/physicalplan" "github.com/cockroachdb/cockroach/pkg/sql/sem/catid" @@ -359,14 +358,13 @@ func (p *logicalReplicationPlanner) generatePlanImpl( defaultFnOID = catid.FuncIDToOID(catid.DescID(defaultFnID)) } - // TODO(msbutler): is this import type resolver kosher? Should put in a new package. 
- importResolver := importer.MakeImportTypeResolver(plan.SourceTypes) + crossClusterResolver := crosscluster.MakeCrossClusterTypeResolver(plan.SourceTypes) tableMetadataByDestID := make(map[int32]execinfrapb.TableReplicationMetadata) if err := sql.DescsTxn(ctx, execCfg, func(ctx context.Context, txn isql.Txn, descriptors *descs.Collection) error { for _, pair := range payload.ReplicationPairs { srcTableDesc := plan.DescriptorMap[pair.SrcDescriptorID] cpy := tabledesc.NewBuilder(&srcTableDesc).BuildCreatedMutableTable() - if err := typedesc.HydrateTypesInDescriptor(ctx, cpy, importResolver); err != nil { + if err := typedesc.HydrateTypesInDescriptor(ctx, cpy, crossClusterResolver); err != nil { return err } srcTableDesc = *cpy.TableDesc() diff --git a/pkg/ccl/crosscluster/logical/logical_replication_job_test.go b/pkg/ccl/crosscluster/logical/logical_replication_job_test.go index ca52a991af07..2f32e2876764 100644 --- a/pkg/ccl/crosscluster/logical/logical_replication_job_test.go +++ b/pkg/ccl/crosscluster/logical/logical_replication_job_test.go @@ -1953,20 +1953,35 @@ func TestUserDefinedTypes(t *testing.T) { // Create the same user-defined type both tables. dbA.Exec(t, "CREATE TYPE my_enum AS ENUM ('one', 'two', 'three')") dbB.Exec(t, "CREATE TYPE my_enum AS ENUM ('one', 'two', 'three')") + dbA.Exec(t, "CREATE TYPE my_composite AS (a INT, b TEXT)") + dbB.Exec(t, "CREATE TYPE my_composite AS (a INT, b TEXT)") - dbA.Exec(t, "CREATE TABLE data (pk INT PRIMARY KEY, val my_enum DEFAULT 'two')") - dbB.Exec(t, "CREATE TABLE data (pk INT PRIMARY KEY, val my_enum DEFAULT 'two')") - - dbB.Exec(t, "INSERT INTO data VALUES (1, 'one')") - // Force default expression evaluation. - dbB.Exec(t, "INSERT INTO data VALUES (2)") - - var jobAID jobspb.JobID - dbA.QueryRow(t, "CREATE LOGICAL REPLICATION STREAM FROM TABLE data ON $1 INTO TABLE data with skip schema check", dbBURL.String()).Scan(&jobAID) - WaitUntilReplicatedTime(t, s.Clock().Now(), dbA, jobAID) - require.NoError(t, replicationtestutils.CheckEmptyDLQs(ctx, dbA.DB, "A")) - dbB.CheckQueryResults(t, "SELECT * FROM data", [][]string{{"1", "one"}, {"2", "two"}}) - dbA.CheckQueryResults(t, "SELECT * FROM data", [][]string{{"1", "one"}, {"2", "two"}}) + for _, mode := range []string{"validated", "immediate"} { + t.Run(mode, func(t *testing.T) { + dbA.Exec(t, "CREATE TABLE data (pk INT PRIMARY KEY, val1 my_enum DEFAULT 'two', val2 my_composite)") + dbB.Exec(t, "CREATE TABLE data (pk INT PRIMARY KEY, val1 my_enum DEFAULT 'two', val2 my_composite)") + + dbB.Exec(t, "INSERT INTO data VALUES (1, 'one', (3, 'cat'))") + // Force default expression evaluation. 
+ dbB.Exec(t, "INSERT INTO data (pk, val2) VALUES (2, (4, 'dog'))") + + var jobAID jobspb.JobID + dbA.QueryRow(t, + fmt.Sprintf("CREATE LOGICAL REPLICATION STREAM FROM TABLE data ON $1 INTO TABLE data WITH mode = %s", mode), + dbBURL.String(), + ).Scan(&jobAID) + WaitUntilReplicatedTime(t, s.Clock().Now(), dbA, jobAID) + require.NoError(t, replicationtestutils.CheckEmptyDLQs(ctx, dbA.DB, "A")) + dbB.CheckQueryResults(t, "SELECT * FROM data", [][]string{{"1", "one", "(3,cat)"}, {"2", "two", "(4,dog)"}}) + dbA.CheckQueryResults(t, "SELECT * FROM data", [][]string{{"1", "one", "(3,cat)"}, {"2", "two", "(4,dog)"}}) + + dbA.Exec(t, "CANCEL JOB $1", jobAID) + jobutils.WaitForJobToCancel(t, dbA, jobAID) + + dbA.Exec(t, "DROP TABLE data") + dbB.Exec(t, "DROP TABLE data") + }) + } } // TestLogicalReplicationCreationChecks verifies that we check that the table @@ -2075,7 +2090,7 @@ func TestLogicalReplicationCreationChecks(t *testing.T) { `cannot create logical replication stream: destination table tab CHECK constraints do not match source table tab`, "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(), ) - // Allos user to create LDR stream with mismatched CHECK via SKIP SCHEMA CHECK. + // Allow user to create LDR stream with mismatched CHECK via SKIP SCHEMA CHECK. var jobIDSkipSchemaCheck jobspb.JobID dbA.QueryRow(t, "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab WITH SKIP SCHEMA CHECK", @@ -2097,13 +2112,45 @@ func TestLogicalReplicationCreationChecks(t *testing.T) { dbA.Exec(t, "CANCEL JOB $1", jobAID) jobutils.WaitForJobToCancel(t, dbA, jobAID) - // Verify that the stream cannot be created with user defined types. + // Check if the table references a UDF. + dbA.Exec(t, "CREATE OR REPLACE FUNCTION my_udf() RETURNS INT AS $$ SELECT 1 $$ LANGUAGE SQL") + dbA.Exec(t, "ALTER TABLE tab ADD COLUMN udf_col INT NOT NULL") + dbA.Exec(t, "ALTER TABLE tab ALTER COLUMN udf_col SET DEFAULT my_udf()") + dbB.Exec(t, "ALTER TABLE tab ADD COLUMN udf_col INT NOT NULL DEFAULT 1") + dbA.ExpectErr(t, + `cannot create logical replication stream: table tab references functions with IDs \[[0-9]+\]`, + "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(), + ) + + // Check if the table references a sequence. + dbA.Exec(t, "ALTER TABLE tab DROP COLUMN udf_col") + dbB.Exec(t, "ALTER TABLE tab DROP COLUMN udf_col") + dbA.Exec(t, "CREATE SEQUENCE my_seq") + dbA.Exec(t, "ALTER TABLE tab ADD COLUMN seq_col INT NOT NULL DEFAULT nextval('my_seq')") + dbB.Exec(t, "ALTER TABLE tab ADD COLUMN seq_col INT NOT NULL DEFAULT 1") + dbA.ExpectErr(t, + `cannot create logical replication stream: table tab references sequences with IDs \[[0-9]+\]`, + "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(), + ) + + // Check if table has a trigger. + dbA.Exec(t, "ALTER TABLE tab DROP COLUMN seq_col") + dbB.Exec(t, "ALTER TABLE tab DROP COLUMN seq_col") + dbA.Exec(t, "CREATE OR REPLACE FUNCTION my_trigger() RETURNS TRIGGER AS $$ BEGIN RETURN NEW; END $$ LANGUAGE PLPGSQL") + dbA.Exec(t, "CREATE TRIGGER my_trigger BEFORE INSERT ON tab FOR EACH ROW EXECUTE FUNCTION my_trigger()") + dbA.ExpectErr(t, + `cannot create logical replication stream: table tab references triggers \[my_trigger\]`, + "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(), + ) + + // Verify that the stream cannot be created with mismatched enum types. 
+ dbA.Exec(t, "DROP TRIGGER my_trigger ON tab") dbA.Exec(t, "CREATE TYPE mytype AS ENUM ('a', 'b', 'c')") - dbB.Exec(t, "CREATE TYPE b.mytype AS ENUM ('a', 'b', 'c')") + dbB.Exec(t, "CREATE TYPE b.mytype AS ENUM ('a', 'b')") dbA.Exec(t, "ALTER TABLE tab ADD COLUMN enum_col mytype NOT NULL") dbB.Exec(t, "ALTER TABLE b.tab ADD COLUMN enum_col b.mytype NOT NULL") dbA.ExpectErr(t, - `cannot create logical replication stream: destination table tab column enum_col has user-defined type USER DEFINED ENUM: public.mytype`, + `cannot create logical replication stream: .* destination type USER DEFINED ENUM: public.mytype has logical representations \[a b c\], but the source type USER DEFINED ENUM: mytype has \[a b\]`, "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(), ) // Allows user to create LDR stream with UDT via SKIP SCHEMA CHECK. @@ -2114,9 +2161,21 @@ func TestLogicalReplicationCreationChecks(t *testing.T) { dbA.Exec(t, "CANCEL JOB $1", jobIDSkipSchemaCheck) jobutils.WaitForJobToCancel(t, dbA, jobIDSkipSchemaCheck) - // Check that UNIQUE indexes match. + // Verify that the stream cannot be created with mismatched composite types. dbA.Exec(t, "ALTER TABLE tab DROP COLUMN enum_col") dbB.Exec(t, "ALTER TABLE b.tab DROP COLUMN enum_col") + dbA.Exec(t, "CREATE TYPE composite_typ AS (a INT, b TEXT)") + dbB.Exec(t, "CREATE TYPE b.composite_typ AS (a TEXT, b INT)") + dbA.Exec(t, "ALTER TABLE tab ADD COLUMN composite_udt_col composite_typ NOT NULL") + dbB.Exec(t, "ALTER TABLE b.tab ADD COLUMN composite_udt_col b.composite_typ NOT NULL") + dbA.ExpectErr(t, + `cannot create logical replication stream: .* destination type USER DEFINED RECORD: public.composite_typ tuple element 0 does not match source type USER DEFINED RECORD: composite_typ tuple element 0: destination type INT8 does not match source type STRING`, + "CREATE LOGICAL REPLICATION STREAM FROM TABLE tab ON $1 INTO TABLE tab", dbBURL.String(), + ) + + // Check that UNIQUE indexes match. + dbA.Exec(t, "ALTER TABLE tab DROP COLUMN composite_udt_col") + dbB.Exec(t, "ALTER TABLE b.tab DROP COLUMN composite_udt_col") dbA.Exec(t, "CREATE UNIQUE INDEX payload_idx ON tab(payload)") dbB.Exec(t, "CREATE UNIQUE INDEX multi_idx ON b.tab(composite_col, pk)") dbA.ExpectErr(t, @@ -2134,6 +2193,50 @@ func TestLogicalReplicationCreationChecks(t *testing.T) { dbBURL.String(), ).Scan(&jobAID) + // Verify that unsupported CREATE INDEX statements are blocked. + dbA.ExpectErr(t, + "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs", + "CREATE INDEX virtual_col_idx ON tab(virtual_col)", + ) + dbA.ExpectErr(t, + "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs", + "CREATE INDEX hash_idx ON tab(pk) USING HASH WITH (bucket_count = 4)", + ) + dbA.ExpectErr(t, + "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs", + "CREATE INDEX partial_idx ON tab(composite_col) WHERE pk > 0", + ) + dbA.ExpectErr(t, + "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs", + "CREATE UNIQUE INDEX unique_idx ON tab(composite_col)", + ) + + // Creating triggers is also blocked. 
+ dbA.ExpectErr(t, + "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs", + "CREATE TRIGGER my_trigger BEFORE INSERT ON tab FOR EACH ROW EXECUTE FUNCTION my_trigger()", + ) + + // Creating a "normal" secondary index (and dropping it) is allowed. + dbA.Exec(t, "CREATE INDEX normal_idx ON tab(composite_col)") + dbA.Exec(t, "DROP INDEX normal_idx") + + // Changing safe table storage parameters is allowed. + dbA.ExpectErr(t, + "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs", + "ALTER TABLE tab SET (ttl = 'on', ttl_expire_after = '5m')", + ) + dbA.Exec(t, "ALTER TABLE tab SET (ttl = 'on', ttl_expiration_expression = $$ '2024-01-01 12:00:00'::TIMESTAMPTZ $$)") + dbA.ExpectErr(t, + "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs", + "ALTER TABLE tab RESET (ttl)", + ) + // Storage param updates are only allowed if it is the only change. + dbA.ExpectErr(t, + "this schema change is disallowed on table tab because it is referenced by one or more logical replication jobs", + "ALTER TABLE tab ADD COLUMN c INT, SET (fillfactor = 70)", + ) + // Kill replication job. dbA.Exec(t, "CANCEL JOB $1", jobAID) jobutils.WaitForJobToCancel(t, dbA, jobAID) diff --git a/pkg/ccl/crosscluster/logical/lww_row_processor.go b/pkg/ccl/crosscluster/logical/lww_row_processor.go index 645ec83d83ec..63122a4b469a 100644 --- a/pkg/ccl/crosscluster/logical/lww_row_processor.go +++ b/pkg/ccl/crosscluster/logical/lww_row_processor.go @@ -32,6 +32,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/cockroach/pkg/sql/sessiondatapb" + "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/metamorphic" "github.com/cockroachdb/cockroach/pkg/util/randutil" @@ -90,6 +91,12 @@ func (q *queryBuilder) AddRow(row cdcevent.Row) error { return err } if err := it.Datum(func(d tree.Datum, col cdcevent.ResultColumn) error { + if dEnum, ok := d.(*tree.DEnum); ok { + // Override the type to Unknown to avoid a mismatched type OID error + // during execution. Note that Unknown is the type used by default + // when a SQL statement is executed without type hints. + dEnum.EnumTyp = types.Unknown + } q.scratchDatums = append(q.scratchDatums, d) return nil }); err != nil { @@ -116,6 +123,12 @@ func (q *queryBuilder) AddRowDefaultNull(row *cdcevent.Row) error { continue } if err := it.Datum(func(d tree.Datum, col cdcevent.ResultColumn) error { + if dEnum, ok := d.(*tree.DEnum); ok { + // Override the type to Unknown to avoid a mismatched type OID error + // during execution. Note that Unknown is the type used by default + // when a SQL statement is executed without type hints. 
+ dEnum.EnumTyp = types.Unknown + } q.scratchDatums = append(q.scratchDatums, d) return nil }); err != nil { diff --git a/pkg/ccl/crosscluster/streamclient/partitioned_stream_client.go b/pkg/ccl/crosscluster/streamclient/partitioned_stream_client.go index 529d8408527d..d4ba69a4d9d2 100644 --- a/pkg/ccl/crosscluster/streamclient/partitioned_stream_client.go +++ b/pkg/ccl/crosscluster/streamclient/partitioned_stream_client.go @@ -343,8 +343,8 @@ func (p *partitionedStreamClient) PlanLogicalReplication( } sourceTypes := make([]*descpb.TypeDescriptor, len(streamSpec.TypeDescriptors)) - for _, desc := range streamSpec.TypeDescriptors { - sourceTypes = append(sourceTypes, &desc) + for i, desc := range streamSpec.TypeDescriptors { + sourceTypes[i] = &desc } return LogicalReplicationPlan{ diff --git a/pkg/ccl/logictestccl/testdata/logic_test/generic b/pkg/ccl/logictestccl/testdata/logic_test/generic index 175591a55e64..bc9493959480 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/generic +++ b/pkg/ccl/logictestccl/testdata/logic_test/generic @@ -1193,30 +1193,3 @@ quality of service: regular regions: actual row count: 1 size: 1 column, 1 row - -statement ok -DEALLOCATE p - -# Regression test for #132963. Do not cache non-reusable plans. -statement ok -SET plan_cache_mode = auto - -statement ok -CREATE TABLE a (a INT PRIMARY KEY) - -statement ok -PREPARE p AS SELECT create_statement FROM [SHOW CREATE TABLE a] - -query T -EXECUTE p ----- -CREATE TABLE public.a ( - a INT8 NOT NULL, - CONSTRAINT a_pkey PRIMARY KEY (a ASC) -) - -statement ok -ALTER TABLE a RENAME TO b - -statement error pgcode 42P01 pq: relation \"a\" does not exist -EXECUTE p diff --git a/pkg/cli/testdata/declarative-rules/deprules b/pkg/cli/testdata/declarative-rules/deprules index 844da777f531..9cf90b386e04 100644 --- a/pkg/cli/testdata/declarative-rules/deprules +++ b/pkg/cli/testdata/declarative-rules/deprules @@ -1,6 +1,6 @@ dep ---- -debug declarative-print-rules 1000024.2 dep +debug declarative-print-rules 24.2 dep deprules ---- - name: 'CheckConstraint transitions to ABSENT uphold 2-version invariant: PUBLIC->VALIDATED' diff --git a/pkg/cli/testdata/declarative-rules/invalid_version b/pkg/cli/testdata/declarative-rules/invalid_version index 84e28c5287b1..231ea4f78b35 100644 --- a/pkg/cli/testdata/declarative-rules/invalid_version +++ b/pkg/cli/testdata/declarative-rules/invalid_version @@ -4,5 +4,5 @@ invalid_version debug declarative-print-rules 1.1 op unsupported version number, the supported versions are: latest - 1000024.2 - 1000024.1 + 24.2 + 24.1 diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index a006e6e01781..cf4e79805de8 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -352,7 +352,7 @@ const V24_3 = Latest // binary in a dev cluster. // // See devOffsetKeyStart for more details. -const DevelopmentBranch = true +const DevelopmentBranch = false // finalVersion should be set on a release branch to the minted final cluster // version key, e.g. to V23_2 on the release-23.2 branch once it is minted. 
diff --git a/pkg/cmd/drtprod/configs/drt_chaos.yaml b/pkg/cmd/drtprod/configs/drt_chaos.yaml index f55e5789627a..a3e62fbb3b90 100644 --- a/pkg/cmd/drtprod/configs/drt_chaos.yaml +++ b/pkg/cmd/drtprod/configs/drt_chaos.yaml @@ -88,9 +88,5 @@ targets: - workload - script: "pkg/cmd/drtprod/scripts/setup_datadog_workload" - script: "pkg/cmd/drtprod/scripts/tpcc_init.sh" - args: - - cct_tpcc # suffix added to script name tpcc_init_cct_tpcc.sh - - true # determines whether to execute the script immediately on workload node flags: warehouses: 12000 - db: cct_tpcc diff --git a/pkg/cmd/drtprod/configs/drt_large.yaml b/pkg/cmd/drtprod/configs/drt_large.yaml index 4bc9ec6d7a6d..a6080097bc7e 100644 --- a/pkg/cmd/drtprod/configs/drt_large.yaml +++ b/pkg/cmd/drtprod/configs/drt_large.yaml @@ -101,9 +101,5 @@ targets: - workload - script: "pkg/cmd/drtprod/scripts/setup_datadog_workload" - script: "pkg/cmd/drtprod/scripts/tpcc_init.sh" - args: - - cct_tpcc # suffix added to script name tpcc_init_cct_tpcc.sh - - true # determines whether to execute the script immediately on workload node flags: warehouses: 15000 - db: cct_tpcc diff --git a/pkg/cmd/drtprod/configs/drt_scale.yaml b/pkg/cmd/drtprod/configs/drt_scale.yaml index d8a24e137888..a2977aa26196 100644 --- a/pkg/cmd/drtprod/configs/drt_scale.yaml +++ b/pkg/cmd/drtprod/configs/drt_scale.yaml @@ -123,16 +123,6 @@ targets: - pkg/cmd/drt/scripts/roachtest_operations_run.sh - roachtest_operations_run.sh - script: "pkg/cmd/drtprod/scripts/tpcc_init.sh" - args: - - cct_tpcc_320k # suffix added to script name tpcc_init_cct_tpcc_320k.sh - - true # determines whether to execute the script immediately on workload node flags: - warehouses: 320000 + warehouses: 100000 db: cct_tpcc - - script: "pkg/cmd/drtprod/scripts/tpcc_init.sh" - args: - - cct_tpcc_640k # suffix added to script name tpcc_init_cct_tpcc_640k.sh - - false # determines whether to execute the script immediately on workload node - flags: - warehouses: 640000 - db: cct_tpcc_big diff --git a/pkg/cmd/drtprod/configs/drt_scale_operations.yaml b/pkg/cmd/drtprod/configs/drt_scale_operations.yaml index 412995b46acc..becfa51be963 100644 --- a/pkg/cmd/drtprod/configs/drt_scale_operations.yaml +++ b/pkg/cmd/drtprod/configs/drt_scale_operations.yaml @@ -13,5 +13,5 @@ targets: steps: - script: "pkg/cmd/drtprod/scripts/create_run_operation.sh" args: - - "schema_change,add-column|add-index" + - "schema_change,add-column|add-index,0 0 * * *" # runs every day at 12 AM - "kill_stall,disk-stall|network-partition|node-kill,0 * * * *" # runs every 1 hour diff --git a/pkg/cmd/drtprod/scripts/tpcc_init.sh b/pkg/cmd/drtprod/scripts/tpcc_init.sh index 851f30e4106d..a9122a6c7cf6 100755 --- a/pkg/cmd/drtprod/scripts/tpcc_init.sh +++ b/pkg/cmd/drtprod/scripts/tpcc_init.sh @@ -9,23 +9,6 @@ # The --warehouses and other flags for import are passed as argument to this script # NOTE - This uses CLUSTER and WORKLOAD_CLUSTER environment variable, if not set the script fails -# The first argument is the name suffix that is added to the script as tpcc_init_.sh -if [ "$#" -lt 4 ]; then - echo "Usage: $0 " - exit 1 -fi -suffix=$1 -shift -# The second argument represents whether the init process should be started in the workload cluster -# The value is true or false -if [ "$1" != "true" ] && [ "$1" != "false" ]; then - # $1 is used again because of the shift - echo "Error: The second argument must be 'true' or 'false' which implies whether the script should be started in background or not." 
- exit 1 -fi -execute_script=$1 -shift - if [ -z "${CLUSTER}" ]; then echo "environment CLUSTER is not set" exit 1 @@ -36,22 +19,19 @@ if [ -z "${WORKLOAD_CLUSTER}" ]; then exit 1 fi -absolute_path=$(roachprod run "${WORKLOAD_CLUSTER}":1 -- "realpath ./tpcc_init_${suffix}.sh") +absolute_path=$(roachprod run "${WORKLOAD_CLUSTER}":1 -- "realpath ./tpcc_init.sh") pwd=$(roachprod run "${WORKLOAD_CLUSTER}":1 -- "dirname ${absolute_path}") -PGURLS=$(roachprod pgurl "${CLUSTER}") # script is responsible for importing the tpcc database for workload -roachprod ssh "${WORKLOAD_CLUSTER}":1 -- "tee tpcc_init_${suffix}.sh > /dev/null << 'EOF' +roachprod ssh "${WORKLOAD_CLUSTER}":1 -- "tee tpcc_init.sh > /dev/null << 'EOF' #!/bin/bash export ROACHPROD_GCE_DEFAULT_PROJECT=${ROACHPROD_GCE_DEFAULT_PROJECT} export ROACHPROD_DNS=${ROACHPROD_DNS} ${pwd}/roachprod sync sleep 20 -${pwd}/cockroach workload init tpcc $@ --secure --families $PGURLS +PGURLS=\$(${pwd}/roachprod pgurl ${CLUSTER} | sed s/\'//g) +${pwd}/cockroach workload init tpcc $@ --secure --families \$PGURLS EOF" -roachprod ssh "${WORKLOAD_CLUSTER}":1 -- "chmod +x tpcc_init_${suffix}.sh" - -if [ "$execute_script" = "true" ]; then - roachprod run "${WORKLOAD_CLUSTER}":1 -- "sudo systemd-run --unit tpccinit_${suffix} --same-dir --uid \$(id -u) --gid \$(id -g) bash ${pwd}/tpcc_init_${suffix}.sh" -fi +roachprod ssh "${WORKLOAD_CLUSTER}":1 -- "chmod +x tpcc_init.sh" +roachprod run "${WORKLOAD_CLUSTER}":1 -- "sudo systemd-run --unit tpccinit --same-dir --uid \$(id -u) --gid \$(id -g) bash ${pwd}/tpcc_init.sh" diff --git a/pkg/cmd/roachtest/operations/add_column.go b/pkg/cmd/roachtest/operations/add_column.go index 68d64217bbc5..10027e2d1edf 100644 --- a/pkg/cmd/roachtest/operations/add_column.go +++ b/pkg/cmd/roachtest/operations/add_column.go @@ -20,7 +20,6 @@ import ( type cleanupAddedColumn struct { db, table, column string - locked bool } func (cl *cleanupAddedColumn) Cleanup( @@ -29,10 +28,6 @@ func (cl *cleanupAddedColumn) Cleanup( conn := c.Conn(ctx, o.L(), 1, option.VirtualClusterName(roachtestflags.VirtualCluster)) defer conn.Close() - if cl.locked { - setSchemaLocked(ctx, o, conn, cl.db, cl.table, false /* lock */) - defer setSchemaLocked(ctx, o, conn, cl.db, cl.table, true /* lock */) - } o.Status(fmt.Sprintf("dropping column %s", cl.column)) _, err := conn.ExecContext(ctx, fmt.Sprintf("ALTER TABLE %s.%s DROP COLUMN %s CASCADE", cl.db, cl.table, cl.column)) if err != nil { @@ -63,17 +58,6 @@ func runAddColumn( colQualification += " NOT NULL" } - // If the table's schema is locked, then unlock the table and make sure it will - // be re-locked during cleanup. - // TODO(#129694): Remove schema unlocking/re-locking once automation is internalized. - locked := isSchemaLocked(o, conn, dbName, tableName) - if locked { - setSchemaLocked(ctx, o, conn, dbName, tableName, false /* lock */) - // Re-lock the table if necessary, so that it stays locked during any wait - // period before cleanup. 
- defer setSchemaLocked(ctx, o, conn, dbName, tableName, true /* lock */) - } - o.Status(fmt.Sprintf("adding column %s to table %s.%s", colName, dbName, tableName)) addColStmt := fmt.Sprintf("ALTER TABLE %s.%s ADD COLUMN %s VARCHAR %s", dbName, tableName, colName, colQualification) _, err := conn.ExecContext(ctx, addColStmt) @@ -82,12 +66,10 @@ func runAddColumn( } o.Status(fmt.Sprintf("column %s created", colName)) - return &cleanupAddedColumn{ db: dbName, table: tableName, column: colName, - locked: locked, } } diff --git a/pkg/cmd/roachtest/operations/add_index.go b/pkg/cmd/roachtest/operations/add_index.go index e67b8f53772c..72f7957954eb 100644 --- a/pkg/cmd/roachtest/operations/add_index.go +++ b/pkg/cmd/roachtest/operations/add_index.go @@ -20,7 +20,6 @@ import ( type cleanupAddedIndex struct { db, table, index string - locked bool } func (cl *cleanupAddedIndex) Cleanup( @@ -29,10 +28,6 @@ func (cl *cleanupAddedIndex) Cleanup( conn := c.Conn(ctx, o.L(), 1, option.VirtualClusterName(roachtestflags.VirtualCluster)) defer conn.Close() - if cl.locked { - setSchemaLocked(ctx, o, conn, cl.db, cl.table, false /* lock */) - defer setSchemaLocked(ctx, o, conn, cl.db, cl.table, true /* lock */) - } o.Status(fmt.Sprintf("dropping index %s", cl.index)) _, err := conn.ExecContext(ctx, fmt.Sprintf("DROP INDEX %s.%s@%s", cl.db, cl.table, cl.index)) if err != nil { @@ -63,15 +58,6 @@ func runAddIndex( o.Fatal(err) } - // If the table's schema is locked, then unlock the table and make sure it will - // be re-locked during cleanup. - // TODO(#129694): Remove schema unlocking/re-locking once automation is internalized. - locked := isSchemaLocked(o, conn, dbName, tableName) - if locked { - setSchemaLocked(ctx, o, conn, dbName, tableName, false /* lock */) - defer setSchemaLocked(ctx, o, conn, dbName, tableName, true /* lock */) - } - indexName := fmt.Sprintf("add_index_op_%d", rng.Uint32()) o.Status(fmt.Sprintf("adding index to column %s in table %s.%s", colName, dbName, tableName)) createIndexStmt := fmt.Sprintf("CREATE INDEX %s ON %s.%s (%s)", indexName, dbName, tableName, colName) @@ -81,12 +67,10 @@ func runAddIndex( } o.Status(fmt.Sprintf("index %s created", indexName)) - return &cleanupAddedIndex{ - db: dbName, - table: tableName, - index: indexName, - locked: locked, + db: dbName, + table: tableName, + index: indexName, } } diff --git a/pkg/cmd/roachtest/operations/utils.go b/pkg/cmd/roachtest/operations/utils.go index 5a507f4ab1c9..6b0f99a17ef9 100644 --- a/pkg/cmd/roachtest/operations/utils.go +++ b/pkg/cmd/roachtest/operations/utils.go @@ -9,7 +9,6 @@ import ( "context" gosql "database/sql" "fmt" - "strings" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation" @@ -156,26 +155,3 @@ func pickRandomStore(ctx context.Context, o operation.Operation, conn *gosql.DB, } return stores[rng.Intn(len(stores))] } - -// Returns true if the schema_locked parameter is set on this table. -func isSchemaLocked(o operation.Operation, conn *gosql.DB, db, tbl string) bool { - showTblStmt := fmt.Sprintf("SHOW CREATE %s.%s", db, tbl) - var tblName, createStmt string - err := conn.QueryRow(showTblStmt).Scan(&tblName, &createStmt) - if err != nil { - o.Fatal(err) - } - return strings.Contains(createStmt, "schema_locked = true") -} - -// Set the schema_locked storage parameter. 
-func setSchemaLocked( - ctx context.Context, o operation.Operation, conn *gosql.DB, db, tbl string, lock bool, -) { - stmt := fmt.Sprintf("ALTER TABLE %s.%s SET (schema_locked=%v)", db, tbl, lock) - o.Status(fmt.Sprintf("setting schema_locked = %v on table %s.%s", lock, db, tbl)) - _, err := conn.ExecContext(ctx, stmt) - if err != nil { - o.Fatal(err) - } -} diff --git a/pkg/cmd/roachtest/tests/activerecord.go b/pkg/cmd/roachtest/tests/activerecord.go index 04e88e7b323a..a527a7f2cd7b 100644 --- a/pkg/cmd/roachtest/tests/activerecord.go +++ b/pkg/cmd/roachtest/tests/activerecord.go @@ -146,7 +146,7 @@ func registerActiveRecord(r registry.Registry) { c, node, "installing bundler", - `cd /mnt/data1/activerecord-cockroachdb-adapter/ && sudo gem install bundler:2.1.4`, + `cd /mnt/data1/activerecord-cockroachdb-adapter/ && sudo gem install bundler:2.4.9`, ); err != nil { t.Fatal(err) } diff --git a/pkg/cmd/roachtest/tests/admission_control_latency.go b/pkg/cmd/roachtest/tests/admission_control_latency.go index 036ce4dcf7e5..7793bc158aab 100644 --- a/pkg/cmd/roachtest/tests/admission_control_latency.go +++ b/pkg/cmd/roachtest/tests/admission_control_latency.go @@ -750,11 +750,6 @@ func (v variations) runTest(ctx context.Context, t test.Test, c cluster.Cluster) `SET CLUSTER SETTING kv.lease.reject_on_leader_unknown.enabled = true`); err != nil { t.Fatal(err) } - // Enable raft tracing. Remove this once raft tracing is the default. - if _, err := db.ExecContext(ctx, - `SET CLUSTER SETTING kv.raft.max_concurrent_traces = '10'`); err != nil { - t.Fatal(err) - } // This isn't strictly necessary, but it would be nice if this test passed at 10s (or lower). if _, err := db.ExecContext(ctx, `SET CLUSTER SETTING server.time_after_store_suspect = '10s'`); err != nil { diff --git a/pkg/cmd/roachtest/tests/follower_reads.go b/pkg/cmd/roachtest/tests/follower_reads.go index 3a2febcff258..b31c0a2c32a8 100644 --- a/pkg/cmd/roachtest/tests/follower_reads.go +++ b/pkg/cmd/roachtest/tests/follower_reads.go @@ -1008,6 +1008,13 @@ func runFollowerReadsMixedVersionGlobalTableTest( // Use a longer upgrade timeout to give the migrations enough time to finish // considering the cross-region latency. mixedversion.UpgradeTimeout(60*time.Minute), + + // This test is flaky when upgrading from v23.1 to v23.2 for follower + // reads in shared-process deployments. There were a number of changes + // to tenant health checks since then which appear to have addressed + // this issue. + mixedversion.MinimumSupportedVersion("v23.2.0"), + // This test does not currently work with shared-process // deployments (#129167), so we do not run it in separate-process // mode either to reduce noise. 
We should reevaluate once the test diff --git a/pkg/cmd/roachtest/tests/pgjdbc_blocklist.go b/pkg/cmd/roachtest/tests/pgjdbc_blocklist.go index 04ed3d149b3c..2bb0583f28a3 100644 --- a/pkg/cmd/roachtest/tests/pgjdbc_blocklist.go +++ b/pkg/cmd/roachtest/tests/pgjdbc_blocklist.go @@ -399,7 +399,6 @@ var pgjdbcBlockList = blocklist{ `org.postgresql.test.jdbc2.ServerErrorTest.testNotNullConstraint`: "27796", `org.postgresql.test.jdbc2.ServerErrorTest.testPrimaryKey`: "27796", `org.postgresql.test.jdbc2.StatementTest.closeInProgressStatement()`: "unknown", - `org.postgresql.test.jdbc2.StatementTest.concurrentWarningReadAndClear()`: "unknown", `org.postgresql.test.jdbc2.StatementTest.fastCloses()`: "unknown", `org.postgresql.test.jdbc2.StatementTest.parsingSemiColons()`: "unknown", `org.postgresql.test.jdbc2.StatementTest.updateCount()`: "unknown", diff --git a/pkg/cmd/roachtest/tests/ruby_pg.go b/pkg/cmd/roachtest/tests/ruby_pg.go index 663d75c0fae9..ff521005612e 100644 --- a/pkg/cmd/roachtest/tests/ruby_pg.go +++ b/pkg/cmd/roachtest/tests/ruby_pg.go @@ -146,7 +146,7 @@ func registerRubyPG(r registry.Registry) { c, node, "installing bundler", - `cd /mnt/data1/ruby-pg/ && sudo gem install bundler:2.1.4`, + `cd /mnt/data1/ruby-pg/ && sudo gem install bundler:2.4.9`, ); err != nil { t.Fatal(err) } diff --git a/pkg/kv/kvserver/BUILD.bazel b/pkg/kv/kvserver/BUILD.bazel index bcf96f61a8d0..e1ff00259ba2 100644 --- a/pkg/kv/kvserver/BUILD.bazel +++ b/pkg/kv/kvserver/BUILD.bazel @@ -162,7 +162,6 @@ go_library( "//pkg/kv/kvserver/multiqueue", "//pkg/kv/kvserver/raftentry", "//pkg/kv/kvserver/raftlog", - "//pkg/kv/kvserver/rafttrace", "//pkg/kv/kvserver/rangefeed", "//pkg/kv/kvserver/rditer", "//pkg/kv/kvserver/readsummary", @@ -423,6 +422,7 @@ go_test( "//pkg/kv/kvserver/kvflowcontrol/kvflowdispatch", "//pkg/kv/kvserver/kvflowcontrol/kvflowinspectpb", "//pkg/kv/kvserver/kvflowcontrol/node_rac2", + "//pkg/kv/kvserver/kvflowcontrol/rac2", "//pkg/kv/kvserver/kvflowcontrol/replica_rac2", "//pkg/kv/kvserver/kvserverbase", "//pkg/kv/kvserver/kvserverpb", @@ -439,7 +439,6 @@ go_test( "//pkg/kv/kvserver/protectedts/ptutil", "//pkg/kv/kvserver/raftentry", "//pkg/kv/kvserver/raftlog", - "//pkg/kv/kvserver/rafttrace", "//pkg/kv/kvserver/raftutil", "//pkg/kv/kvserver/rangefeed", "//pkg/kv/kvserver/rditer", diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go index 2dd80ea16f67..ae0b218488d4 100644 --- a/pkg/kv/kvserver/client_merge_test.go +++ b/pkg/kv/kvserver/client_merge_test.go @@ -3171,7 +3171,6 @@ func TestMergeQueueWithExternalFiles(t *testing.T) { store, err := s.GetStores().(*kvserver.Stores).GetStore(s.GetFirstStoreID()) require.NoError(t, err) - store.SetMergeQueueActive(true) if skipExternal { verifyUnmergedSoon(t, store, lhsDesc.StartKey, rhsDesc.StartKey) } else { @@ -4293,6 +4292,11 @@ func TestStoreRangeMergeDuringShutdown(t *testing.T) { func verifyMergedSoon(t *testing.T, store *kvserver.Store, lhsStartKey, rhsStartKey roachpb.RKey) { t.Helper() + store.SetMergeQueueActive(true) + defer func() { + store.SetMergeQueueActive(false) + store.MustForceMergeScanAndProcess() // drain any merges that might already be queued + }() testutils.SucceedsSoon(t, func() error { store.MustForceMergeScanAndProcess() repl := store.LookupReplica(rhsStartKey) @@ -4310,6 +4314,11 @@ func verifyUnmergedSoon( t *testing.T, store *kvserver.Store, lhsStartKey, rhsStartKey roachpb.RKey, ) { t.Helper() + store.SetMergeQueueActive(true) + defer func() { + store.SetMergeQueueActive(false) + 
store.MustForceMergeScanAndProcess() // drain any merges that might already be queued + }() testutils.SucceedsSoon(t, func() error { store.MustForceMergeScanAndProcess() repl := store.LookupReplica(rhsStartKey) @@ -4344,9 +4353,6 @@ func TestMergeQueue(t *testing.T) { WallClock: manualClock, DefaultZoneConfigOverride: &zoneConfig, }, - Store: &kvserver.StoreTestingKnobs{ - DisableScanner: true, - }, }, }, }) @@ -4354,11 +4360,6 @@ func TestMergeQueue(t *testing.T) { conf := zoneConfig.AsSpanConfig() store := tc.GetFirstStoreFromServer(t, 0) - // The cluster with manual replication disables the merge queue, - // so we need to re-enable. - _, err := tc.ServerConn(0).Exec(`SET CLUSTER SETTING kv.range_merge.queue.enabled = true`) - require.NoError(t, err) - store.SetMergeQueueActive(true) split := func(t *testing.T, key roachpb.Key, expirationTime hlc.Timestamp) { t.Helper() @@ -4429,6 +4430,7 @@ func TestMergeQueue(t *testing.T) { kvserver.SplitByLoadEnabled.Override(ctx, &s.ClusterSettings().SV, false) } + store.SetMergeQueueActive(false) // reset merge queue to inactive store.MustForceMergeScanAndProcess() // drain any merges that might already be queued split(t, rhsStartKey.AsRawKey(), hlc.Timestamp{} /* expirationTime */) } @@ -4818,7 +4820,8 @@ func TestMergeQueueSeesNonVoters(t *testing.T) { } var clusterArgs = base.TestClusterArgs{ - // We dont want the replicate queue mucking with our test, so disable it. + // We don't want the replicate queue mucking with our test, so disable it. + // This also disables the merge queue, until it is manually enabled. ReplicationMode: base.ReplicationManual, ServerArgs: base.TestServerArgs{ Knobs: base.TestingKnobs{ @@ -4841,10 +4844,6 @@ func TestMergeQueueSeesNonVoters(t *testing.T) { store, err := tc.Server(0).GetStores().(*kvserver.Stores).GetStore(1) require.Nil(t, err) - // We're going to split the dummy range created above with an empty - // expiration time. Disable the merge queue before splitting so that the - // split ranges aren't immediately merged. - store.SetMergeQueueActive(false) leftDesc, rightDesc := splitDummyRangeInTestCluster( t, tc, dbName, "kv" /* tableName */, hlc.Timestamp{} /* splitExpirationTime */) @@ -4887,7 +4886,6 @@ func TestMergeQueueSeesNonVoters(t *testing.T) { tc.RemoveVotersOrFatal(t, rightDesc.StartKey.AsRawKey(), tc.Target(0)) rightDesc = tc.LookupRangeOrFatal(t, rightDesc.StartKey.AsRawKey()) - store.SetMergeQueueActive(true) verifyMergedSoon(t, store, leftDesc.StartKey, rightDesc.StartKey) }) } @@ -4909,7 +4907,8 @@ func TestMergeQueueWithSlowNonVoterSnaps(t *testing.T) { ctx := context.Background() var delaySnapshotTrap atomic.Value var clusterArgs = base.TestClusterArgs{ - // We dont want the replicate queue mucking with our test, so disable it. + // We don't want the replicate queue mucking with our test, so disable it. + // This also disables the merge queue, until it is manually enabled. ReplicationMode: base.ReplicationManual, ServerArgs: base.TestServerArgs{ Knobs: base.TestingKnobs{ @@ -4945,17 +4944,9 @@ func TestMergeQueueWithSlowNonVoterSnaps(t *testing.T) { numNodes := 3 tc, _ := setupTestClusterWithDummyRange(t, clusterArgs, dbName, tableName, numNodes) defer tc.Stopper().Stop(ctx) - // We're controlling merge queue operation via - // `store.SetMergeQueueActive`, so enable the cluster setting here. 
- _, err := tc.ServerConn(0).Exec(`SET CLUSTER SETTING kv.range_merge.queue.enabled=true`) - require.NoError(t, err) store, err := tc.Server(0).GetStores().(*kvserver.Stores).GetStore(1) require.Nil(t, err) - // We're going to split the dummy range created above with an empty - // expiration time. Disable the merge queue before splitting so that the - // split ranges aren't immediately merged. - store.SetMergeQueueActive(false) leftDesc, rightDesc := splitDummyRangeInTestCluster( t, tc, dbName, tableName, hlc.Timestamp{}, /* splitExpirationTime */ ) @@ -4972,7 +4963,6 @@ func TestMergeQueueWithSlowNonVoterSnaps(t *testing.T) { time.Sleep(5 * time.Second) return nil }) - store.SetMergeQueueActive(true) verifyMergedSoon(t, store, leftDesc.StartKey, rightDesc.StartKey) } diff --git a/pkg/kv/kvserver/client_raft_log_queue_test.go b/pkg/kv/kvserver/client_raft_log_queue_test.go index a988d1a02970..9a466877c684 100644 --- a/pkg/kv/kvserver/client_raft_log_queue_test.go +++ b/pkg/kv/kvserver/client_raft_log_queue_test.go @@ -20,7 +20,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/rpc" "github.com/cockroachdb/cockroach/pkg/rpc/nodedialer" @@ -34,8 +33,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" - "github.com/cockroachdb/cockroach/pkg/util/tracing" - "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/vfs" "github.com/gogo/protobuf/proto" @@ -135,65 +132,6 @@ func TestRaftLogQueue(t *testing.T) { } } -func TestRaftTracing(t *testing.T) { - defer leaktest.AfterTest(t)() - defer log.Scope(t).Close(t) - - // TODO(baptist): Remove this once we change the default to be enabled. - st := cluster.MakeTestingClusterSettings() - rafttrace.MaxConcurrentRaftTraces.Override(context.Background(), &st.SV, 10) - - tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{ - ReplicationMode: base.ReplicationManual, - ServerArgs: base.TestServerArgs{ - Settings: st, - RaftConfig: base.RaftConfig{ - RangeLeaseDuration: 24 * time.Hour, // disable lease moves - RaftElectionTimeoutTicks: 1 << 30, // disable elections - }, - }, - }) - defer tc.Stopper().Stop(context.Background()) - store := tc.GetFirstStoreFromServer(t, 0) - - // Write a single value to ensure we have a leader on n1. - key := tc.ScratchRange(t) - _, pErr := kv.SendWrapped(context.Background(), store.TestSender(), putArgs(key, []byte("value"))) - require.NoError(t, pErr.GoError()) - require.NoError(t, tc.WaitForSplitAndInitialization(key)) - // Set to have 3 voters. - tc.AddVotersOrFatal(t, key, tc.Targets(1, 2)...) - tc.WaitForVotersOrFatal(t, key, tc.Targets(1, 2)...) - - for i := 0; i < 100; i++ { - var finish func() tracingpb.Recording - ctx := context.Background() - if i == 50 { - // Trace a random request on a "client" tracer. - ctx, finish = tracing.ContextWithRecordingSpan(ctx, store.GetStoreConfig().Tracer(), "test") - } - _, pErr := kv.SendWrapped(ctx, store.TestSender(), putArgs(key, []byte(fmt.Sprintf("value-%d", i)))) - require.NoError(t, pErr.GoError()) - // Note that this is the clients span, there may be additional logs created after the span is returned. 
- if finish != nil { - output := finish().String() - // NB: It is hard to get all the messages in an expected order. We - // simply ensure some of the key messages are returned. Also note - // that we want to make sure that the logs are not reported against - // the tracing library, but the line that called into it. - expectedMessages := []string{ - `replica_proposal_buf.* flushing proposal to Raft`, - `replica_proposal_buf.* registering local trace`, - `replica_raft.* 1->2 MsgApp`, - `replica_raft.* 1->3 MsgApp`, - `replica_raft.* AppendThread->1 MsgStorageAppendResp`, - `ack-ing replication success to the client`, - } - require.NoError(t, testutils.MatchInOrder(output, expectedMessages...)) - } - } -} - // TestCrashWhileTruncatingSideloadedEntries emulates a process crash in the // middle of applying a raft log truncation command that removes some entries // from the sideloaded storage. The test expects that storage remains in a diff --git a/pkg/kv/kvserver/flow_control_integration_test.go b/pkg/kv/kvserver/flow_control_integration_test.go index ff37488205d4..f07f1b02b5bd 100644 --- a/pkg/kv/kvserver/flow_control_integration_test.go +++ b/pkg/kv/kvserver/flow_control_integration_test.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/kvflowinspectpb" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/rac2" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/server" "github.com/cockroachdb/cockroach/pkg/settings/cluster" @@ -2253,6 +2254,10 @@ func TestFlowControlBasicV2(t *testing.T) { n1 := sqlutils.MakeSQLRunner(tc.ServerConn(0)) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- Flow token metrics, before issuing the 1MiB replicated write.`) h.query(n1, v2FlowTokensQueryStr) @@ -2342,6 +2347,10 @@ func TestFlowControlRangeSplitMergeV2(t *testing.T) { require.NoError(t, err) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.log("sending put request to pre-split range") h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode)) h.log("sent put request to pre-split range") @@ -2464,6 +2473,10 @@ func TestFlowControlBlockedAdmissionV2(t *testing.T) { require.NoError(t, err) h.enableVerboseRaftMsgLoggingForRange(desc.RangeID) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 5 1MiB, 3x replicated write that's not admitted.)`) h.log("sending put requests") @@ -2579,6 +2592,10 @@ func TestFlowControlAdmissionPostSplitMergeV2(t *testing.T) { require.NoError(t, err) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. 
+ h.resetV2TokenMetrics(ctx) h.log("sending put request to pre-split range") h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode)) @@ -2722,6 +2739,10 @@ func TestFlowControlCrashedNodeV2(t *testing.T) { require.NoError(t, err) tc.TransferRangeLeaseOrFatal(t, desc, tc.Target(0)) h.waitForConnectedStreams(ctx, desc.RangeID, 2, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 5x1MiB, 2x replicated writes that are not admitted.)`) h.log("sending put requests") @@ -2870,6 +2891,10 @@ func TestFlowControlRaftSnapshotV2(t *testing.T) { repl := store.LookupReplica(roachpb.RKey(k)) require.NotNil(t, repl) h.waitForConnectedStreams(ctx, repl.RangeID, 5, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) // Set up a key to replicate across the cluster. We're going to modify this // key and truncate the raft logs from that command after killing one of the @@ -3085,6 +3110,10 @@ func TestFlowControlRaftMembershipV2(t *testing.T) { desc, err := tc.LookupRange(k) require.NoError(t, err) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 1x1MiB, 3x replicated write that's not admitted.)`) h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode)) @@ -3224,6 +3253,10 @@ func TestFlowControlRaftMembershipRemoveSelfV2(t *testing.T) { // Make sure the lease is on n1 and that we're triply connected. tc.TransferRangeLeaseOrFatal(t, desc, tc.Target(0)) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 1x1MiB, 3x replicated write that's not admitted.)`) h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode)) @@ -3353,6 +3386,10 @@ func TestFlowControlClassPrioritizationV2(t *testing.T) { desc, err := tc.LookupRange(k) require.NoError(t, err) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 1x1MiB, 3x replicated elastic write that's not admitted.)`) h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode)) @@ -3469,6 +3506,10 @@ func TestFlowControlUnquiescedRangeV2(t *testing.T) { n1 := sqlutils.MakeSQLRunner(tc.ServerConn(0)) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. 
+ h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 1x1MiB, 3x replicated elastic write that's not admitted.)`) h.put(ctx, k, 1<<20 /* 1MiB */, admissionpb.BulkNormalPri) @@ -3571,6 +3612,10 @@ func TestFlowControlTransferLeaseV2(t *testing.T) { desc, err := tc.LookupRange(k) require.NoError(t, err) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 1x1MiB, 3x replicated write that's not admitted.)`) h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode)) @@ -3664,6 +3709,10 @@ func TestFlowControlLeaderNotLeaseholderV2(t *testing.T) { desc, err := tc.LookupRange(k) require.NoError(t, err) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 1x1MiB, 3x replicated write that's not admitted.)`) h.put(ctx, k, 1<<20 /* 1MiB */, testFlowModeToPri(mode)) @@ -3780,6 +3829,10 @@ func TestFlowControlGranterAdmitOneByOneV2(t *testing.T) { desc, err := tc.LookupRange(k) require.NoError(t, err) h.waitForConnectedStreams(ctx, desc.RangeID, 3, 0 /* serverIdx */) + // Reset the token metrics, since a send queue may have instantly + // formed when adding one of the replicas, before being quickly + // drained. + h.resetV2TokenMetrics(ctx) h.comment(`-- (Issuing 1024*1KiB, 3x replicated writes that are not admitted.)`) h.log("sending put requests") @@ -4865,6 +4918,18 @@ func (h *flowControlTestHelper) enableVerboseRaftMsgLoggingForRange(rangeID roac } } +func (h *flowControlTestHelper) resetV2TokenMetrics(ctx context.Context) { + for _, server := range h.tc.Servers { + require.NoError(h.t, server.GetStores().(*kvserver.Stores).VisitStores(func(s *kvserver.Store) error { + s.GetStoreConfig().KVFlowStreamTokenProvider.Metrics().(*rac2.TokenMetrics).TestingClear() + _, err := s.ComputeMetricsPeriodically(ctx, nil, 0) + require.NoError(h.t, err) + s.GetStoreConfig().KVFlowStreamTokenProvider.UpdateMetricGauges() + return nil + })) + } +} + // makeV2EnabledTestFileName is a utility function which returns an updated // filename for the testdata file based on the v2EnabledWhenLeaderLevel. func makeV2EnabledTestFileName( diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go b/pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go index eb515d41da18..de2108c567be 100644 --- a/pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go +++ b/pkg/kv/kvserver/kvflowcontrol/rac2/metrics.go @@ -197,6 +197,34 @@ func NewTokenMetrics() *TokenMetrics { return m } +// TestingClear is used in tests to reset the metrics. +func (m *TokenMetrics) TestingClear() { + // NB: we only clear the counter metrics, as the stream metrics are gauges. 
+ for _, typ := range []TokenType{ + EvalToken, + SendToken, + } { + for _, wc := range []admissionpb.WorkClass{ + admissionpb.RegularWorkClass, + admissionpb.ElasticWorkClass, + } { + m.CounterMetrics[typ].Deducted[wc].Clear() + m.CounterMetrics[typ].Returned[wc].Clear() + m.CounterMetrics[typ].Unaccounted[wc].Clear() + m.CounterMetrics[typ].Disconnected[wc].Clear() + if typ == SendToken { + m.CounterMetrics[typ].SendQueue[0].ForceFlushDeducted.Clear() + for _, wc := range []admissionpb.WorkClass{ + admissionpb.RegularWorkClass, + admissionpb.ElasticWorkClass, + } { + m.CounterMetrics[typ].SendQueue[0].PreventionDeducted[wc].Clear() + } + } + } + } +} + type TokenCounterMetrics struct { Deducted [admissionpb.NumWorkClasses]*metric.Counter Returned [admissionpb.NumWorkClasses]*metric.Counter diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go b/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go index 7458a2ac7beb..b2ccd2c9a52f 100644 --- a/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go +++ b/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller.go @@ -71,7 +71,7 @@ type RangeController interface { // // Requires replica.raftMu to be held. HandleSchedulerEventRaftMuLocked( - ctx context.Context, mode RaftMsgAppMode, logSnapshot raft.LogSnapshot) + ctx context.Context, mode RaftMsgAppMode, logSnapshot RaftLogSnapshot) // AdmitRaftMuLocked handles the notification about the given replica's // admitted vector change. No-op if the replica is not known, or the admitted // vector is stale (either in Term, or the indices). @@ -152,6 +152,31 @@ type RaftInterface interface { SendMsgAppRaftMuLocked(replicaID roachpb.ReplicaID, slice raft.LogSlice) (raftpb.Message, bool) } +// RaftLogSnapshot abstract raft.LogSnapshot. +type RaftLogSnapshot interface { + // LogSlice returns a slice containing a prefix of [start, end). It must + // only be called in MsgAppPull mode for followers. The maxSize is required + // to be > 0. + // + // Returns the longest prefix of entries in the [start, end) interval such + // that the total size of the entries does not exceed maxSize. The limit can + // only be exceeded if the first entry is larger than maxSize, in which case + // only this first entry is returned. + // + // Returns an error if the log is truncated beyond the start index, or there + // is some other transient problem. + // + // NB: the [start, end) interval is different from RawNode.LogSlice which + // accepts an open-closed interval. + // + // TODO(#132789): change the semantics so that maxSize can be exceeded not + // only if the first entry is large. It should be ok to exceed maxSize if the + // last entry makes it so. In the underlying storage implementation, we have + // paid the cost of fetching this entry anyway, so there is no need to drop it + // from the result. + LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error) +} + // RaftMsgAppMode specifies how Raft (at the leader) generates MsgApps. In // both modes, Raft knows that (Match(i), Next(i)) are in-flight for a // follower i. @@ -347,7 +372,7 @@ type RaftEvent struct { MsgApps map[roachpb.ReplicaID][]raftpb.Message // LogSnapshot must be populated on the leader, when operating in MsgAppPull // mode. It is used (along with RaftInterface) to construct MsgApps. - LogSnapshot raft.LogSnapshot + LogSnapshot RaftLogSnapshot // ReplicasStateInfo contains the state of all replicas. This is used to // determine if the state of a replica has changed, and if so, to update the // flow control state. 
It also informs the RangeController of a replica's @@ -378,7 +403,7 @@ func RaftEventFromMsgStorageAppendAndMsgApps( replicaID roachpb.ReplicaID, appendMsg raftpb.Message, outboundMsgs []raftpb.Message, - logSnapshot raft.LogSnapshot, + logSnapshot RaftLogSnapshot, msgAppScratch map[roachpb.ReplicaID][]raftpb.Message, replicaStateInfoMap map[roachpb.ReplicaID]ReplicaStateInfo, ) RaftEvent { @@ -794,7 +819,7 @@ type raftEventForReplica struct { newEntries []entryFCState sendingEntries []entryFCState recreateSendStream bool - logSnapshot raft.LogSnapshot + logSnapshot RaftLogSnapshot } // raftEventAppendState is the general state computed from RaftEvent that is @@ -828,7 +853,7 @@ func constructRaftEventForReplica( latestReplicaStateInfo ReplicaStateInfo, existingSendStreamState existingSendStreamState, msgApps []raftpb.Message, - logSnapshot raft.LogSnapshot, + logSnapshot RaftLogSnapshot, scratchSendingEntries []entryFCState, ) (_ raftEventForReplica, scratch []entryFCState) { firstNewEntryIndex, lastNewEntryIndex := uint64(math.MaxUint64), uint64(math.MaxUint64) @@ -1273,7 +1298,7 @@ func (rc *rangeController) computeVoterDirectives( // HandleSchedulerEventRaftMuLocked implements RangeController. func (rc *rangeController) HandleSchedulerEventRaftMuLocked( - ctx context.Context, mode RaftMsgAppMode, logSnapshot raft.LogSnapshot, + ctx context.Context, mode RaftMsgAppMode, logSnapshot RaftLogSnapshot, ) { var scheduledScratch [5]*replicaState // scheduled will contain all the replicas in scheduledMu.replicas, filtered @@ -2273,7 +2298,7 @@ func (rs *replicaState) handleReadyStateRaftMuLocked( // // closedReplica => !scheduleAgain. func (rs *replicaState) scheduledRaftMuLocked( - ctx context.Context, mode RaftMsgAppMode, logSnapshot raft.LogSnapshot, + ctx context.Context, mode RaftMsgAppMode, logSnapshot RaftLogSnapshot, ) (scheduleAgain bool, updateWaiterSets bool) { if rs.desc.ReplicaID == rs.parent.opts.LocalReplicaID { panic("scheduled called on the leader replica") @@ -2328,7 +2353,7 @@ func (rs *replicaState) scheduledRaftMuLocked( // entries not subject to flow control will be tiny. We of course return the // unused tokens for entries not subject to flow control. slice, err := logSnapshot.LogSlice( - rss.mu.sendQueue.indexToSend-1, rss.mu.sendQueue.nextRaftIndex-1, uint64(bytesToSend)) + rss.mu.sendQueue.indexToSend, rss.mu.sendQueue.nextRaftIndex, uint64(bytesToSend)) var msg raftpb.Message if err == nil { var sent bool @@ -2530,7 +2555,7 @@ func (rss *replicaSendStream) handleReadyEntriesRaftMuAndStreamLocked( // NB: this will not do IO since everything here is in the unstable log // (see raft.LogSnapshot.unstable). 
slice, err := event.logSnapshot.LogSlice( - event.sendingEntries[0].id.index-1, event.sendingEntries[n-1].id.index, math.MaxInt64) + event.sendingEntries[0].id.index, event.sendingEntries[n-1].id.index+1, math.MaxInt64) if err != nil { return false, err } diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller_test.go b/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller_test.go index 74431bd41e10..0758567a02fb 100644 --- a/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller_test.go +++ b/pkg/kv/kvserver/kvflowcontrol/rac2/range_controller_test.go @@ -9,7 +9,6 @@ import ( "cmp" "context" "fmt" - "math" "slices" "sort" "strconv" @@ -322,9 +321,6 @@ func (s *testingRCState) getOrInitRange( testRC.mu.evals = make(map[string]*testingRCEval) testRC.mu.outstandingReturns = make(map[roachpb.ReplicaID]kvflowcontrol.Tokens) testRC.mu.quorumPosition = kvflowcontrolpb.RaftLogPosition{Term: 1, Index: 0} - _ = testRC.raftLog.ApplySnapshot(raftpb.Snapshot{ - Metadata: raftpb.SnapshotMetadata{Index: r.nextRaftIndex - 1}, - }) options := RangeControllerOptions{ RangeID: r.rangeID, TenantID: r.tenantID, @@ -374,7 +370,7 @@ type testingRCRange struct { // snapshots contain snapshots of the tracker state for different replicas, // at various points in time. It is used in TestUsingSimulation. snapshots []testingTrackerSnapshot - raftLog raft.MemoryStorage + entries []raftpb.Entry mu struct { syncutil.Mutex @@ -437,8 +433,28 @@ func (r *testingRCRange) ScheduleControllerEvent(rangeID roachpb.RangeID) { r.scheduleControllerEventCount.Add(1) } -func (r *testingRCRange) logSnapshot() raft.LogSnapshot { - return raft.MakeLogSnapshot(&r.raftLog) +func (r *testingRCRange) LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error) { + if start >= end { + panic("start >= end") + } + var size uint64 + var entries []raftpb.Entry + for _, entry := range r.entries { + if entry.Index < start || entry.Index >= end { + continue + } + size += uint64(entry.Size()) + // Allow exceeding the size limit only if this is the first entry. + if size > maxSize && len(entries) != 0 { + break + } + entries = append(entries, entry) + if size >= maxSize { + break + } + } + // TODO(pav-kv): use a real LogSnapshot and construct a correct LogSlice. + return raft.MakeLogSlice(entries), nil } func (r *testingRCRange) SendMsgAppRaftMuLocked( @@ -1209,20 +1225,17 @@ func TestRangeController(t *testing.T) { mode = MsgAppPull } for _, event := range parseRaftEvents(t, d.Input) { - entries := make([]raftpb.Entry, len(event.entries)) - for i, entry := range event.entries { - entries[i] = testingCreateEntry(t, entry) - } testRC := state.ranges[event.rangeID] - require.NoError(t, testRC.raftLog.Append(entries)) - raftEvent := RaftEvent{ MsgAppMode: mode, - Entries: entries, + Entries: make([]raftpb.Entry, len(event.entries)), MsgApps: map[roachpb.ReplicaID][]raftpb.Message{}, - LogSnapshot: testRC.logSnapshot(), + LogSnapshot: testRC, ReplicasStateInfo: state.ranges[event.rangeID].replicasStateInfo(), } + for i, entry := range event.entries { + raftEvent.Entries[i] = testingCreateEntry(t, entry) + } msgApp := raftpb.Message{ Type: raftpb.MsgApp, To: 0, @@ -1230,6 +1243,7 @@ func TestRangeController(t *testing.T) { // suffix of entries that were previously appended, down below. Entries: nil, } + testRC.entries = append(testRC.entries, raftEvent.Entries...) 
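
// Illustrative sketch, not part of the patch: the prefix semantics described in
// the rac2.RaftLogSnapshot.LogSlice contract above, and mirrored by the
// testingRCRange.LogSlice test fake. sketchEntry and logSlicePrefix are
// hypothetical stand-ins (not raftpb APIs); only an index and a byte size are
// needed to show the rule. Note the interval here is half-open [start, end);
// the replica_rac2 adapter converts it to raft's open-closed interval by
// calling the underlying raft.LogSnapshot.LogSlice(start-1, end-1, maxSize).
type sketchEntry struct {
	Index uint64
	Data  []byte
}

// logSlicePrefix returns the longest prefix of entries in [start, end) whose
// total size does not exceed maxSize. The limit may only be exceeded if the
// first selected entry is itself larger than maxSize.
func logSlicePrefix(log []sketchEntry, start, end, maxSize uint64) []sketchEntry {
	var out []sketchEntry
	var size uint64
	for _, e := range log {
		if e.Index < start || e.Index >= end {
			continue
		}
		size += uint64(len(e.Data))
		if size > maxSize && len(out) > 0 {
			break // adding this entry would exceed the budget
		}
		out = append(out, e)
		if size >= maxSize {
			break // budget exhausted (possibly by an oversized first entry)
		}
	}
	return out
}
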
func() { testRC.mu.Lock() defer testRC.mu.Unlock() @@ -1246,9 +1260,11 @@ func TestRangeController(t *testing.T) { } else { fromIndex := event.sendingEntryRange[replicaID].fromIndex toIndex := event.sendingEntryRange[replicaID].toIndex - entries, err := testRC.raftLog.Entries(fromIndex, toIndex+1, math.MaxUint64) - require.NoError(t, err) - msgApp.Entries = entries + for _, entry := range testRC.entries { + if entry.Index >= fromIndex && entry.Index <= toIndex { + msgApp.Entries = append(msgApp.Entries, entry) + } + } } raftEvent.MsgApps[replicaID] = append([]raftpb.Message(nil), msgApp) } @@ -1298,7 +1314,7 @@ func TestRangeController(t *testing.T) { if d.HasArg("push-mode") { mode = MsgAppPush } - testRC.rc.HandleSchedulerEventRaftMuLocked(ctx, mode, testRC.logSnapshot()) + testRC.rc.HandleSchedulerEventRaftMuLocked(ctx, mode, testRC) // Sleep for a bit to allow any timers to fire. time.Sleep(20 * time.Millisecond) return state.sendStreamString(roachpb.RangeID(rangeID)) @@ -1549,6 +1565,12 @@ func testingFirst(args ...interface{}) interface{} { return nil } +type testLogSnapshot struct{} + +func (testLogSnapshot) LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error) { + return raft.LogSlice{}, nil +} + func TestRaftEventFromMsgStorageAppendAndMsgAppsBasic(t *testing.T) { // raftpb.Entry and raftpb.Message are only partially populated below, which // could be improved in the future. @@ -1591,10 +1613,10 @@ func TestRaftEventFromMsgStorageAppendAndMsgAppsBasic(t *testing.T) { }, } msgAppScratch := map[roachpb.ReplicaID][]raftpb.Message{} - logSnap := raft.LogSnapshot{} + var logSnap testLogSnapshot infoMap := map[roachpb.ReplicaID]ReplicaStateInfo{} checkSnapAndMap := func(event RaftEvent) { - require.Equal(t, logSnap, event.LogSnapshot) + require.Equal(t, logSnap, event.LogSnapshot.(testLogSnapshot)) require.Equal(t, infoMap, event.ReplicasStateInfo) } @@ -1610,7 +1632,7 @@ func TestRaftEventFromMsgStorageAppendAndMsgAppsBasic(t *testing.T) { event = RaftEventFromMsgStorageAppendAndMsgApps( MsgAppPush, 20, raftpb.Message{}, nil, logSnap, msgAppScratch, infoMap) checkSnapAndMap(event) - event.LogSnapshot = raft.LogSnapshot{} + event.LogSnapshot = nil event.ReplicasStateInfo = nil require.Equal(t, RaftEvent{}, event) // Outbound msgs contains no MsgApps for a follower, since the only MsgApp @@ -2262,7 +2284,7 @@ func TestConstructRaftEventForReplica(t *testing.T) { tc.latestReplicaStateInfo, tc.existingSendStreamState, tc.msgApps, - raft.LogSnapshot{}, + nil, tc.scratchSendingEntries, ) }) @@ -2274,7 +2296,7 @@ func TestConstructRaftEventForReplica(t *testing.T) { tc.latestReplicaStateInfo, tc.existingSendStreamState, tc.msgApps, - raft.LogSnapshot{}, + nil, tc.scratchSendingEntries, ) require.Equal(t, tc.expectedRaftEventReplica, gotRaftEventReplica) diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/store_stream.go b/pkg/kv/kvserver/kvflowcontrol/rac2/store_stream.go index a2d675e8513e..ef6200b4b3be 100644 --- a/pkg/kv/kvserver/kvflowcontrol/rac2/store_stream.go +++ b/pkg/kv/kvserver/kvflowcontrol/rac2/store_stream.go @@ -541,8 +541,8 @@ func (w *sendStreamTokenWatcher) run(_ context.Context) { select { case <-w.stopper.ShouldQuiesce(): return - case <-handle.waitChannel(): - if handle.confirmHaveTokensAndUnblockNextWaiter() { + case <-handle.WaitChannel(): + if handle.ConfirmHaveTokensAndUnblockNextWaiter() { break waiting } } diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter.go b/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter.go index a046c0667635..c371652109b1 
100644 --- a/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter.go +++ b/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter.go @@ -22,6 +22,40 @@ import ( "github.com/cockroachdb/redact" ) +// TokenWaitingHandle is the interface for waiting for positive tokens from a +// token counter. +// +// TODO(sumeer): remove this interface since there is only one implementation. +type TokenWaitingHandle interface { + // WaitChannel is the channel that will be signaled if tokens are possibly + // available. If signaled, the caller must call + // ConfirmHaveTokensAndUnblockNextWaiter. There is no guarantee of tokens + // being available after this channel is signaled, just that tokens were + // available recently. A typical usage pattern is: + // + // for { + // select { + // case <-handle.WaitChannel(): + // if handle.ConfirmHaveTokensAndUnblockNextWaiter() { + // break + // } + // } + // } + // tokenCounter.Deduct(...) + // + // There is a possibility for races, where multiple goroutines may be + // signaled and deduct tokens, sending the counter into debt. These cases are + // acceptable, as in aggregate the counter provides pacing over time. + WaitChannel() <-chan struct{} + // ConfirmHaveTokensAndUnblockNextWaiter is called to confirm tokens are + // available. True is returned if tokens are available, false otherwise. If + // no tokens are available, the caller can resume waiting using WaitChannel. + ConfirmHaveTokensAndUnblockNextWaiter() bool + // StreamString returns a string representation of the stream. Used for + // tracing. + StreamString() string +} + // tokenCounterPerWorkClass is a helper struct for implementing tokenCounter. // tokens are protected by the mutex in tokenCounter. Operations on the // signalCh may not be protected by that mutex -- see the comment below. @@ -269,16 +303,15 @@ func (t *tokenCounter) limit(wc admissionpb.WorkClass) kvflowcontrol.Tokens { return t.mu.counters[wc].limit } -// TokensAvailable returns true if tokens are available, in which case handle -// is empty and should be ignored. If false, it returns a handle that may be -// used for waiting for tokens to become available. +// TokensAvailable returns true if tokens are available. If false, it returns +// a handle that may be used for waiting for tokens to become available. func (t *tokenCounter) TokensAvailable( wc admissionpb.WorkClass, -) (available bool, handle tokenWaitHandle) { +) (available bool, handle TokenWaitingHandle) { if t.tokens(wc) > 0 { - return true, tokenWaitHandle{} + return true, nil } - return false, tokenWaitHandle{wc: wc, b: t} + return false, waitHandle{wc: wc, b: t} } // TryDeduct attempts to deduct flow tokens for the given work class. If there @@ -325,23 +358,25 @@ func (t *tokenCounter) Return( t.adjust(ctx, wc, tokens, flag) } -// tokenWaitHandle is a handle for waiting for tokens to become available from -// a token counter. -type tokenWaitHandle struct { +// waitHandle is a handle for waiting for tokens to become available from a +// token counter. +type waitHandle struct { wc admissionpb.WorkClass b *tokenCounter } -// waitChannel is the channel that will be signaled if tokens are possibly +var _ TokenWaitingHandle = waitHandle{} + +// WaitChannel is the channel that will be signaled if tokens are possibly // available. If signaled, the caller must call -// confirmHaveTokensAndUnblockNextWaiter. There is no guarantee of tokens being +// ConfirmHaveTokensAndUnblockNextWaiter. 
There is no guarantee of tokens being // available after this channel is signaled, just that tokens were available // recently. A typical usage pattern is: // // for { // select { -// case <-handle.waitChannel(): -// if handle.confirmHaveTokensAndUnblockNextWaiter() { +// case <-handle.WaitChannel(): +// if handle.ConfirmHaveTokensAndUnblockNextWaiter() { // break // } // } @@ -351,14 +386,14 @@ type tokenWaitHandle struct { // There is a possibility for races, where multiple goroutines may be signaled // and deduct tokens, sending the counter into debt. These cases are // acceptable, as in aggregate the counter provides pacing over time. -func (wh tokenWaitHandle) waitChannel() <-chan struct{} { +func (wh waitHandle) WaitChannel() <-chan struct{} { return wh.b.mu.counters[wh.wc].signalCh } -// confirmHaveTokensAndUnblockNextWaiter is called to confirm tokens are +// ConfirmHaveTokensAndUnblockNextWaiter is called to confirm tokens are // available. True is returned if tokens are available, false otherwise. If no -// tokens are available, the caller can resume waiting using waitChannel. -func (wh tokenWaitHandle) confirmHaveTokensAndUnblockNextWaiter() (haveTokens bool) { +// tokens are available, the caller can resume waiting using WaitChannel. +func (wh waitHandle) ConfirmHaveTokensAndUnblockNextWaiter() (haveTokens bool) { haveTokens = wh.b.tokens(wh.wc) > 0 if haveTokens { // Signal the next waiter if we have tokens available before returning. @@ -367,15 +402,14 @@ func (wh tokenWaitHandle) confirmHaveTokensAndUnblockNextWaiter() (haveTokens bo return haveTokens } -// streamString returns a string representation of the stream. Used for -// tracing. -func (wh tokenWaitHandle) streamString() string { +// StreamString implements TokenWaitingHandle. +func (wh waitHandle) StreamString() string { return wh.b.stream.String() } type tokenWaitingHandleInfo struct { - // Can be empty, in which case no methods should be called on it. - handle tokenWaitHandle + // Can be nil, in which case the wait on this can never succeed. + handle TokenWaitingHandle // requiredWait will be set for the leaseholder and leader for regular work. // For elastic work this will be set for the aforementioned, and all replicas // which are in StateReplicate. @@ -457,8 +491,8 @@ func WaitForEval( requiredWaitCount++ } var chanValue reflect.Value - if h.handle != (tokenWaitHandle{}) { - chanValue = reflect.ValueOf(h.handle.waitChannel()) + if h.handle != nil { + chanValue = reflect.ValueOf(h.handle.WaitChannel()) } // Else, zero Value, so will never be selected. scratch = append(scratch, @@ -495,7 +529,7 @@ func WaitForEval( return ReplicaRefreshWaitSignaled, scratch default: handleInfo := handles[chosen-3] - if available := handleInfo.handle.confirmHaveTokensAndUnblockNextWaiter(); !available { + if available := handleInfo.handle.ConfirmHaveTokensAndUnblockNextWaiter(); !available { // The handle was signaled but does not currently have tokens // available. Continue waiting on this handle. 
continue @@ -503,7 +537,7 @@ func WaitForEval( if traceIndividualWaits { log.Eventf(ctx, "wait-for-eval: waited until %s tokens available", - handleInfo.handle.streamString()) + handleInfo.handle.StreamString()) } if handleInfo.partOfQuorum { signaledQuorumCount++ diff --git a/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter_test.go b/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter_test.go index 3187ca37a92c..2392a9d96094 100644 --- a/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter_test.go +++ b/pkg/kv/kvserver/kvflowcontrol/rac2/token_counter_test.go @@ -297,7 +297,7 @@ func TestTokenCounter(t *testing.T) { assertStateReset := func(t *testing.T) { available, handle := counter.TokensAvailable(admissionpb.ElasticWorkClass) require.True(t, available) - require.Equal(t, tokenWaitHandle{}, handle) + require.Nil(t, handle) require.Equal(t, limits.regular, counter.tokens(admissionpb.RegularWorkClass)) require.Equal(t, limits.elastic, counter.tokens(admissionpb.ElasticWorkClass)) } @@ -307,11 +307,11 @@ func TestTokenCounter(t *testing.T) { // classes. available, handle := counter.TokensAvailable(admissionpb.RegularWorkClass) require.True(t, available) - require.Equal(t, tokenWaitHandle{}, handle) + require.Nil(t, handle) available, handle = counter.TokensAvailable(admissionpb.ElasticWorkClass) require.True(t, available) - require.Equal(t, tokenWaitHandle{}, handle) + require.Nil(t, handle) assertStateReset(t) }) @@ -326,7 +326,7 @@ func TestTokenCounter(t *testing.T) { // Now there should be no tokens available for regular work class. available, handle := counter.TokensAvailable(admissionpb.RegularWorkClass) require.False(t, available) - require.NotEqual(t, tokenWaitHandle{}, handle) + require.NotNil(t, handle) counter.Return(ctx, admissionpb.RegularWorkClass, limits.regular, AdjNormal) assertStateReset(t) }) @@ -353,18 +353,18 @@ func TestTokenCounter(t *testing.T) { // returned. available, handle := counter.TokensAvailable(admissionpb.RegularWorkClass) require.False(t, available) - require.NotEqual(t, tokenWaitHandle{}, handle) + require.NotNil(t, handle) counter.Return(ctx, admissionpb.RegularWorkClass, limits.regular, AdjNormal) // Wait on the handle to be unblocked and expect that there are tokens // available when the wait channel is signaled. - <-handle.waitChannel() - haveTokens := handle.confirmHaveTokensAndUnblockNextWaiter() + <-handle.WaitChannel() + haveTokens := handle.ConfirmHaveTokensAndUnblockNextWaiter() require.True(t, haveTokens) // Wait on the handle to be unblocked again, this time try deducting such // that there are no tokens available after. counter.Deduct(ctx, admissionpb.RegularWorkClass, limits.regular, AdjNormal) - <-handle.waitChannel() - haveTokens = handle.confirmHaveTokensAndUnblockNextWaiter() + <-handle.WaitChannel() + haveTokens = handle.ConfirmHaveTokensAndUnblockNextWaiter() require.False(t, haveTokens) // Return the tokens deducted from the first wait above. counter.Return(ctx, admissionpb.RegularWorkClass, limits.regular, AdjNormal) @@ -394,14 +394,14 @@ func TestTokenCounter(t *testing.T) { // available. available, handle := counter.TokensAvailable(admissionpb.RegularWorkClass) if !available { - <-handle.waitChannel() + <-handle.WaitChannel() // This may or may not have raced with another goroutine, there's // no guarantee we have tokens here. If we don't have tokens here, // the next call to TryDeduct will fail (unless someone returns // tokens between here and that call), which is harmless. 
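
// Illustrative sketch, not part of the patch: the waiting pattern documented on
// TokenWaitingHandle, written out as a hypothetical helper. A nil handle from
// TokensAvailable means tokens were available and no waiting is needed;
// otherwise the caller blocks on WaitChannel and must re-confirm, since a
// signal only means tokens were available recently. The context case is an
// addition here for completeness and is not part of the documented pattern.
func waitForTokens(ctx context.Context, t *tokenCounter, wc admissionpb.WorkClass) error {
	for {
		available, handle := t.TokensAvailable(wc)
		if available {
			return nil
		}
		select {
		case <-handle.WaitChannel():
			if handle.ConfirmHaveTokensAndUnblockNextWaiter() {
				// Tokens are likely available; a subsequent Deduct may still
				// race another waiter into debt, which is acceptable since the
				// counter only needs to provide pacing in aggregate.
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
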
This test // is using TokensAvailable and the returned handle to avoid // busy-waiting. - handle.confirmHaveTokensAndUnblockNextWaiter() + handle.ConfirmHaveTokensAndUnblockNextWaiter() } } @@ -416,8 +416,8 @@ func TestTokenCounter(t *testing.T) { }) } -func (t *tokenCounter) testingHandle() tokenWaitHandle { - return tokenWaitHandle{wc: admissionpb.RegularWorkClass, b: t} +func (t *tokenCounter) testingHandle() waitHandle { + return waitHandle{wc: admissionpb.RegularWorkClass, b: t} } type namedTokenCounter struct { diff --git a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go index 1dd2fb8af8f4..e4c457ca0db7 100644 --- a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go +++ b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor.go @@ -15,7 +15,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/kvflowinspectpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/rac2" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/raftlog" - "github.com/cockroachdb/cockroach/pkg/raft" "github.com/cockroachdb/cockroach/pkg/raft/raftpb" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/util/admission/admissionpb" @@ -354,7 +353,7 @@ type Processor interface { // // raftMu is held. ProcessSchedulerEventRaftMuLocked( - ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot raft.LogSnapshot) + ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot rac2.RaftLogSnapshot) // InspectRaftMuLocked returns a handle to inspect the state of the // underlying range controller. It is used to power /inspectz-style debugging @@ -1140,7 +1139,7 @@ func (p *processorImpl) AdmitForEval( // ProcessSchedulerEventRaftMuLocked implements Processor. func (p *processorImpl) ProcessSchedulerEventRaftMuLocked( - ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot raft.LogSnapshot, + ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot rac2.RaftLogSnapshot, ) { p.opts.Replica.RaftMuAssertHeld() if p.destroyed { diff --git a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go index 1d4db9902eb1..5efce9a4484e 100644 --- a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go +++ b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/processor_test.go @@ -203,7 +203,7 @@ func (c *testRangeController) HandleRaftEventRaftMuLocked( } func (c *testRangeController) HandleSchedulerEventRaftMuLocked( - _ context.Context, _ rac2.RaftMsgAppMode, _ raft.LogSnapshot, + ctx context.Context, mode rac2.RaftMsgAppMode, logSnapshot rac2.RaftLogSnapshot, ) { panic("HandleSchedulerEventRaftMuLocked is unimplemented") } diff --git a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/raft_node.go b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/raft_node.go index a20de90911c9..381994706f70 100644 --- a/pkg/kv/kvserver/kvflowcontrol/replica_rac2/raft_node.go +++ b/pkg/kv/kvserver/kvflowcontrol/replica_rac2/raft_node.go @@ -73,3 +73,12 @@ func (rn raftNodeForRACv2) SendMsgAppRaftMuLocked( defer rn.r.MuUnlock() return rn.RawNode.SendMsgApp(raftpb.PeerID(replicaID), ls) } + +type RaftLogSnapshot raft.LogSnapshot + +var _ rac2.RaftLogSnapshot = RaftLogSnapshot{} + +// LogSlice implements rac2.RaftLogSnapshot. 
+func (l RaftLogSnapshot) LogSlice(start, end uint64, maxSize uint64) (raft.LogSlice, error) { + return (raft.LogSnapshot(l)).LogSlice(start-1, end-1, maxSize) +} diff --git a/pkg/kv/kvserver/kvserverpb/raft.proto b/pkg/kv/kvserver/kvserverpb/raft.proto index 5dda2a59f4eb..2a8897e1012d 100644 --- a/pkg/kv/kvserver/kvserverpb/raft.proto +++ b/pkg/kv/kvserver/kvserverpb/raft.proto @@ -48,16 +48,6 @@ message RaftHeartbeat { bool lagging_followers_on_quiesce_accurate = 10; } -// The traced entry from the leader along with the trace and span ID. -message TracedEntry { - uint64 index = 1 [(gogoproto.nullable) = false, - (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/kv/kvpb.RaftIndex"]; - uint64 trace_id = 2 [(gogoproto.nullable) = false, (gogoproto.customname) = "TraceID", - (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb.TraceID"]; - uint64 span_id = 3 [(gogoproto.nullable) = false, (gogoproto.customname) = "SpanID", - (gogoproto.customtype) = "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb.SpanID"]; -} - // RaftMessageRequest is the request used to send raft messages using our // protobuf-based RPC codec. If a RaftMessageRequest has a non-empty number of // heartbeats or heartbeat_resps, the contents of the message field is treated @@ -113,12 +103,6 @@ message RaftMessageRequest { // indices. Used only with RACv2. kv.kvserver.kvflowcontrol.kvflowcontrolpb.AdmittedState admitted_state = 14 [(gogoproto.nullable) = false]; - // TracedEntry is a mapping from Raft index to trace and span ids for this - // request. They are set by the leaseholder and begin tracing on all - // replicas. Currently, traces are not returned to the leaseholder, but - // instead logged to a local log file. - repeated TracedEntry traced_entries = 15 [(gogoproto.nullable) = false]; - reserved 10; } diff --git a/pkg/kv/kvserver/raft.go b/pkg/kv/kvserver/raft.go index 4f1e311004ca..553cf26e4098 100644 --- a/pkg/kv/kvserver/raft.go +++ b/pkg/kv/kvserver/raft.go @@ -267,7 +267,7 @@ func traceProposals(r *Replica, ids []kvserverbase.CmdIDKey, event string) { r.mu.RLock() for _, id := range ids { if prop, ok := r.mu.proposals[id]; ok { - ctxs = append(ctxs, prop.Context()) + ctxs = append(ctxs, prop.ctx) } } r.mu.RUnlock() diff --git a/pkg/kv/kvserver/rafttrace/BUILD.bazel b/pkg/kv/kvserver/rafttrace/BUILD.bazel deleted file mode 100644 index d4038ab33454..000000000000 --- a/pkg/kv/kvserver/rafttrace/BUILD.bazel +++ /dev/null @@ -1,38 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") - -go_library( - name = "rafttrace", - srcs = ["rafttrace.go"], - importpath = "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace", - visibility = ["//visibility:public"], - deps = [ - "//pkg/kv/kvpb", - "//pkg/kv/kvserver/kvserverpb", - "//pkg/raft", - "//pkg/raft/raftpb", - "//pkg/settings", - "//pkg/settings/cluster", - "//pkg/util/log", - "//pkg/util/syncutil", - "//pkg/util/tracing", - "//pkg/util/tracing/tracingpb", - "@com_github_cockroachdb_logtags//:logtags", - "@com_github_cockroachdb_redact//:redact", - ], -) - -go_test( - name = "rafttrace_test", - srcs = ["rafttrace_test.go"], - embed = [":rafttrace"], - deps = [ - "//pkg/kv/kvpb", - "//pkg/kv/kvserver/kvserverpb", - "//pkg/raft/raftpb", - "//pkg/settings/cluster", - "//pkg/testutils", - "//pkg/util/tracing", - "//pkg/util/tracing/tracingpb", - "@com_github_stretchr_testify//require", - ], -) diff --git a/pkg/kv/kvserver/rafttrace/rafttrace.go b/pkg/kv/kvserver/rafttrace/rafttrace.go 
deleted file mode 100644 index 2a6d945f8efb..000000000000 --- a/pkg/kv/kvserver/rafttrace/rafttrace.go +++ /dev/null @@ -1,477 +0,0 @@ -// Copyright 2024 The Cockroach Authors. -// -// Use of this software is governed by the CockroachDB Software License -// included in the /LICENSE file. - -package rafttrace - -import ( - "context" - "math" - "sync/atomic" - - "github.com/cockroachdb/cockroach/pkg/kv/kvpb" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" - "github.com/cockroachdb/cockroach/pkg/raft" - "github.com/cockroachdb/cockroach/pkg/raft/raftpb" - "github.com/cockroachdb/cockroach/pkg/settings" - "github.com/cockroachdb/cockroach/pkg/settings/cluster" - "github.com/cockroachdb/cockroach/pkg/util/log" - "github.com/cockroachdb/cockroach/pkg/util/syncutil" - "github.com/cockroachdb/cockroach/pkg/util/tracing" - "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" - "github.com/cockroachdb/logtags" - "github.com/cockroachdb/redact" -) - -// MaxConcurrentRaftTraces is the maximum number of entries that can be traced -// at any time on this store. Additional traces will be ignored until the number -// of traces drops below the limit. Having too many active traces can negatively -// impact performance as we iterate over all of them for some messages. -// -// TODO(baptist): Bump the default to a reasonable value like 10 that balances -// usefulness with performance impact once we have validated the performance -// impact. -var MaxConcurrentRaftTraces = settings.RegisterIntSetting( - settings.SystemOnly, - "kv.raft.max_concurrent_traces", - "the maximum number of tracked raft traces, 0 will disable tracing", - 0, - settings.IntInRange(0, 1000), -) - -// traceValue represents the trace information for a single registration. -type traceValue struct { - traced kvserverpb.TracedEntry - // ctx is a trace specific context used to log events on this trace. - ctx context.Context - - mu struct { - syncutil.Mutex - - // seenMsgAppResp tracks whether a MsgAppResp message has already been - // logged by each replica peer. This limits the size of the log at a - // small risk of missing some important messages in the case of dropped - // messages or reproposals. - seenMsgAppResp map[raftpb.PeerID]bool - - // seenMsgStorageAppendResp tracks whether a MsgStorageAppendResp - // message has already been logged. - seenMsgStorageAppendResp bool - - // propCtx is the underlying proposal context used for tracing to the - // SQL trace. - propCtx context.Context - - // propSpan is the span connected to the propCtx. It must be finished - // when the trace is removed. - propSpan *tracing.Span - } -} - -// logf logs the message to the trace context and the proposal context. The -// proposal context is populated on the leaseholder and is attached to the SQL -// trace. -func (t *traceValue) logf(depth int, format string, args ...interface{}) { - log.InfofDepth(t.ctx, depth+1, format, args...) - - t.mu.Lock() - propCtx := t.mu.propCtx - t.mu.Unlock() - if propCtx != nil { - log.VEventfDepth(propCtx, depth+1, 3, format, args...) - } -} - -// seenMsgAppResp returns true if it hasn't seen an MsgAppResp for this peer. -func (t *traceValue) seenMsgAppResp(p raftpb.PeerID) bool { - t.mu.Lock() - defer t.mu.Unlock() - if t.mu.seenMsgAppResp[p] { - return true - } - t.mu.seenMsgAppResp[p] = true - return false -} - -// seenMsgStorageAppendResp returns true if it hasn't seen a -// MsgStorageAppendResp for this trace. 
-func (t *traceValue) seenMsgStorageAppendResp() bool { - t.mu.Lock() - defer t.mu.Unlock() - if t.mu.seenMsgStorageAppendResp { - return true - } - t.mu.seenMsgStorageAppendResp = true - return false -} - -// String attempts to balance uniqueness with readability by only keeping the -// lower 16 bits of the trace and span. -func (tv *traceValue) String() string { - return redact.StringWithoutMarkers(tv) -} - -func (tv *traceValue) SafeFormat(w redact.SafePrinter, _ rune) { - w.Printf("i%d/%x.%x", tv.traced.Index, uint16(tv.traced.TraceID), uint16(tv.traced.SpanID)) -} - -// RaftTracer is a utility to trace the lifetime of raft log entries. It may log -// some unrelated entries, since it does not consider entry or leader term. It -// traces at most one MsgAppResp and MsgStorageAppendResp per index which is the -// first one that is past our index entry. This limitation means it may not -// capture all the relevant messages particularly if the term changes. -// -// The library will log in two different ways once to the standard cockroach log -// and once to the SQL trace on the leaseholder. -// TODO(baptist): Look at logging traces on followers and sending back to the -// leader. It would need to be best effort, but might still be useful. -// Alternatively, double-down on distributed trace collection if/when it's -// supported. So that the trace does not need to be plumbed back to the -// leaseholder / txn coordinator. -type RaftTracer struct { - // m is a map of all the currently traced entries for this replica. The - // aggregate size of the map across all replicas is equal to or less than - // numRegisteredStore unless the setting changes in which case we flush all - // entries on the next register call. We add to numRegistered before we - // update m, and delete from m before we remove from numRegistered to keep - // this invariant. On a setting change we flush all existing traces on the - // next call to register. - // TODO(baptist): Look at alternatives to using a map such as a sparse array - // or circular buffer. Specifically, we might be able to save some memory - // allocations. Note that the propCtx in the traceValue is already pulled - // from a pool inside the tracer. - m syncutil.Map[kvpb.RaftIndex, traceValue] - - // numRegisteredStore is the number of currently registered traces for this - // store, not this replica. The number of registered will normally be less - // than the MaxConcurrentRaftTraces setting. If the setting is lowered, we - // flush all traces on all replicas. - numRegisteredStore *atomic.Int64 - - // numRegisteredReplica is the number of currently registered traces for - // this replica. The sum(numRegisteredReplica) <= numRegisteredStore. We set - // numRegisteredReplica to MaxInt32 when we close the tracer to prevent new - // registrations. - // - // TODO(baptist/pav-kv): Look at optimizing to avoid the need for this to be - // an atomic. It likely doesn't need to be atomic since the callers should - // be holding Replica.raftMu and/or Replica.mu. - numRegisteredReplica atomic.Int64 - - // ctx is the ambient context for the replica and is used for remote - // traces. It contains the replica/range information. On each trace we - // additionally append the unique trace/span IDs. - ctx context.Context - st *cluster.Settings - - tracer *tracing.Tracer -} - -// NewRaftTracer creates a new RaftTracer with the given ambient context for the -// replica. 
-func NewRaftTracer( - ctx context.Context, - tracer *tracing.Tracer, - st *cluster.Settings, - numRegisteredStore *atomic.Int64, -) *RaftTracer { - return &RaftTracer{ctx: ctx, tracer: tracer, st: st, numRegisteredStore: numRegisteredStore} -} - -// reserveSpace checks if should register a new trace. If there are too many -// registered traces it will not register and return false. The soft invariant -// is that numRegisteredStore <= numAllowed which can be temporarily violated if -// MaxConcurrentRaftTraces is lowered. This method will return true if we can -// add one to the number registered for both the store and replica, otherwise it -// will return false. This method is optimized for the `numAllowed == 0` case -// and avoids loading `numRegisteredStore` until after this check.` -func (r *RaftTracer) reserveSpace() bool { - numAllowed := MaxConcurrentRaftTraces.Get(&r.st.SV) - numRegisteredReplica := r.numRegisteredReplica.Load() - - // This can only occur if the numAllowed setting has changed since a - // previous call to reserveSpace. If this happens flush all our current - // traces and don't register this request. Note that when this happens we - // also wont't log this request. - if numRegisteredReplica > numAllowed { - log.Infof(r.ctx, "flushing all traces due to setting change") - r.m.Range(func(index kvpb.RaftIndex, t *traceValue) bool { - r.removeEntry(index) - return true - }) - return false - } - - if numAllowed == 0 { - return false - } - - // The maximum number of traces has been reached for the store. We don't - // register tracing and return false. - numRegisteredStore := r.numRegisteredStore.Load() - if numRegisteredStore >= numAllowed { - return false - } - - // Only increment the number of registered traces if the numRegistered - // hasn't changed. In the case of an ABA update, it does not break the - // invariant since some other trace was registered and deregistered, but - // there is still a slot available. We will not register this trace if - // someone else is concurrently registering a trace on this store, but this - // is acceptable as it is a rare case. - registerSucceeded := r.numRegisteredStore.CompareAndSwap(numRegisteredStore, numRegisteredStore+1) - if registerSucceeded { - // Add one unconditionally to the replica count. - r.numRegisteredReplica.Add(1) - } - // Note we can't assert numRegisteredStore <= numAllowed because if the - // setting is changed it can be temporarily violated on other replicas. - return registerSucceeded -} - -// tryStore attempts to store this value. If the index is already in the map it -// will not store this entry and return false. It will also decrement counters -// that were incremented by reserveSpace. -// This is a rare case where we already have the index in the map. We -// don't want to store this entry, but also need to decrement the -// counter to avoid double tracing. -func (r *RaftTracer) tryStore(tv *traceValue) (*traceValue, bool) { - if existingTv, loaded := r.m.LoadOrStore(tv.traced.Index, tv); loaded { - tv.logf(2, "duplicate registration ignored - existing trace: %s", existingTv) - existingTv.logf(2, "additional registration for same index: %s", tv) - r.destroy(tv) - return existingTv, false - } - return tv, true -} - -// newTraceValue creates a new traceValue for the given traced entry. Note that -// it doesn't pass `propCtx` as the first parameter since this isn't the -// relevant context that should be used for logging and it can be nil. 
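reserveSpace, shown above, admits a new trace only while the store-wide count is below the configured limit and uses a single compare-and-swap so a lost race simply means no slot this time. A minimal sketch of that reservation step, with the limit passed in directly instead of read from the cluster setting and hypothetical names throughout:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// tryReserve increments the shared counter only if doing so keeps it at or
// below limit. A lost CAS race is treated as "no slot available", matching
// the best-effort behavior described above.
func tryReserve(registered *atomic.Int64, limit int64) bool {
	if limit == 0 {
		return false // tracing disabled; avoid touching the counter at all
	}
	cur := registered.Load()
	if cur >= limit {
		return false
	}
	return registered.CompareAndSwap(cur, cur+1)
}

func main() {
	var registered atomic.Int64
	for i := 0; i < 4; i++ {
		fmt.Println(tryReserve(&registered, 3)) // true, true, true, false
	}
}
```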
-func (r *RaftTracer) newTraceValue( - te kvserverpb.TracedEntry, propCtx context.Context, propSpan *tracing.Span, -) *traceValue { - tv := &traceValue{traced: te} - tv.ctx = logtags.AddTag(r.ctx, "id", redact.Safe(tv.String())) - tv.mu.seenMsgAppResp = make(map[raftpb.PeerID]bool) - tv.mu.propCtx = propCtx - tv.mu.propSpan = propSpan - return tv -} - -// RegisterRemote registers a remote trace. This is called when we receive a -// raft message over the wire with a request to continue tracing it. -func (r *RaftTracer) RegisterRemote(te kvserverpb.TracedEntry) { - if !r.reserveSpace() { - return - } - // NB: We don't currently return remote traces, if we did, we would pass the - // remote ctx here and trace it. - if tv, created := r.tryStore(r.newTraceValue(te, nil, nil)); created { - tv.logf(1, "registering remote trace %s", tv) - } -} - -// MaybeRegister is called on an entry that has been proposed to raft. This will -// begin logging all subsequent updates to this entry. It returns true if the -// registration is successful. A duplicate registration of the same index is -// considered a success and returns true, however the older registration is kept -// and this registration is ignored. -func (r *RaftTracer) MaybeRegister(ctx context.Context, ent raftpb.Entry) bool { - // If the index is nil, then we can't trace this entry. This can happen if - // there is a leader/leaseholder spilt. We don't have an easy way to handle - // this today, so don't attempt to trace it. - if ent.Index == 0 { - log.VEvent(ctx, 2, "skip registering raft proposal without index") - return false - } - - // Only register the entry if this is a traced context with verbose logging. - span := tracing.SpanFromContext(ctx) - if span == nil || span.RecordingType() != tracingpb.RecordingVerbose { - return false - } - - // This must be the last conditional. If this returns true we must call - // storeEntryWithTracing to not leak a registered permit. - if !r.reserveSpace() { - log.VEvent(ctx, 2, "too many active raft traces, skipping") - return false - } - - ctx, span = r.tracer.StartSpanCtx(ctx, "raft trace", - tracing.WithParent(span), tracing.WithFollowsFrom()) - if tv, created := r.tryStore(r.newTraceValue(kvserverpb.TracedEntry{ - Index: kvpb.RaftIndex(ent.Index), - TraceID: span.TraceID(), - SpanID: span.SpanID(), - }, ctx, span)); created { - tv.logf(1, "registering local trace %s", tv) - } - return true -} - -// MaybeTrace logs the message in every trace it is relevant to. -func (r *RaftTracer) MaybeTrace(m raftpb.Message) []kvserverpb.TracedEntry { - // NB: This check is an optimization to handle the common case where there - // are no registered traces on this replica. - if r.numRegisteredReplica.Load() == 0 { - return nil - } - - switch m.Type { - case raftpb.MsgProp, raftpb.MsgApp, raftpb.MsgStorageAppend, raftpb.MsgStorageApply: - return r.traceIfCovered(m) - case raftpb.MsgAppResp, raftpb.MsgStorageAppendResp, raftpb.MsgStorageApplyResp: - r.traceIfPast(m) - return nil - } - return nil -} - -// removeEntry removes the trace at the given index and decrements the -// registered counters at the replica and store level. -func (r *RaftTracer) removeEntry(index kvpb.RaftIndex) { - tv, found := r.m.LoadAndDelete(index) - if !found { - return - } - // Don't allow additional tracing to this context. 
- r.destroy(tv) -} - -func (r *RaftTracer) destroy(tv *traceValue) { - r.numRegisteredReplica.Add(-1) - r.numRegisteredStore.Add(-1) - - tv.mu.Lock() - defer tv.mu.Unlock() - if tv.mu.propSpan != nil { - tv.mu.propSpan.Finish() - tv.mu.propCtx = nil - tv.mu.propSpan = nil - } -} - -// Close will unregister all the currently active traces and prevent additional -// traces from being added. It is safe to call multiple times, but should always -// be called at least once when the replica is destroyed to prevent leaking -// traces. -// Note that there could be a race between another caller calling Register and -// us closing the tracer, however we won't allow any new registrations to come -// through after this call. Note that we set this to MaxInt32 instead of -// MaxInt64 to avoid a rare race where another thread is in the middle of -// `reserveSpace` and calls `Add(1)` which cause overflow. -func (r *RaftTracer) Close() { - r.numRegisteredReplica.Store(math.MaxInt32) - - r.m.Range(func(index kvpb.RaftIndex, t *traceValue) bool { - t.logf(2, "cleanup log index %d during Close", index) - r.removeEntry(index) - return true - }) -} - -func peer(p raftpb.PeerID) redact.SafeString { - return redact.SafeString(raft.DescribeTarget(p)) -} - -// traceIfCovered will log the message if it touches any of the registered trace -// points. Additionally it returns any saved trace/span IDs for sending to -// remote nodes. This applies both to messages that the leader sends to -// followers, and messages replicas send to their local storage. -func (r *RaftTracer) traceIfCovered(m raftpb.Message) []kvserverpb.TracedEntry { - if len(m.Entries) == 0 { - return nil - } - minEntryIndex := kvpb.RaftIndex(m.Entries[0].Index) - maxEntryIndex := kvpb.RaftIndex(m.Entries[len(m.Entries)-1].Index) - var tracedEntries []kvserverpb.TracedEntry - r.m.Range(func(index kvpb.RaftIndex, t *traceValue) bool { - // If the traced index is not in the range of the entries, we can skip - // it. We don't need to check each individual entry since they are - // contiguous. - if t.traced.Index < minEntryIndex || t.traced.Index > maxEntryIndex { - return true - } - tracedEntries = append(tracedEntries, t.traced) - // TODO(baptist): Not all the fields are relevant to log for all - // message types. Consider cleaning up what is logged. - t.logf(4, - "%s->%s %v Term:%d Log:%d/%d Entries:[%d-%d]", - peer(m.From), - peer(m.To), - m.Type, - m.Term, - m.LogTerm, - m.Index, - minEntryIndex, - maxEntryIndex, - ) - return true - }) - return tracedEntries -} - -// traceIfPast will log the message to all registered traceValues the message is -// past. It will additionally unregister traces that are no longer useful. This -// call is for events that move the needle/watermark forward (e.g. the log -// storage syncs), but don't have an exact range of entries affected. So, being -// unable to match these events to entries exactly once, we instead check that -// the watermark passed the entry. To protect against overly verbose logging, we -// only allow MsgAppResp to be logged once per peer, and only one -// MsgStorageAppendResp. When we receive a MsgStorageApplyResp we will log and -// unregister the tracing. 
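traceIfCovered and traceIfPast, described above, reduce message matching to two predicates: range containment for messages that carry a contiguous slice of entries, and a watermark comparison for responses that only advance an index. A small, self-contained sketch of both checks (hypothetical names):

```go
package main

import "fmt"

// covered reports whether a traced raft index falls inside the contiguous
// entry range [first, last] carried by a single MsgApp-style message.
func covered(traced, first, last uint64) bool {
	return traced >= first && traced <= last
}

// past reports whether a response whose index acts as a watermark (e.g. an
// MsgAppResp acknowledging everything up to respIndex) has moved past the
// traced entry.
func past(traced, respIndex uint64) bool {
	return respIndex >= traced
}

func main() {
	fmt.Println(covered(5, 3, 7)) // true: entry 5 is inside [3, 7]
	fmt.Println(past(5, 4))       // false: the ack watermark has not reached 5 yet
	fmt.Println(past(5, 9))       // true: watermark 9 has passed entry 5
}
```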
-func (r *RaftTracer) traceIfPast(m raftpb.Message) { - if m.Reject { - return - } - r.m.Range(func(index kvpb.RaftIndex, t *traceValue) bool { - switch m.Type { - case raftpb.MsgAppResp: - if kvpb.RaftIndex(m.Index) >= index && !t.seenMsgAppResp(m.From) { - t.logf(4, - "%s->%s %v Term:%d Index:%d", - peer(m.From), - peer(m.To), - m.Type, - m.Term, - m.Index, - ) - } - case raftpb.MsgStorageAppendResp: - if kvpb.RaftIndex(m.Index) >= index && !t.seenMsgStorageAppendResp() { - t.logf(4, - "%s->%s %v Log:%d/%d", - peer(m.From), - peer(m.To), - m.Type, - m.LogTerm, - m.Index, - ) - } - case raftpb.MsgStorageApplyResp: - if len(m.Entries) == 0 { - return true - } - // Use the last entry to determine if we should log this message. - msgIndex := m.Entries[len(m.Entries)-1].Index - if kvpb.RaftIndex(msgIndex) >= index { - t.logf(4, - "%s->%s %v LastEntry:%d/%d", - peer(m.From), - peer(m.To), - m.Type, - m.Entries[len(m.Entries)-1].Term, - m.Entries[len(m.Entries)-1].Index, - ) - // We unregister the index here because we are now "done" with - // this entry and don't expect more useful events. - t.logf(4, "unregistered log index %d from tracing", index) - r.removeEntry(index) - } - } - return true - }) -} diff --git a/pkg/kv/kvserver/rafttrace/rafttrace_test.go b/pkg/kv/kvserver/rafttrace/rafttrace_test.go deleted file mode 100644 index 59fdd5a9e1d3..000000000000 --- a/pkg/kv/kvserver/rafttrace/rafttrace_test.go +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright 2024 The Cockroach Authors. -// -// Use of this software is governed by the CockroachDB Software License -// included in the /LICENSE file. - -package rafttrace - -import ( - "context" - "sync/atomic" - "testing" - - "github.com/cockroachdb/cockroach/pkg/kv/kvpb" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" - "github.com/cockroachdb/cockroach/pkg/raft/raftpb" - "github.com/cockroachdb/cockroach/pkg/settings/cluster" - "github.com/cockroachdb/cockroach/pkg/testutils" - "github.com/cockroachdb/cockroach/pkg/util/tracing" - "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb" - "github.com/stretchr/testify/require" -) - -func createTracer(count int64) *RaftTracer { - ctx := context.Background() - tracer := tracing.NewTracer() - st := cluster.MakeTestingClusterSettings() - MaxConcurrentRaftTraces.Override(ctx, &st.SV, count) - numRegisteredStore := atomic.Int64{} - return NewRaftTracer(ctx, tracer, st, &numRegisteredStore) -} - -func TestRegisterRemote(t *testing.T) { - rt := createTracer(10) - - te := kvserverpb.TracedEntry{Index: 1, TraceID: 123, SpanID: 456} - rt.RegisterRemote(te) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) - require.Equal(t, int64(1), rt.numRegisteredReplica.Load()) -} - -func TestMaybeRegisterNoSpan(t *testing.T) { - rt := createTracer(10) - - // Test without a span in context - ctx := context.Background() - require.False(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1})) - require.Equal(t, int64(0), rt.numRegisteredStore.Load()) - require.Equal(t, int64(0), rt.numRegisteredReplica.Load()) -} - -func TestMaybeRegisterWithSpan(t *testing.T) { - rt := createTracer(10) - - ctx := context.Background() - // Test with a span in the context. 
- ctx, span := rt.tracer.StartSpanCtx(ctx, "test-span", tracing.WithRecording(tracingpb.RecordingVerbose)) - defer span.Finish() - - require.True(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1})) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) - require.Equal(t, int64(1), rt.numRegisteredReplica.Load()) -} - -func TestMaybeTraceNoSpan(t *testing.T) { - rt := createTracer(10) - ctx := context.Background() - - ent := raftpb.Entry{Index: 1} - require.False(t, rt.MaybeRegister(ctx, ent)) - require.Empty(t, rt.MaybeTrace(raftpb.Message{Type: raftpb.MsgApp, Entries: []raftpb.Entry{ent}})) -} - -func TestMaybeTraceWithSpan(t *testing.T) { - rt := createTracer(10) - ctx, span := rt.tracer.StartSpanCtx(context.Background(), "test-span", tracing.WithRecording(tracingpb.RecordingVerbose)) - defer span.Finish() - - ent := raftpb.Entry{Index: 1} - require.True(t, rt.MaybeRegister(ctx, ent)) - tracedEntries := rt.MaybeTrace(raftpb.Message{ - Type: raftpb.MsgApp, - Entries: []raftpb.Entry{ent}, - }) - require.Len(t, tracedEntries, 1) - require.Equal(t, kvpb.RaftIndex(1), tracedEntries[0].Index) -} - -func TestClose(t *testing.T) { - rt := createTracer(10) - ctx, span := rt.tracer.StartSpanCtx(context.Background(), "test-span", tracing.WithRecording(tracingpb.RecordingVerbose)) - defer span.Finish() - - require.True(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1})) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) - require.Equal(t, int64(1), rt.numRegisteredReplica.Load()) - - rt.Close() - require.Equal(t, int64(0), rt.numRegisteredStore.Load()) - require.Greater(t, rt.numRegisteredReplica.Load(), int64(1000)) -} - -func TestTwoTracersSharingNumRegisteredStore(t *testing.T) { - numRegisteredStore := atomic.Int64{} - ctx := context.Background() - tracer := tracing.NewTracer() - st := cluster.MakeTestingClusterSettings() - MaxConcurrentRaftTraces.Override(ctx, &st.SV, 3) - - rt1 := NewRaftTracer(ctx, tracer, st, &numRegisteredStore) - rt2 := NewRaftTracer(ctx, tracer, st, &numRegisteredStore) - - // Register a trace in the first tracer. - ctx1, span1 := rt1.tracer.StartSpanCtx(ctx, "test-span-1", tracing.WithRecording(tracingpb.RecordingVerbose)) - defer span1.Finish() - require.True(t, rt1.MaybeRegister(ctx1, raftpb.Entry{Index: 1})) - require.Equal(t, int64(1), rt1.numRegisteredStore.Load()) - require.Equal(t, int64(1), rt1.numRegisteredReplica.Load()) - - // Register a trace in the second tracer. - ctx2, span2 := rt2.tracer.StartSpanCtx(ctx, "test-span-2", tracing.WithRecording(tracingpb.RecordingVerbose)) - defer span2.Finish() - require.True(t, rt2.MaybeRegister(ctx2, raftpb.Entry{Index: 2})) - require.Equal(t, int64(2), rt2.numRegisteredStore.Load()) - require.Equal(t, int64(1), rt2.numRegisteredReplica.Load()) - - // Ensure both tracers share the same numRegisteredStore. - require.Equal(t, rt1.numRegisteredStore, rt2.numRegisteredStore) - - // Close the first tracer and check the counts. - rt1.Close() - require.Equal(t, int64(1), rt2.numRegisteredStore.Load()) - require.Greater(t, rt1.numRegisteredReplica.Load(), int64(1000)) - require.Equal(t, int64(1), rt2.numRegisteredReplica.Load()) - - // Close the second tracer and check the counts. 
- rt2.Close() - require.Equal(t, int64(0), rt2.numRegisteredStore.Load()) - require.Greater(t, rt2.numRegisteredReplica.Load(), int64(1000)) -} - -func TestLimit(t *testing.T) { - rt := createTracer(2) - ctx1, span1 := rt.tracer.StartSpanCtx(context.Background(), "test-span", tracing.WithRecording(tracingpb.RecordingVerbose)) - defer span1.Finish() - // Only 2 traces are allowed but we attempt to register 3. - require.True(t, rt.MaybeRegister(ctx1, raftpb.Entry{Index: 1})) - require.True(t, rt.MaybeRegister(ctx1, raftpb.Entry{Index: 2})) - require.False(t, rt.MaybeRegister(ctx1, raftpb.Entry{Index: 3})) - rt.Close() - require.Equal(t, int64(0), rt.numRegisteredStore.Load()) - require.Greater(t, rt.numRegisteredReplica.Load(), int64(1000)) -} - -func TestMaybeTraceMsgAppResp(t *testing.T) { - rt := createTracer(10) - ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test") - - require.True(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1})) - require.Empty(t, rt.MaybeTrace(raftpb.Message{ - Term: 1, - From: 1, - To: 2, - Type: raftpb.MsgAppResp, - Index: uint64(5), - }), 0) - output := finish().String() - require.NoError(t, testutils.MatchInOrder(output, []string{"1->2 MsgAppResp Term:1 Index:5"}...)) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) -} - -func TestDupeMsgAppResp(t *testing.T) { - rt := createTracer(10) - ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test") - - ent := raftpb.Entry{Index: 1} - require.True(t, rt.MaybeRegister(ctx, ent)) - require.Empty(t, rt.MaybeTrace(raftpb.Message{ - Term: 1, - From: 1, - To: 2, - Type: raftpb.MsgAppResp, - Index: uint64(5), - })) - // The second message should not trace. - require.Empty(t, rt.MaybeTrace(raftpb.Message{ - Term: 1, - From: 1, - To: 2, - Type: raftpb.MsgAppResp, - Index: uint64(6), - })) - - output := finish().String() - require.NoError(t, testutils.MatchInOrder(output, []string{"1->2 MsgAppResp Term:1 Index:5"}...)) - require.Error(t, testutils.MatchInOrder(output, []string{"1->2 MsgAppResp Term:1 Index:6"}...)) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) -} - -func TestTraceMsgStorageAppendResp(t *testing.T) { - rt := createTracer(10) - ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test") - - ent := raftpb.Entry{Index: 1} - require.True(t, rt.MaybeRegister(ctx, ent)) - require.Empty(t, rt.MaybeTrace(raftpb.Message{ - From: 1, - To: 2, - Term: 3, - Type: raftpb.MsgStorageAppendResp, - Index: uint64(5), - LogTerm: uint64(4), - })) - - output := finish().String() - require.NoError(t, testutils.MatchInOrder(output, []string{"1->2 MsgStorageAppendResp Log:4/5"}...)) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) -} - -func TestDupeMsgStorageAppendResp(t *testing.T) { - rt := createTracer(10) - ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test") - - ent := raftpb.Entry{Index: 1} - require.True(t, rt.MaybeRegister(ctx, ent)) - require.Empty(t, rt.MaybeTrace(raftpb.Message{ - From: 1, - To: 2, - Term: 3, - Type: raftpb.MsgStorageAppendResp, - Index: uint64(5), - LogTerm: uint64(4), - })) - // The second messsage should not trace. 
- require.Empty(t, rt.MaybeTrace(raftpb.Message{ - From: 5, - To: 6, - Term: 7, - Type: raftpb.MsgStorageAppendResp, - Index: uint64(8), - LogTerm: uint64(9), - })) - - output := finish().String() - require.NoError(t, testutils.MatchInOrder(output, []string{"1->2 MsgStorageAppendResp Log:4/5"}...)) - require.Error(t, testutils.MatchInOrder(output, []string{"5->6 MsgStorageAppendResp"}...)) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) -} - -func TestNoTraceMsgStorageAppendResp(t *testing.T) { - rt := createTracer(10) - ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test") - - ent := raftpb.Entry{Index: 10} - require.True(t, rt.MaybeRegister(ctx, ent)) - - // This doesn't trace since the index is behind the entry index. - require.Empty(t, rt.MaybeTrace(raftpb.Message{ - From: 1, - To: 2, - Term: 3, - Type: raftpb.MsgStorageAppendResp, - Index: uint64(5), - LogTerm: uint64(4), - })) - - output := finish().String() - require.Error(t, testutils.MatchInOrder(output, []string{"MsgStorageAppendResp"}...)) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) -} - -func TestTraceMsgStorageApplyResp(t *testing.T) { - rt := createTracer(10) - ctx, finish := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "test") - - require.True(t, rt.MaybeRegister(ctx, raftpb.Entry{Index: 1})) - require.Empty(t, rt.MaybeTrace( - raftpb.Message{ - From: 1, - To: 2, - Type: raftpb.MsgStorageApplyResp, - Entries: []raftpb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 4}, - }, - })) - - output := finish().String() - require.NoError(t, testutils.MatchInOrder(output, - []string{ - `1->2 MsgStorageApplyResp LastEntry:2/4`, - `unregistered log index`, - }...)) - require.Equal(t, int64(0), rt.numRegisteredStore.Load()) -} - -func TestDuplicateIndex(t *testing.T) { - rt := createTracer(10) - ctx1, trace1 := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "trace1") - require.True(t, rt.MaybeRegister(ctx1, raftpb.Entry{Index: 1})) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) - require.Equal(t, int64(1), rt.numRegisteredReplica.Load()) - // This returns true indicating that the index is registered, but it doesn't - // add a new registration. - ctx2, trace2 := tracing.ContextWithRecordingSpan(context.Background(), rt.tracer, "trace2") - require.True(t, rt.MaybeRegister(ctx2, raftpb.Entry{Index: 1})) - require.Equal(t, int64(1), rt.numRegisteredStore.Load()) - require.Equal(t, int64(1), rt.numRegisteredReplica.Load()) - - // Unregister the entry with a MsgStorageApplyResp. - require.Empty(t, rt.MaybeTrace( - raftpb.Message{ - From: 1, - To: 2, - Type: raftpb.MsgStorageApplyResp, - Entries: []raftpb.Entry{ - {Term: 1, Index: 1}, - {Term: 2, Index: 4}, - }, - })) - // We expect the logs to go to the first trace. 
- output1 := trace1().String() - output2 := trace2().String() - require.NoError(t, testutils.MatchInOrder(output1, - []string{ - `1->2 MsgStorageApplyResp LastEntry:2/4`, - `unregistered log index`, - }...)) - require.NoError(t, testutils.MatchInOrder(output1, - []string{ - `additional registration for same index`, - }...)) - require.Error(t, testutils.MatchInOrder(output2, - []string{ - `1->2 MsgStorageApplyResp LastEntry:2/4`, - `unregistered log index`, - }...)) - require.NoError(t, testutils.MatchInOrder(output2, - []string{ - `duplicate registration ignored`, - }...)) - - require.Equal(t, int64(0), rt.numRegisteredStore.Load()) - require.Equal(t, int64(0), rt.numRegisteredReplica.Load()) -} diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index 0b2fbc22408d..1eb4a3369dfc 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -31,7 +31,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/load" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rangefeed" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader" @@ -892,10 +891,6 @@ type Replica struct { // MsgAppPull <=> LazyReplication. // Updated with both raftMu and mu held. currentRACv2Mode rac2.RaftMsgAppMode - - // raftTracer is used to trace raft messages that are sent with a - // tracing context. - raftTracer rafttrace.RaftTracer } // The raft log truncations that are pending. Access is protected by its own diff --git a/pkg/kv/kvserver/replica_application_decoder.go b/pkg/kv/kvserver/replica_application_decoder.go index 5b9cdf49cb5b..3586137f7c7b 100644 --- a/pkg/kv/kvserver/replica_application_decoder.go +++ b/pkg/kv/kvserver/replica_application_decoder.go @@ -145,7 +145,7 @@ func (d *replicaDecoder) createTracingSpans(ctx context.Context) { propCtx := ctx // raft scheduler's ctx var propSp *tracing.Span // If the client has a trace, put a child into propCtx. - if sp := tracing.SpanFromContext(cmd.proposal.Context()); sp != nil { + if sp := tracing.SpanFromContext(cmd.proposal.ctx); sp != nil { propCtx, propSp = sp.Tracer().StartSpanCtx( propCtx, "local proposal", tracing.WithParent(sp), ) diff --git a/pkg/kv/kvserver/replica_application_result.go b/pkg/kv/kvserver/replica_application_result.go index 998f7258d3a5..4f85db924d3c 100644 --- a/pkg/kv/kvserver/replica_application_result.go +++ b/pkg/kv/kvserver/replica_application_result.go @@ -328,6 +328,7 @@ func (r *Replica) makeReproposal(origP *ProposalData) (reproposal *ProposalData, // span "follows from" the proposal's span, if the proposal sticks around // for (some reincarnation of) the command to eventually apply, its trace // will reflect the reproposal as well. + ctx: origP.ctx, idKey: raftlog.MakeCmdIDKey(), proposedAtTicks: 0, // set in registerProposalLocked createdAtTicks: 0, // set in registerProposalLocked @@ -363,8 +364,6 @@ func (r *Replica) makeReproposal(origP *ProposalData) (reproposal *ProposalData, seedProposal: seedP, } - origCtx := origP.Context() - newProposal.ctx.Store(&origCtx) return newProposal, func() { // If the original proposal had an explicit span, it's an async consensus @@ -395,8 +394,7 @@ func (r *Replica) makeReproposal(origP *ProposalData) (reproposal *ProposalData, // // TODO(radu): Should this context be created via tracer.ForkSpan? 
// We'd need to make sure the span is finished eventually. - ctx := r.AnnotateCtx(context.TODO()) - origP.ctx.Store(&ctx) + origP.ctx = r.AnnotateCtx(context.TODO()) seedP.lastReproposal = newProposal } } diff --git a/pkg/kv/kvserver/replica_application_result_test.go b/pkg/kv/kvserver/replica_application_result_test.go index 51a83b9a50bd..c5f2dfc996b4 100644 --- a/pkg/kv/kvserver/replica_application_result_test.go +++ b/pkg/kv/kvserver/replica_application_result_test.go @@ -37,7 +37,8 @@ func makeProposalData() *ProposalData { AdmissionOriginNode: 1, } - prop := ProposalData{ + return &ProposalData{ + ctx: context.WithValue(context.Background(), struct{}{}, "nonempty-ctx"), sp: &tracing.Span{}, idKey: "deadbeef", proposedAtTicks: 1, @@ -57,9 +58,6 @@ func makeProposalData() *ProposalData { seedProposal: nil, lastReproposal: nil, } - ctx := context.WithValue(context.Background(), struct{}{}, "nonempty-ctx") - prop.ctx.Store(&ctx) - return &prop } func TestProposalDataAndRaftCommandAreConsideredWhenAddingFields(t *testing.T) { @@ -75,8 +73,8 @@ func TestProposalDataAndRaftCommandAreConsideredWhenAddingFields(t *testing.T) { // NB: we can't use zerofields for two reasons: First, we have unexported fields // here, and second, we don't want to check for recursively populated structs (but // only for the top level fields). - require.Equal(t, 10, reflect.Indirect(reflect.ValueOf(prop.command)).NumField()) - require.Equal(t, 19, reflect.Indirect(reflect.ValueOf(prop)).NumField()) + require.Equal(t, 10, reflect.TypeOf(*prop.command).NumField()) + require.Equal(t, 19, reflect.TypeOf(*prop).NumField()) } func TestReplicaMakeReproposalChaininig(t *testing.T) { @@ -86,7 +84,7 @@ func TestReplicaMakeReproposalChaininig(t *testing.T) { var r Replica proposals := make([]*ProposalData, 1, 4) proposals[0] = makeProposalData() - sharedCtx := proposals[0].Context() + sharedCtx := proposals[0].ctx verify := func() { seed := proposals[0] @@ -104,9 +102,9 @@ func TestReplicaMakeReproposalChaininig(t *testing.T) { } // Only the latest reproposal must use the seed context. for _, prop := range proposals[:len(proposals)-1] { - require.NotEqual(t, sharedCtx, prop.Context()) + require.NotEqual(t, sharedCtx, prop.ctx) } - require.Equal(t, sharedCtx, proposals[len(proposals)-1].Context()) + require.Equal(t, sharedCtx, proposals[len(proposals)-1].ctx) } verify() diff --git a/pkg/kv/kvserver/replica_destroy.go b/pkg/kv/kvserver/replica_destroy.go index 3d730e240094..553b5e012fd7 100644 --- a/pkg/kv/kvserver/replica_destroy.go +++ b/pkg/kv/kvserver/replica_destroy.go @@ -181,5 +181,4 @@ func (r *Replica) disconnectReplicationRaftMuLocked(ctx context.Context) { log.Fatalf(ctx, "removing raft group before destroying replica %s", r) } r.mu.internalRaftGroup = nil - r.mu.raftTracer.Close() } diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go index 193f98f9e57c..c1691a2d903c 100644 --- a/pkg/kv/kvserver/replica_init.go +++ b/pkg/kv/kvserver/replica_init.go @@ -21,7 +21,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvstorage" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/load" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader" "github.com/cockroachdb/cockroach/pkg/raft" @@ -155,7 +154,7 @@ func newUninitializedReplicaWithoutRaftGroup( } // Expose proposal data for external test packages. 
return store.cfg.TestingKnobs.TestingProposalSubmitFilter(kvserverbase.ProposalFilterArgs{ - Ctx: p.Context(), + Ctx: p.ctx, RangeID: rangeID, StoreID: store.StoreID(), ReplicaID: replicaID, @@ -329,7 +328,6 @@ func (r *Replica) initRaftGroupRaftMuLockedReplicaMuLocked() error { return err } r.mu.internalRaftGroup = rg - r.mu.raftTracer = *rafttrace.NewRaftTracer(ctx, r.Tracer, r.ClusterSettings(), &r.store.concurrentRaftTraces) r.flowControlV2.InitRaftLocked( ctx, replica_rac2.NewRaftNode(rg, (*replicaForRACv2)(r)), rg.LogMark()) return nil diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index c0261d33b8eb..20881c02b945 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -9,7 +9,6 @@ import ( "context" "os" "path/filepath" - "sync/atomic" "time" "github.com/cockroachdb/cockroach/pkg/keys" @@ -117,15 +116,7 @@ type ProposalData struct { // that during command application one should always use `replicatedCmd.ctx` // for best coverage. `p.ctx` should be used when a `replicatedCmd` is not in // scope, i.e. outside of raft command application. - // - // The context may be updated during the proposal lifecycle but will never - // be nil. To clear out the context, set it to context.Background(). It is - // protected by an atomic pointer because it can be read without holding the - // raftMu. Use ProposalData.Context() to read it. - // - // TODO(baptist): Track down all the places where we read and write ctx and - // determine whether we can convert this back to non-atomic field. - ctx atomic.Pointer[context.Context] + ctx context.Context // An optional tracing span bound to the proposal in the case of async // consensus (it will be referenced by p.ctx). We need to finish this span @@ -225,12 +216,6 @@ type ProposalData struct { lastReproposal *ProposalData } -// Context returns the context associated with the proposal. The context may -// change during the lifetime of the proposal. -func (proposal *ProposalData) Context() context.Context { - return *proposal.ctx.Load() -} - // useReplicationAdmissionControl indicates whether this raft command should // be subject to replication admission control. func (proposal *ProposalData) useReplicationAdmissionControl() bool { @@ -285,8 +270,7 @@ func (proposal *ProposalData) signalProposalResult(pr proposalResult) { // // NB: `proposal.ec.repl` might already have been cleared if we arrive here // through finishApplication. - ctx := context.Background() - proposal.ctx.Store(&ctx) + proposal.ctx = context.Background() } } @@ -1066,13 +1050,13 @@ func (r *Replica) requestToProposal( // Fill out the results even if pErr != nil; we'll return the error below. proposal := &ProposalData{ + ctx: ctx, idKey: idKey, doneCh: make(chan proposalResult, 1), Local: &res.Local, Request: ba, leaseStatus: *st, } - proposal.ctx.Store(&ctx) if needConsensus { proposal.command = &kvserverpb.RaftCommand{ diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go index 5d96eceb0098..633568fa7b5e 100644 --- a/pkg/kv/kvserver/replica_proposal_buf.go +++ b/pkg/kv/kvserver/replica_proposal_buf.go @@ -126,7 +126,6 @@ type singleBatchProposer interface { getReplicaID() roachpb.ReplicaID flowControlHandle(ctx context.Context) kvflowcontrol.Handle onErrProposalDropped([]raftpb.Entry, []*ProposalData, raftpb.StateType) - registerForTracing(*ProposalData, raftpb.Entry) bool } // A proposer is an object that uses a propBuf to coordinate Raft proposals. 
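The ProposalData hunk above drops the atomic.Pointer[context.Context] indirection in favor of a plain ctx field now that the raft-tracing reader is gone. For reference, a minimal sketch of the pointer-swap pattern being removed (stand-in type and key, not the real ProposalData):

```go
package main

import (
	"context"
	"fmt"
	"sync/atomic"
)

type ctxKey struct{}

// proposal is a stand-in for ProposalData with the atomic-pointer form of the
// context field that this patch replaces with a plain context.Context.
type proposal struct {
	ctx atomic.Pointer[context.Context]
}

// Context mirrors the removed ProposalData.Context() accessor.
func (p *proposal) Context() context.Context { return *p.ctx.Load() }

func main() {
	p := &proposal{}
	ctx := context.WithValue(context.Background(), ctxKey{}, "proposer")
	p.ctx.Store(&ctx)
	fmt.Println(p.Context().Value(ctxKey{})) // proposer

	// The pointer lets another goroutine swap in a fresh context without
	// readers holding a mutex; with a plain field, callers rely on the usual
	// replica locking instead.
	bg := context.Background()
	p.ctx.Store(&bg)
	fmt.Println(p.Context().Value(ctxKey{})) // <nil>
}
```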
@@ -256,7 +255,7 @@ func (b *propBuf) Insert(ctx context.Context, p *ProposalData, tok TrackedReques } if log.V(4) { - log.Infof(p.Context(), "submitting proposal %x", p.idKey) + log.Infof(p.ctx, "submitting proposal %x", p.idKey) } // Insert the proposal into the buffer's array. The buffer now takes ownership @@ -572,7 +571,7 @@ func (b *propBuf) FlushLockedWithRaftGroup( Data: p.encodedCommand, }) nextProp++ - log.VEvent(p.Context(), 2, "flushing proposal to Raft") + log.VEvent(p.ctx, 2, "flushing proposal to Raft") // We don't want deduct flow tokens for reproposed commands, and of // course for proposals that didn't integrate with kvflowcontrol. @@ -582,7 +581,7 @@ func (b *propBuf) FlushLockedWithRaftGroup( } else { admitHandles = append(admitHandles, admitEntHandle{ handle: p.raftAdmissionMeta, - pCtx: p.Context(), + pCtx: p.ctx, }) } } @@ -870,34 +869,26 @@ func proposeBatch( // TODO(bdarnell): Handle ErrProposalDropped better. // https://github.com/cockroachdb/cockroach/issues/21849 for _, p := range props { - log.Event(p.Context(), "entry dropped") + if p.ctx != nil { + log.Event(p.ctx, "entry dropped") + } } p.onErrProposalDropped(ents, props, raftGroup.BasicStatus().RaftState) return nil //nolint:returnerrcheck } - if err != nil { - return err - } - // Now that we know what raft log position[1] this proposal is to end up - // in, deduct flow tokens for it. This is done without blocking (we've - // already waited for available flow tokens pre-evaluation). The tokens - // will later be returned once we're informed of the entry being - // admitted below raft. - // - // [1]: We're relying on an undocumented side effect of upstream raft - // API where it populates the index and term for the passed in - // slice of entries. See etcd-io/raft#57. - maybeDeductFlowTokens(ctx, p.flowControlHandle(ctx), handles, ents) - - // Register the proposal with rafttrace. This will add the trace to the raft - // lifecycle. We trace at most one entry per batch, so break after the first - // one is successfully registered. - for i := range ents { - if p.registerForTracing(props[i], ents[i]) { - break - } + if err == nil { + // Now that we know what raft log position[1] this proposal is to end up + // in, deduct flow tokens for it. This is done without blocking (we've + // already waited for available flow tokens pre-evaluation). The tokens + // will later be returned once we're informed of the entry being + // admitted below raft. + // + // [1]: We're relying on an undocumented side effect of upstream raft + // API where it populates the index and term for the passed in + // slice of entries. See etcd-io/raft#57. 
+ maybeDeductFlowTokens(ctx, p.flowControlHandle(ctx), handles, ents) } - return nil + return err } func maybeDeductFlowTokens( @@ -1184,10 +1175,6 @@ func (rp *replicaProposer) closedTimestampTarget() hlc.Timestamp { return (*Replica)(rp).closedTimestampTargetRLocked() } -func (rp *replicaProposer) registerForTracing(p *ProposalData, e raftpb.Entry) bool { - return (*Replica)(rp).mu.raftTracer.MaybeRegister(p.Context(), e) -} - func (rp *replicaProposer) withGroupLocked(fn func(raftGroup proposerRaft) error) error { return (*Replica)(rp).withRaftGroupLocked(func(raftGroup *raft.RawNode) (bool, error) { // We're proposing a command here so there is no need to wake the leader diff --git a/pkg/kv/kvserver/replica_proposal_buf_test.go b/pkg/kv/kvserver/replica_proposal_buf_test.go index 126febaaa575..bdb47cb3a7eb 100644 --- a/pkg/kv/kvserver/replica_proposal_buf_test.go +++ b/pkg/kv/kvserver/replica_proposal_buf_test.go @@ -217,8 +217,6 @@ func (t *testProposer) campaignLocked(ctx context.Context) { } } -func (t *testProposer) registerForTracing(*ProposalData, raftpb.Entry) bool { return true } - func (t *testProposer) rejectProposalWithErrLocked(_ context.Context, _ *ProposalData, err error) { if t.onRejectProposalWithErrLocked == nil { panic(fmt.Sprintf("unexpected rejectProposalWithErrLocked call: err=%v", err)) @@ -303,6 +301,7 @@ func (pc proposalCreator) newProposal(ba *kvpb.BatchRequest) *ProposalData { } } p := &ProposalData{ + ctx: context.Background(), idKey: kvserverbase.CmdIDKey("test-cmd"), command: &kvserverpb.RaftCommand{ ReplicatedEvalResult: kvserverpb.ReplicatedEvalResult{ @@ -314,8 +313,6 @@ func (pc proposalCreator) newProposal(ba *kvpb.BatchRequest) *ProposalData { Request: ba, leaseStatus: pc.lease, } - ctx := context.Background() - p.ctx.Store(&ctx) p.encodedCommand = pc.encodeProposal(p) return p } diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go index 505fdb840bda..7664c454797a 100644 --- a/pkg/kv/kvserver/replica_raft.go +++ b/pkg/kv/kvserver/replica_raft.go @@ -123,7 +123,7 @@ func (r *Replica) evalAndPropose( idKey := raftlog.MakeCmdIDKey() proposal, pErr := r.requestToProposal(ctx, idKey, ba, g, st, ui) ba = proposal.Request // may have been updated - log.Event(proposal.Context(), "evaluated request") + log.Event(proposal.ctx, "evaluated request") // If the request hit a server-side concurrency retry error, immediately // propagate the error. Don't assume ownership of the concurrency guard. @@ -168,7 +168,7 @@ func (r *Replica) evalAndPropose( // from this point on. proposal.ec = makeReplicatedEndCmds(r, g, *st, timeutil.Now()) - log.VEventf(proposal.Context(), 2, + log.VEventf(proposal.ctx, 2, "proposing command to write %d new keys, %d new values, %d new intents, "+ "write batch size=%d bytes", proposal.command.ReplicatedEvalResult.Delta.KeyCount, @@ -204,9 +204,7 @@ func (r *Replica) evalAndPropose( // Fork the proposal's context span so that the proposal's context // can outlive the original proposer's context. 
- ctx, sp := tracing.ForkSpan(ctx, "async consensus") - proposal.ctx.Store(&ctx) - proposal.sp = sp + proposal.ctx, proposal.sp = tracing.ForkSpan(ctx, "async consensus") if proposal.sp != nil { // We can't leak this span if we fail to hand the proposal to the // replication layer, so finish it later in this method if we are to @@ -281,7 +279,7 @@ func (r *Replica) evalAndPropose( "command is too large: %d bytes (max: %d)", quotaSize, maxSize, )) } - log.VEventf(proposal.Context(), 2, "acquiring proposal quota (%d bytes)", quotaSize) + log.VEventf(proposal.ctx, 2, "acquiring proposal quota (%d bytes)", quotaSize) var err error proposal.quotaAlloc, err = r.maybeAcquireProposalQuota(ctx, ba, quotaSize) if err != nil { @@ -351,8 +349,7 @@ func (r *Replica) evalAndPropose( } // TODO(radu): Should this context be created via tracer.ForkSpan? // We'd need to make sure the span is finished eventually. - ctx := r.AnnotateCtx(context.TODO()) - last.ctx.Store(&ctx) + last.ctx = r.AnnotateCtx(context.TODO()) } return proposalCh, abandon, idKey, writeBytes, nil } @@ -399,12 +396,12 @@ func (r *Replica) propose( log.Errorf(ctx, "%v", err) return kvpb.NewError(err) } - log.KvDistribution.Infof(p.Context(), "proposing %s", crt) + log.KvDistribution.Infof(p.ctx, "proposing %s", crt) } else if p.command.ReplicatedEvalResult.AddSSTable != nil { - log.VEvent(p.Context(), 4, "sideloadable proposal detected") + log.VEvent(p.ctx, 4, "sideloadable proposal detected") r.store.metrics.AddSSTableProposals.Inc(1) } else if log.V(4) { - log.Infof(p.Context(), "proposing command %x: %s", p.idKey, p.Request.Summary()) + log.Infof(p.ctx, "proposing command %x: %s", p.idKey, p.Request.Summary()) } raftAdmissionMeta := p.raftAdmissionMeta @@ -433,7 +430,7 @@ func (r *Replica) propose( // Too verbose even for verbose logging, so manually enable if you want to // debug proposal sizes. if false { - log.Infof(p.Context(), `%s: proposal: %d + log.Infof(p.ctx, `%s: proposal: %d RaftCommand.ReplicatedEvalResult: %d RaftCommand.ReplicatedEvalResult.Delta: %d RaftCommand.WriteBatch: %d @@ -450,7 +447,7 @@ func (r *Replica) propose( // TODO(tschottdorf): can we mark them so lightstep can group them? const largeProposalEventThresholdBytes = 2 << 19 // 512kb if ln := len(p.encodedCommand); ln > largeProposalEventThresholdBytes { - log.Eventf(p.Context(), "proposal is large: %s", humanizeutil.IBytes(int64(ln))) + log.Eventf(p.ctx, "proposal is large: %s", humanizeutil.IBytes(int64(ln))) } // Insert into the proposal buffer, which passes the command to Raft to be @@ -459,7 +456,7 @@ func (r *Replica) propose( // // NB: we must not hold r.mu while using the proposal buffer, see comment // on the field. - log.VEvent(p.Context(), 2, "submitting proposal to proposal buffer") + log.VEvent(p.ctx, 2, "submitting proposal to proposal buffer") if err := r.mu.proposalBuf.Insert(ctx, p, tok.Move(ctx)); err != nil { return kvpb.NewError(err) } @@ -638,11 +635,6 @@ func (r *Replica) stepRaftGroupRaftMuLocked(req *kvserverpb.RaftMessageRequest) var sideChannelInfo replica_rac2.SideChannelInfoUsingRaftMessageRequest var admittedVector rac2.AdmittedVector err := r.withRaftGroup(func(raftGroup *raft.RawNode) (bool, error) { - // If this message requested tracing, begin tracing it. - for _, e := range req.TracedEntries { - r.mu.raftTracer.RegisterRemote(e) - } - r.mu.raftTracer.MaybeTrace(req.Message) // We're processing an incoming raft message (from a batch that may // include MsgVotes), so don't campaign if we wake up our raft // group. 
@@ -1007,7 +999,8 @@ func (r *Replica) handleRaftReadyRaftMuLocked( // Even if we don't have a Ready, or entries in Ready, // replica_rac2.Processor may need to do some work. raftEvent := rac2.RaftEventFromMsgStorageAppendAndMsgApps( - rac2ModeForReady, r.ReplicaID(), msgStorageAppend, outboundMsgs, logSnapshot, + rac2ModeForReady, r.ReplicaID(), msgStorageAppend, outboundMsgs, + replica_rac2.RaftLogSnapshot(logSnapshot), r.raftMu.msgAppScratchForFlowControl, replicaStateInfoMap) r.flowControlV2.HandleRaftReadyRaftMuLocked(ctx, raftNodeBasicState, raftEvent) if !hasReady { @@ -1216,7 +1209,6 @@ func (r *Replica) handleRaftReadyRaftMuLocked( } } - r.mu.raftTracer.MaybeTrace(msgStorageAppend) if state, err = s.StoreEntries(ctx, state, app, cb, &stats.append); err != nil { return stats, errors.Wrap(err, "while storing log entries") } @@ -1248,7 +1240,6 @@ func (r *Replica) handleRaftReadyRaftMuLocked( stats.tApplicationBegin = timeutil.Now() if hasMsg(msgStorageApply) { - r.mu.raftTracer.MaybeTrace(msgStorageApply) r.traceEntries(msgStorageApply.Entries, "committed, before applying any entries") err := appTask.ApplyCommittedEntries(ctx) @@ -1569,7 +1560,7 @@ func (r *Replica) processRACv2RangeController(ctx context.Context) { } } r.flowControlV2.ProcessSchedulerEventRaftMuLocked( - ctx, r.mu.currentRACv2Mode, logSnapshot) + ctx, r.mu.currentRACv2Mode, replica_rac2.RaftLogSnapshot(logSnapshot)) } // SendMsgApp implements rac2.MsgAppSender. @@ -1667,7 +1658,7 @@ func (r *Replica) refreshProposalsLocked( // up here too. if p.command.MaxLeaseIndex <= r.shMu.state.LeaseAppliedIndex { r.cleanupFailedProposalLocked(p) - log.Eventf(p.Context(), "retry proposal %x: %s", p.idKey, reason) + log.Eventf(p.ctx, "retry proposal %x: %s", p.idKey, reason) p.finishApplication(ctx, makeProposalResultErr( kvpb.NewAmbiguousResultErrorf( "unable to determine whether command was applied via snapshot", @@ -1735,7 +1726,7 @@ func (r *Replica) refreshProposalsLocked( // definitely required, however. sort.Sort(reproposals) for _, p := range reproposals { - log.Eventf(p.Context(), "re-submitting command %x (MLI %d, CT %s): %s", + log.Eventf(p.ctx, "re-submitting command %x (MLI %d, CT %s): %s", p.idKey, p.command.MaxLeaseIndex, p.command.ClosedTimestamp, reason) if err := r.mu.proposalBuf.ReinsertLocked(ctx, p); err != nil { r.cleanupFailedProposalLocked(p) @@ -1998,7 +1989,6 @@ func (r *Replica) deliverLocalRaftMsgsRaftMuLockedReplicaMuLocked( } for i, m := range localMsgs { - r.mu.raftTracer.MaybeTrace(m) if err := raftGroup.Step(m); err != nil { log.Fatalf(ctx, "unexpected error stepping local raft message [%s]: %v", raft.DescribeMessage(m, raftEntryFormatter), err) @@ -2022,7 +2012,6 @@ func (r *Replica) sendRaftMessage( lastToReplica, lastFromReplica := r.getLastReplicaDescriptors() r.mu.RLock() - traced := r.mu.raftTracer.MaybeTrace(msg) fromReplica, fromErr := r.getReplicaDescriptorByIDRLocked(roachpb.ReplicaID(msg.From), lastToReplica) toReplica, toErr := r.getReplicaDescriptorByIDRLocked(roachpb.ReplicaID(msg.To), lastFromReplica) var startKey roachpb.RKey @@ -2075,7 +2064,6 @@ func (r *Replica) sendRaftMessage( RangeStartKey: startKey, // usually nil UsingRac2Protocol: r.flowControlV2.GetEnabledWhenLeader() >= kvflowcontrol.V2EnabledWhenLeaderV1Encoding, LowPriorityOverride: lowPriorityOverride, - TracedEntries: traced, } // For RACv2, annotate successful MsgAppResp messages with the vector of // admitted log indices, by priority. 
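The handleRaftReadyRaftMuLocked and processRACv2RangeController hunks above now pass the raft log snapshot through an explicit replica_rac2.RaftLogSnapshot(...) conversion, which suggests the rac2 side takes a package-local defined type rather than the raft value directly. A generic, hedged sketch of that adapter-by-conversion pattern (all names hypothetical):

```go
package main

import "fmt"

// raftLogSnapshot stands in for the raft-side snapshot handle.
type raftLogSnapshot struct{ lastIndex uint64 }

// LogSnapshot is a hypothetical package-local defined type over the raft
// handle, so downstream code depends on this package's name for it.
type LogSnapshot raftLogSnapshot

func consume(s LogSnapshot) { fmt.Println("last index:", s.lastIndex) }

func main() {
	ls := raftLogSnapshot{lastIndex: 42}
	// An explicit conversion adapts the raft-side value to the local type,
	// analogous to replica_rac2.RaftLogSnapshot(logSnapshot) in the hunk above.
	consume(LogSnapshot(ls))
}
```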
diff --git a/pkg/kv/kvserver/replica_store_liveness.go b/pkg/kv/kvserver/replica_store_liveness.go index 274e84285028..9ef9929cb1be 100644 --- a/pkg/kv/kvserver/replica_store_liveness.go +++ b/pkg/kv/kvserver/replica_store_liveness.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/util/envutil" "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/cockroach/pkg/util/log" ) var raftLeaderFortificationFractionEnabled = settings.RegisterFloatSetting( @@ -59,18 +60,12 @@ func (r *replicaRLockedStoreLiveness) getStoreIdent( func (r *replicaRLockedStoreLiveness) SupportFor(replicaID raftpb.PeerID) (raftpb.Epoch, bool) { storeID, ok := r.getStoreIdent(replicaID) if !ok { - return 0, false - } - // TODO(arul): we can remove this once we start to assign storeLiveness in the - // Store constructor. - if r.store.storeLiveness == nil { + ctx := r.AnnotateCtx(context.TODO()) + log.Warningf(ctx, "store not found for replica %d in SupportFor", replicaID) return 0, false } epoch, ok := r.store.storeLiveness.SupportFor(storeID) - if !ok { - return 0, false - } - return raftpb.Epoch(epoch), true + return raftpb.Epoch(epoch), ok } // SupportFrom implements the raftstoreliveness.StoreLiveness interface. @@ -79,6 +74,8 @@ func (r *replicaRLockedStoreLiveness) SupportFrom( ) (raftpb.Epoch, hlc.Timestamp) { storeID, ok := r.getStoreIdent(replicaID) if !ok { + ctx := r.AnnotateCtx(context.TODO()) + log.Warningf(ctx, "store not found for replica %d in SupportFrom", replicaID) return 0, hlc.Timestamp{} } epoch, exp := r.store.storeLiveness.SupportFrom(storeID) diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index 1b307a9cab69..b233127bedd1 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -7772,7 +7772,7 @@ func TestReplicaAbandonProposal(t *testing.T) { dropProp := int32(1) tc.repl.mu.Lock() tc.repl.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) { - if v := p.Context().Value(magicKey{}); v != nil { + if v := p.ctx.Value(magicKey{}); v != nil { cancel() return atomic.LoadInt32(&dropProp) == 1, nil } @@ -7890,7 +7890,7 @@ func TestReplicaRetryRaftProposal(t *testing.T) { tc.repl.mu.Lock() tc.repl.mu.proposalBuf.testing.leaseIndexFilter = func(p *ProposalData) (indexOverride kvpb.LeaseAppliedIndex) { - if v := p.Context().Value(magicKey{}); v != nil { + if v := p.ctx.Value(magicKey{}); v != nil { if curAttempt := atomic.AddInt32(&c, 1); curAttempt == 1 { return wrongLeaseIndex } @@ -7994,7 +7994,7 @@ func TestReplicaCancelRaftCommandProgress(t *testing.T) { abandoned := make(map[kvserverbase.CmdIDKey]struct{}) // protected by repl.mu tc.repl.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) { if _, ok := abandoned[p.idKey]; ok { - log.Infof(p.Context(), "abandoning command") + log.Infof(p.ctx, "abandoning command") return true, nil } return false, nil @@ -8066,7 +8066,7 @@ func TestReplicaBurstPendingCommandsAndRepropose(t *testing.T) { if atomic.LoadInt32(&dropAll) == 1 { return true, nil } - if v := p.Context().Value(magicKey{}); v != nil { + if v := p.ctx.Value(magicKey{}); v != nil { seenCmds = append(seenCmds, int(p.command.MaxLeaseIndex)) } return false, nil @@ -8098,7 +8098,7 @@ func TestReplicaBurstPendingCommandsAndRepropose(t *testing.T) { } origIndexes := make([]int, 0, num) for _, p := range tc.repl.mu.proposals { - if v := p.Context().Value(magicKey{}); v != nil { + if v := 
p.ctx.Value(magicKey{}); v != nil { origIndexes = append(origIndexes, int(p.command.MaxLeaseIndex)) } } diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index 46867e63aee3..3ec55dbaf5bb 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -942,11 +942,6 @@ type Store struct { // has likely improved). draining atomic.Bool - // concurrentRaftTraces is the number of concurrent raft trace requests that - // are currently registered. This limit is used to prevent extensive raft - // tracing from inadvertently impacting performance. - concurrentRaftTraces atomic.Int64 - // Locking notes: To avoid deadlocks, the following lock order must be // obeyed: baseQueue.mu < Replica.raftMu < Replica.readOnlyCmdMu < Store.mu // < Replica.mu < Replica.unreachablesMu < Store.coalescedMu < Store.scheduler.mu. diff --git a/pkg/kv/kvserver/store_snapshot.go b/pkg/kv/kvserver/store_snapshot.go index 4f83858857e3..4d0aee57e28e 100644 --- a/pkg/kv/kvserver/store_snapshot.go +++ b/pkg/kv/kvserver/store_snapshot.go @@ -735,13 +735,14 @@ func (kvSS *kvBatchSnapshotStrategy) Receive( var prevWriteBytes int64 snapshotQ := s.cfg.KVAdmissionController.GetSnapshotQueue(s.StoreID()) + if snapshotQ == nil { + log.Errorf(ctx, "unable to find snapshot queue for store: %s", s.StoreID()) + } // Using a nil pacer is effectively a noop if snapshot control is disabled. var pacer *admission.SnapshotPacer = nil - if admission.DiskBandwidthForSnapshotIngest.Get(&s.cfg.Settings.SV) { - pacer = admission.NewSnapshotPacer(snapshotQ, s.StoreID()) + if admission.DiskBandwidthForSnapshotIngest.Get(&s.cfg.Settings.SV) && snapshotQ != nil { + pacer = admission.NewSnapshotPacer(snapshotQ) } - // It is safe to call Close() on a nil pacer. - defer pacer.Close() for { timingTag.start("recv") diff --git a/pkg/kv/kvserver/testdata/replica_unavailable_error.txt b/pkg/kv/kvserver/testdata/replica_unavailable_error.txt index aeb6077ecd85..430776562c70 100644 --- a/pkg/kv/kvserver/testdata/replica_unavailable_error.txt +++ b/pkg/kv/kvserver/testdata/replica_unavailable_error.txt @@ -1,3 +1,3 @@ echo ---- -replica unavailable: (n1,s10):1 unable to serve request to r10:‹{a-z}› [(n1,s10):1, (n2,s20):2, next=3, gen=0]: lost quorum (down: (n2,s20):2); closed timestamp: 1136214245.000000000,0 (2006-01-02 15:04:05); raft status: {"id":"0","term":0,"vote":"0","commit":0,"lead":"0","raftState":"StateFollower","applied":0,"progress":{},"leadtransferee":"0"}: probe failed +replica unavailable: (n1,s10):1 unable to serve request to r10:‹{a-z}› [(n1,s10):1, (n2,s20):2, next=3, gen=0]: lost quorum (down: (n2,s20):2); closed timestamp: 1136214245.000000000,0 (2006-01-02 15:04:05); raft status: {"id":"0","term":0,"vote":"0","commit":0,"lead":"0","leadEpoch":"0","raftState":"StateFollower","applied":0,"progress":{},"leadtransferee":"0"}: probe failed diff --git a/pkg/raft/BUILD.bazel b/pkg/raft/BUILD.bazel index 86ce5ee107ae..f188e3af8430 100644 --- a/pkg/raft/BUILD.bazel +++ b/pkg/raft/BUILD.bazel @@ -27,6 +27,7 @@ go_library( "//pkg/raft/tracker", "//pkg/util/hlc", "@com_github_cockroachdb_errors//:errors", + "@com_github_cockroachdb_redact//:redact", "@org_golang_x_exp//maps", ], ) diff --git a/pkg/raft/node_test.go b/pkg/raft/node_test.go index f84e4f1650ef..6760383145bb 100644 --- a/pkg/raft/node_test.go +++ b/pkg/raft/node_test.go @@ -790,25 +790,25 @@ func TestNodeCommitPaginationAfterRestart(t *testing.T) { } s.hardState = persistedHardState - entries := make([]raftpb.Entry, 10) + s.ents = make([]raftpb.Entry, 10) var size 
uint64 - for i := range entries { + for i := range s.ents { ent := raftpb.Entry{ Term: 1, Index: uint64(i + 1), Type: raftpb.EntryNormal, Data: []byte("a"), } - entries[i] = ent + + s.ents[i] = ent size += uint64(ent.Size()) } - s.ls = LogSlice{term: 1, entries: entries} cfg := newTestConfig(1, 10, 1, s) // Set a MaxSizePerMsg that would suggest to Raft that the last committed entry should // not be included in the initial rd.CommittedEntries. However, our storage will ignore // this and *will* return it (which is how the Commit index ended up being 10 initially). - cfg.MaxSizePerMsg = size - uint64(entries[len(entries)-1].Size()) - 1 + cfg.MaxSizePerMsg = size - uint64(s.ents[len(s.ents)-1].Size()) - 1 rn, err := NewRawNode(cfg) require.NoError(t, err) diff --git a/pkg/raft/quorum/joint.go b/pkg/raft/quorum/joint.go index e806bff5dbcc..14f5c3c2ecb2 100644 --- a/pkg/raft/quorum/joint.go +++ b/pkg/raft/quorum/joint.go @@ -45,6 +45,20 @@ func (c JointConfig) IDs() map[pb.PeerID]struct{} { return m } +// Visit calls the given function for each unique voter ID in the joint +// configuration. +func (c JointConfig) Visit(f func(pb.PeerID)) { + for id := range c[0] { + f(id) + } + for id := range c[1] { + if _, ok := c[0][id]; ok { + continue // skip duplicate + } + f(id) + } +} + // Describe returns a (multi-line) representation of the commit indexes for the // given lookuper. func (c JointConfig) Describe(l AckedIndexer) string { diff --git a/pkg/raft/quorum/quorum_test.go b/pkg/raft/quorum/quorum_test.go index 7c0924b6720a..8da15666ba0e 100644 --- a/pkg/raft/quorum/quorum_test.go +++ b/pkg/raft/quorum/quorum_test.go @@ -6,6 +6,7 @@ package quorum import ( + "slices" "testing" pb "github.com/cockroachdb/cockroach/pkg/raft/raftpb" @@ -134,3 +135,21 @@ func TestLeadSupportExpirationJointConfig(t *testing.T) { require.Equal(t, tc.exp, j.LeadSupportExpiration(tc.support)) } } + +func TestJointConfigVisit(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + j := JointConfig{ + MajorityConfig{1: struct{}{}, 2: struct{}{}, 3: struct{}{}}, + MajorityConfig{2: struct{}{}, 3: struct{}{}, 4: struct{}{}}, + } + + var visited []pb.PeerID + j.Visit(func(id pb.PeerID) { + visited = append(visited, id) + }) + slices.Sort(visited) + + require.Equal(t, []pb.PeerID{1, 2, 3, 4}, visited) +} diff --git a/pkg/raft/raft.go b/pkg/raft/raft.go index 7c5245a6d5ce..8e7e3ce05d52 100644 --- a/pkg/raft/raft.go +++ b/pkg/raft/raft.go @@ -36,6 +36,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/raft/raftstoreliveness" "github.com/cockroachdb/cockroach/pkg/raft/tracker" "github.com/cockroachdb/errors" + "github.com/cockroachdb/redact" "golang.org/x/exp/maps" ) @@ -1418,12 +1419,20 @@ func (r *raft) Step(m pb.Message) error { if m.Type == pb.MsgVote || m.Type == pb.MsgPreVote { force := bytes.Equal(m.Context, []byte(campaignTransfer)) inHeartbeatLease := r.checkQuorum && r.lead != None && r.electionElapsed < r.electionTimeout - // NB: A fortified leader is allowed to bump its term. It'll need to - // re-fortify once if it gets elected at the higher term though, so the - // leader must take care to not regress its supported expiration. However, - // at the follower, we grant the fortified leader our vote at the higher - // term. - inFortifyLease := r.supportingFortifiedLeader() && r.lead != m.From + inFortifyLease := r.supportingFortifiedLeader() && + // NB: A fortified leader is allowed to bump its term. 
It'll need to + // re-fortify once if it gets elected at the higher term though, so the + // leader must take care to not regress its supported expiration. + // However, at the follower, we grant the fortified leader our vote at + // the higher term. + r.lead != m.From && + // NB: If the peer that's campaigning has an entry in its log with a + // higher term than what we're aware of, then this conclusively proves + // that a new leader was elected at a higher term. We never heard from + // this new leader (otherwise we'd have bumped r.Term in response). + // However, any fortification we're providing to a leader that has been + // since dethroned is pointless. + m.LogTerm <= r.Term if !force && (inHeartbeatLease || inFortifyLease) { // If a server receives a Request{,Pre}Vote message but is still // supporting a fortified leader, it does not update its term or grant @@ -1432,14 +1441,14 @@ func (r *raft) Step(m pb.Message) error { // leader it does not update its term or grant its vote. { // Log why we're ignoring the Request{,Pre}Vote. - var inHeartbeatLeaseMsg string - var inFortifyLeaseMsg string - var sep string + var inHeartbeatLeaseMsg redact.RedactableString + var inFortifyLeaseMsg redact.RedactableString + var sep redact.SafeString if inHeartbeatLease { - inHeartbeatLeaseMsg = fmt.Sprintf("recently received communication from leader (remaining ticks: %d)", r.electionTimeout-r.electionElapsed) + inHeartbeatLeaseMsg = redact.Sprintf("recently received communication from leader (remaining ticks: %d)", r.electionTimeout-r.electionElapsed) } if inFortifyLease { - inFortifyLeaseMsg = fmt.Sprintf("supporting fortified leader %d at epoch %d", r.lead, r.leadEpoch) + inFortifyLeaseMsg = redact.Sprintf("supporting fortified leader %d at epoch %d", r.lead, r.leadEpoch) } if inFortifyLease && inHeartbeatLease { sep = " and " @@ -1551,11 +1560,12 @@ func (r *raft) Step(m pb.Message) error { case pb.MsgVote, pb.MsgPreVote: // We can vote if this is a repeat of a vote we've already cast... canVote := r.Vote == m.From || - // ...we haven't voted and we don't think there's a leader yet in this term... + // ...OR we haven't voted and we don't think there's a leader yet in this + // term... (r.Vote == None && r.lead == None) || - // ...or this is a PreVote for a future term... + // ...OR this is a PreVote for a future term... (m.Type == pb.MsgPreVote && m.Term > r.Term) - // ...and we believe the candidate is up to date. + // ...AND we believe the candidate is up to date. 
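The logging change above moves the vote-rejection message onto redact-aware string types so composed fragments keep their redaction markers. A hedged sketch of composing redact.RedactableString and redact.SafeString values with redact.Sprintf (output shown is illustrative, not exact):

```go
package main

import (
	"fmt"

	"github.com/cockroachdb/redact"
)

func main() {
	var inHeartbeatLeaseMsg, inFortifyLeaseMsg redact.RedactableString
	var sep redact.SafeString

	inHeartbeatLeaseMsg = redact.Sprintf(
		"recently received communication from leader (remaining ticks: %d)", 3)
	inFortifyLeaseMsg = redact.Sprintf(
		"supporting fortified leader %d at epoch %d", 2, 7)
	sep = " and "

	// Interpolating RedactableString values preserves any redaction markers
	// they already carry; SafeString is always printed in the clear.
	msg := redact.Sprintf("ignoring MsgVote: %s%s%s",
		inHeartbeatLeaseMsg, sep, inFortifyLeaseMsg)
	fmt.Println(msg)          // full redactable form
	fmt.Println(msg.Redact()) // unsafe payloads collapsed to redaction markers
}
```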
lastID := r.raftLog.lastEntryID() candLastID := entryID{term: m.LogTerm, index: m.Index} if canVote && r.raftLog.isUpToDate(candLastID) { diff --git a/pkg/raft/raft_test.go b/pkg/raft/raft_test.go index ff1d6e732aff..ec0355acd803 100644 --- a/pkg/raft/raft_test.go +++ b/pkg/raft/raft_test.go @@ -863,10 +863,9 @@ func TestCandidateConcede(t *testing.T) { assert.Equal(t, pb.StateFollower, a.state) assert.Equal(t, uint64(1), a.Term) - wantLog := ltoa(newLog(&MemoryStorage{ls: LogSlice{ - term: 1, - entries: []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1, Data: data}}, - }}, nil)) + wantLog := ltoa(newLog(&MemoryStorage{ + ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Index: 2, Data: data}}, + }, nil)) for i, p := range tt.peers { if sm, ok := p.(*raft); ok { l := ltoa(sm.raftLog) @@ -904,12 +903,9 @@ func TestOldMessages(t *testing.T) { // commit a new entry tt.send(pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - ents := index(1).terms(1, 2, 3, 3) - ents[3].Data = []byte("somedata") - ilog := newLog(&MemoryStorage{ls: LogSlice{ - term: 3, - entries: ents, - }}, nil) + ents := index(0).terms(0, 1, 2, 3, 3) + ents[4].Data = []byte("somedata") + ilog := newLog(&MemoryStorage{ents: ents}, nil) base := ltoa(ilog) for i, p := range tt.peers { if sm, ok := p.(*raft); ok { @@ -958,10 +954,9 @@ func TestProposal(t *testing.T) { wantLog := newLog(NewMemoryStorage(), raftlogger.RaftLogger) if tt.success { - wantLog = newLog(&MemoryStorage{ls: LogSlice{ - term: 2, - entries: []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1, Data: data}}, - }}, nil) + wantLog = newLog(&MemoryStorage{ + ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Index: 2, Data: data}}, + }, nil) } base := ltoa(wantLog) for i, p := range tt.peers { @@ -990,10 +985,9 @@ func TestProposalByProxy(t *testing.T) { // propose via follower tt.send(pb.Message{From: 2, To: 2, Type: pb.MsgProp, Entries: []pb.Entry{{Data: []byte("somedata")}}}) - wantLog := newLog(&MemoryStorage{ls: LogSlice{ - term: 1, - entries: []pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 1, Data: data}}, - }}, nil) + wantLog := newLog(&MemoryStorage{ + ents: []pb.Entry{{}, {Data: nil, Term: 1, Index: 1}, {Term: 1, Data: data, Index: 2}}, + }, nil) base := ltoa(wantLog) for i, p := range tt.peers { if sm, ok := p.(*raft); ok { @@ -1376,10 +1370,7 @@ func testRecvMsgVote(t *testing.T, msgType pb.MessageType) { sm.step = stepLeader } sm.Vote = tt.voteFor - sm.raftLog = newLog(&MemoryStorage{ls: LogSlice{ - term: 2, - entries: index(1).terms(2, 2), - }}, nil) + sm.raftLog = newLog(&MemoryStorage{ents: index(0).terms(0, 2, 2)}, nil) // raft.Term is greater than or equal to raft.raftLog.lastTerm. In this // test we're only testing MsgVote responses when the campaigning node @@ -2011,10 +2002,7 @@ func TestLeaderAppResp(t *testing.T) { // sm term is 1 after it becomes the leader. // thus the last log term must be 1 to be committed. 
sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - sm.raftLog = newLog(&MemoryStorage{ls: LogSlice{ - term: 1, - entries: index(1).terms(1, 1), - }}, nil) + sm.raftLog = newLog(&MemoryStorage{ents: index(0).terms(0, 1, 1)}, nil) sm.becomeCandidate() sm.becomeLeader() sm.readMessages() @@ -2137,10 +2125,7 @@ func TestRecvMsgBeat(t *testing.T) { for i, tt := range tests { sm := newTestRaft(1, 10, 1, newTestMemoryStorage(withPeers(1, 2, 3))) - sm.raftLog = newLog(&MemoryStorage{ls: LogSlice{ - term: 1, - entries: index(1).terms(1, 1), - }}, nil) + sm.raftLog = newLog(&MemoryStorage{ents: index(0).terms(0, 1, 1)}, nil) sm.Term = 1 sm.state = tt.state switch tt.state { diff --git a/pkg/raft/raftpb/raft.go b/pkg/raft/raftpb/raft.go index 2169809f9339..b2073df26232 100644 --- a/pkg/raft/raftpb/raft.go +++ b/pkg/raft/raftpb/raft.go @@ -24,12 +24,6 @@ type Epoch int64 // SafeValue implements the redact.SafeValue interface. func (e Epoch) SafeValue() {} -// The enums in raft are all safe for redaction. -func (MessageType) SafeValue() {} -func (EntryType) SafeValue() {} -func (ConfChangeType) SafeValue() {} -func (ConfChangeTransition) SafeValue() {} - // Priority specifies per-entry priorities, that are local to the interaction // between a leader-replica pair, i.e., they are not an invariant of a // particular entry in the raft log (the replica could be the leader itself or diff --git a/pkg/raft/rawnode_test.go b/pkg/raft/rawnode_test.go index 97ba8b7c6988..40dc2981b9fc 100644 --- a/pkg/raft/rawnode_test.go +++ b/pkg/raft/rawnode_test.go @@ -481,7 +481,7 @@ func TestRawNodeStart(t *testing.T) { } storage := NewMemoryStorage() - storage.ls = LogSlice{term: 1, prev: entryID{index: 1, term: 1}} + storage.ents[0].Index = 1 // TODO(tbg): this is a first prototype of what bootstrapping could look // like (without the annoying faux ConfChanges). 
We want to persist a @@ -500,13 +500,16 @@ func TestRawNodeStart(t *testing.T) { } bootstrap := func(storage appenderStorage, cs pb.ConfState) error { require.NotEmpty(t, cs.Voters, "no voters specified") - fi, li := storage.FirstIndex(), storage.LastIndex() + fi := storage.FirstIndex() require.GreaterOrEqual(t, fi, uint64(2), "FirstIndex >= 2 is prerequisite for bootstrap") - require.Equal(t, fi, li+1, "the log must be empty") - entries, err := storage.Entries(fi, li+1, math.MaxUint64) - require.NoError(t, err) - require.Empty(t, entries, "should not have been able to load any entries") + _, err := storage.Entries(fi, fi, math.MaxUint64) + // TODO(tbg): match exact error + require.Error(t, err, "should not have been able to load first index") + + li := storage.LastIndex() + _, err = storage.Entries(li, li, math.MaxUint64) + require.Error(t, err, "should not have been able to load last index") hs, ics, err := storage.InitialState() require.NoError(t, err) @@ -669,32 +672,34 @@ func TestRawNodeCommitPaginationAfterRestart(t *testing.T) { s := &ignoreSizeHintMemStorage{ MemoryStorage: newTestMemoryStorage(withPeers(1)), } - s.hardState = pb.HardState{ + persistedHardState := pb.HardState{ Term: 1, Vote: 1, Commit: 10, } - entries := make([]pb.Entry, 10) + + s.hardState = persistedHardState + s.ents = make([]pb.Entry, 10) var size uint64 - for i := range entries { + for i := range s.ents { ent := pb.Entry{ Term: 1, Index: uint64(i + 1), Type: pb.EntryNormal, Data: []byte("a"), } - entries[i] = ent + + s.ents[i] = ent size += uint64(ent.Size()) } - s.ls = LogSlice{term: 1, entries: entries} cfg := newTestConfig(1, 10, 1, s) // Set a MaxSizePerMsg that would suggest to Raft that the last committed entry should // not be included in the initial rd.CommittedEntries. However, our storage will ignore // this and *will* return it (which is how the Commit index ended up being 10 initially). - cfg.MaxSizePerMsg = size - uint64(entries[len(entries)-1].Size()) - 1 + cfg.MaxSizePerMsg = size - uint64(s.ents[len(s.ents)-1].Size()) - 1 - s.ls.entries = append(s.ls.entries, pb.Entry{ + s.ents = append(s.ents, pb.Entry{ Term: 1, Index: uint64(11), Type: pb.EntryNormal, diff --git a/pkg/raft/status.go b/pkg/raft/status.go index cb9ac6ad47af..4c5208eff0c0 100644 --- a/pkg/raft/status.go +++ b/pkg/raft/status.go @@ -162,8 +162,8 @@ func getLeadSupportStatus(r *raft) LeadSupportStatus { // MarshalJSON translates the raft status into JSON. func (s Status) MarshalJSON() ([]byte, error) { - j := fmt.Sprintf(`{"id":"%x","term":%d,"vote":"%x","commit":%d,"lead":"%x","raftState":%q,"applied":%d,"progress":{`, - s.ID, s.Term, s.Vote, s.Commit, s.Lead, s.RaftState, s.Applied) + j := fmt.Sprintf(`{"id":"%x","term":%d,"vote":"%x","commit":%d,"lead":"%x","leadEpoch":"%d","raftState":%q,"applied":%d,"progress":{`, + s.ID, s.Term, s.Vote, s.Commit, s.Lead, s.LeadEpoch, s.RaftState, s.Applied) if len(s.Progress) == 0 { j += "}," diff --git a/pkg/raft/storage.go b/pkg/raft/storage.go index 71823fe52de8..608743b1e491 100644 --- a/pkg/raft/storage.go +++ b/pkg/raft/storage.go @@ -132,9 +132,8 @@ type inMemStorageCallStats struct { initialState, firstIndex, lastIndex, entries, term, snapshot int } -// MemoryStorage implements the Storage interface backed by an in-memory slice. -// -// TODO(pav-kv): split into LogStorage and StateStorage. +// MemoryStorage implements the Storage interface backed by an +// in-memory array. type MemoryStorage struct { // Protects access to all fields. 
Most methods of MemoryStorage are // run on the raft goroutine, but Append() is run on an application @@ -143,21 +142,18 @@ type MemoryStorage struct { hardState pb.HardState snapshot pb.Snapshot - - // ls contains the log entries. - // - // TODO(pav-kv): the term field of the LogSlice is conservatively populated - // to be the last entry term, to keep the LogSlice valid. But it must be - // sourced from the upper layer's last accepted term (which is >= the last - // entry term). - ls LogSlice + // ents[i] has raft log position i+snapshot.Metadata.Index + ents []pb.Entry callStats inMemStorageCallStats } // NewMemoryStorage creates an empty MemoryStorage. func NewMemoryStorage() *MemoryStorage { - return &MemoryStorage{} + return &MemoryStorage{ + // When starting from scratch populate the list with a dummy entry at term zero. + ents: make([]pb.Entry, 1), + } } // InitialState implements the Storage interface. @@ -179,17 +175,22 @@ func (ms *MemoryStorage) Entries(lo, hi, maxSize uint64) ([]pb.Entry, error) { ms.Lock() defer ms.Unlock() ms.callStats.entries++ - - if lo <= ms.ls.prev.index { + offset := ms.ents[0].Index + if lo <= offset { return nil, ErrCompacted - } else if last := ms.ls.lastIndex(); hi > last+1 { - raftlogger.GetLogger().Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, last) + } + if hi > ms.lastIndex()+1 { + raftlogger.GetLogger().Panicf("entries' hi(%d) is out of bound lastindex(%d)", hi, ms.lastIndex()) + } + // only contains dummy entries. + if len(ms.ents) == 1 { + return nil, ErrUnavailable } - ents := limitSize(ms.ls.sub(lo-1, hi-1), entryEncodingSize(maxSize)) + ents := limitSize(ms.ents[lo-offset:hi-offset], entryEncodingSize(maxSize)) // NB: use the full slice expression to limit what the caller can do with the // returned slice. For example, an append will reallocate and copy this slice - // instead of corrupting the neighbouring entries. + // instead of corrupting the neighbouring ms.ents. return ents[:len(ents):len(ents)], nil } @@ -198,12 +199,14 @@ func (ms *MemoryStorage) Term(i uint64) (uint64, error) { ms.Lock() defer ms.Unlock() ms.callStats.term++ - if i < ms.ls.prev.index { + offset := ms.ents[0].Index + if i < offset { return 0, ErrCompacted - } else if i > ms.ls.lastIndex() { + } + if int(i-offset) >= len(ms.ents) { return 0, ErrUnavailable } - return ms.ls.termAt(i), nil + return ms.ents[i-offset].Term, nil } // LastIndex implements the Storage interface. @@ -211,7 +214,11 @@ func (ms *MemoryStorage) LastIndex() uint64 { ms.Lock() defer ms.Unlock() ms.callStats.lastIndex++ - return ms.ls.lastIndex() + return ms.lastIndex() +} + +func (ms *MemoryStorage) lastIndex() uint64 { + return ms.ents[0].Index + uint64(len(ms.ents)) - 1 } // FirstIndex implements the Storage interface. @@ -219,19 +226,17 @@ func (ms *MemoryStorage) FirstIndex() uint64 { ms.Lock() defer ms.Unlock() ms.callStats.firstIndex++ - return ms.ls.prev.index + 1 + return ms.firstIndex() +} + +func (ms *MemoryStorage) firstIndex() uint64 { + return ms.ents[0].Index + 1 } // LogSnapshot implements the LogStorage interface. func (ms *MemoryStorage) LogSnapshot() LogStorageSnapshot { - // Copy the log slice, and protect MemoryStorage from potential appends to it. - // Both MemoryStorage and the caller can append to the slice, but the full - // slice expression makes sure the two don't corrupt each other's slices. - ls := ms.ls - ls.entries = ls.entries[:len(ls.entries):len(ls.entries)] - // TODO(pav-kv): we don't need all other fields in MemoryStorage. 
Factor out a - // LogStorage sub-type, and return just the log slice with it. - return &MemoryStorage{ls: ls} + // TODO(pav-kv): return an immutable subset of MemoryStorage. + return ms } // Snapshot implements the Storage interface. @@ -247,19 +252,16 @@ func (ms *MemoryStorage) Snapshot() (pb.Snapshot, error) { func (ms *MemoryStorage) ApplySnapshot(snap pb.Snapshot) error { ms.Lock() defer ms.Unlock() - id := entryID{index: snap.Metadata.Index, term: snap.Metadata.Term} - // Check whether the snapshot is outdated. - if id.index <= ms.snapshot.Metadata.Index { + + //handle check for old snapshot being applied + msIndex := ms.snapshot.Metadata.Index + snapIndex := snap.Metadata.Index + if msIndex >= snapIndex { return ErrSnapOutOfDate } - // The new snapshot represents committed state, so its last entry should be - // consistent with the previously committed one. - if oldTerm := ms.snapshot.Metadata.Term; id.term < oldTerm { - raftlogger.GetLogger().Panicf("snapshot at %+v regresses the term %d", id, oldTerm) - } + ms.snapshot = snap - // TODO(pav-kv): the term must be the last accepted term passed in. - ms.ls = LogSlice{term: id.term, prev: id} + ms.ents = []pb.Entry{{Term: snap.Metadata.Term, Index: snap.Metadata.Index}} return nil } @@ -274,12 +276,15 @@ func (ms *MemoryStorage) CreateSnapshot( defer ms.Unlock() if i <= ms.snapshot.Metadata.Index { return pb.Snapshot{}, ErrSnapOutOfDate - } else if last := ms.ls.lastIndex(); i > last { - raftlogger.GetLogger().Panicf("snapshot %d is out of bound lastindex(%d)", i, last) + } + + offset := ms.ents[0].Index + if i > ms.lastIndex() { + raftlogger.GetLogger().Panicf("snapshot %d is out of bound lastindex(%d)", i, ms.lastIndex()) } ms.snapshot.Metadata.Index = i - ms.snapshot.Metadata.Term = ms.ls.termAt(i) + ms.snapshot.Metadata.Term = ms.ents[i-offset].Term if cs != nil { ms.snapshot.Metadata.ConfState = *cs } @@ -287,60 +292,66 @@ func (ms *MemoryStorage) CreateSnapshot( return ms.snapshot, nil } -// Compact discards all log entries <= index. +// Compact discards all log entries prior to compactIndex. // It is the application's responsibility to not attempt to compact an index // greater than raftLog.applied. -func (ms *MemoryStorage) Compact(index uint64) error { +func (ms *MemoryStorage) Compact(compactIndex uint64) error { ms.Lock() defer ms.Unlock() - if index <= ms.ls.prev.index { + offset := ms.ents[0].Index + if compactIndex <= offset { return ErrCompacted - } else if last := ms.ls.lastIndex(); index > last { - raftlogger.GetLogger().Panicf("compact %d is out of bound lastindex(%d)", index, last) } - ms.ls = ms.ls.forward(index) + if compactIndex > ms.lastIndex() { + raftlogger.GetLogger().Panicf("compact %d is out of bound lastindex(%d)", compactIndex, ms.lastIndex()) + } + + i := compactIndex - offset + // NB: allocate a new slice instead of reusing the old ms.ents. Entries in + // ms.ents are immutable, and can be referenced from outside MemoryStorage + // through slices returned by ms.Entries(). + ents := make([]pb.Entry, 1, uint64(len(ms.ents))-i) + ents[0].Index = ms.ents[i].Index + ents[0].Term = ms.ents[i].Term + ents = append(ents, ms.ents[i+1:]...) + ms.ents = ents return nil } // Append the new entries to storage. -// -// TODO(pav-kv): pass in a LogSlice which carries correctness semantics. 
+// TODO (xiangli): ensure the entries are continuous and +// entries[0].Index > ms.entries[0].Index func (ms *MemoryStorage) Append(entries []pb.Entry) error { if len(entries) == 0 { return nil } + ms.Lock() defer ms.Unlock() - first := entries[0].Index - if first <= ms.ls.prev.index { - // Can not append at indices <= the compacted index. - return ErrCompacted - } else if last := ms.ls.lastIndex(); first > last+1 { - raftlogger.GetLogger().Panicf("missing log entry [last: %d, append at: %d]", last, first) - } - - // TODO(pav-kv): this must have the correct last accepted term. Pass in the - // logSlice to this append method to update it correctly. - ms.ls.term = entries[len(entries)-1].Term + first := ms.firstIndex() + last := entries[0].Index + uint64(len(entries)) - 1 - if first == ms.ls.lastIndex()+1 { // appending at the end of the log - ms.ls.entries = append(ms.ls.entries, entries...) - } else { // first <= lastIndex, after checks above - prefix := ms.ls.sub(ms.ls.prev.index, first-1) - // NB: protect the suffix of the old slice from rewrites. - ms.ls.entries = append(prefix[:len(prefix):len(prefix)], entries...) + // shortcut if there is no new entry. + if last < first { + return nil + } + // truncate compacted entries + if first > entries[0].Index { + entries = entries[first-entries[0].Index:] } - return nil -} -// MakeLogSnapshot converts the MemoryStorage to a LogSnapshot type serving the -// log from the MemoryStorage snapshot. Only for testing. -func MakeLogSnapshot(ms *MemoryStorage) LogSnapshot { - return LogSnapshot{ - first: ms.FirstIndex(), - storage: ms.LogSnapshot(), - unstable: ms.ls.forward(ms.ls.lastIndex()), - logger: raftlogger.DiscardLogger, + offset := entries[0].Index - ms.ents[0].Index + switch { + case uint64(len(ms.ents)) > offset: + // NB: full slice expression protects ms.ents at index >= offset from + // rewrites, as they may still be referenced from outside MemoryStorage. + ms.ents = append(ms.ents[:offset:offset], entries...) + case uint64(len(ms.ents)) == offset: + ms.ents = append(ms.ents, entries...) 
+ default: + raftlogger.GetLogger().Panicf("missing log entry [last: %d, append at: %d]", + ms.lastIndex(), entries[0].Index) } + return nil } diff --git a/pkg/raft/storage_test.go b/pkg/raft/storage_test.go index 9af5b91a6546..f5a3f7d4d0e7 100644 --- a/pkg/raft/storage_test.go +++ b/pkg/raft/storage_test.go @@ -26,8 +26,7 @@ import ( ) func TestStorageTerm(t *testing.T) { - prev3 := entryID{index: 3, term: 3} - ls := prev3.append(4, 5) + ents := index(3).terms(3, 4, 5) tests := []struct { i uint64 @@ -44,7 +43,8 @@ func TestStorageTerm(t *testing.T) { for _, tt := range tests { t.Run("", func(t *testing.T) { - s := &MemoryStorage{ls: ls} + s := &MemoryStorage{ents: ents} + if tt.wpanic { require.Panics(t, func() { _, _ = s.Term(tt.i) @@ -58,9 +58,7 @@ func TestStorageTerm(t *testing.T) { } func TestStorageEntries(t *testing.T) { - prev3 := entryID{index: 3, term: 3} - ls := prev3.append(4, 5, 6) - ents := ls.entries + ents := index(3).terms(3, 4, 5, 6) tests := []struct { lo, hi, maxsize uint64 @@ -75,17 +73,17 @@ func TestStorageEntries(t *testing.T) { // even if maxsize is zero, the first entry should be returned {4, 7, 0, nil, index(4).terms(4)}, // limit to 2 - {4, 7, uint64(ents[0].Size() + ents[1].Size()), nil, index(4).terms(4, 5)}, + {4, 7, uint64(ents[1].Size() + ents[2].Size()), nil, index(4).terms(4, 5)}, // limit to 2 - {4, 7, uint64(ents[0].Size() + ents[1].Size() + ents[2].Size()/2), nil, index(4).terms(4, 5)}, - {4, 7, uint64(ents[0].Size() + ents[1].Size() + ents[2].Size() - 1), nil, index(4).terms(4, 5)}, + {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size()/2), nil, index(4).terms(4, 5)}, + {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size() - 1), nil, index(4).terms(4, 5)}, // all - {4, 7, uint64(ents[0].Size() + ents[1].Size() + ents[2].Size()), nil, index(4).terms(4, 5, 6)}, + {4, 7, uint64(ents[1].Size() + ents[2].Size() + ents[3].Size()), nil, index(4).terms(4, 5, 6)}, } for _, tt := range tests { t.Run("", func(t *testing.T) { - s := &MemoryStorage{ls: ls} + s := &MemoryStorage{ents: ents} entries, err := s.Entries(tt.lo, tt.hi, tt.maxsize) require.Equal(t, tt.werr, err) require.Equal(t, tt.wentries, entries) @@ -94,21 +92,23 @@ func TestStorageEntries(t *testing.T) { } func TestStorageLastIndex(t *testing.T) { - s := &MemoryStorage{ls: entryID{index: 3, term: 3}.append(4, 5)} + ents := index(3).terms(3, 4, 5) + s := &MemoryStorage{ents: ents} require.Equal(t, uint64(5), s.LastIndex()) require.NoError(t, s.Append(index(6).terms(5))) require.Equal(t, uint64(6), s.LastIndex()) } func TestStorageFirstIndex(t *testing.T) { - s := &MemoryStorage{ls: entryID{index: 3, term: 3}.append(4, 5)} + ents := index(3).terms(3, 4, 5) + s := &MemoryStorage{ents: ents} require.Equal(t, uint64(4), s.FirstIndex()) require.NoError(t, s.Compact(4)) require.Equal(t, uint64(5), s.FirstIndex()) } func TestStorageCompact(t *testing.T) { - ls := entryID{index: 3, term: 3}.append(4, 5) + ents := index(3).terms(3, 4, 5) tests := []struct { i uint64 @@ -117,25 +117,25 @@ func TestStorageCompact(t *testing.T) { wterm uint64 wlen int }{ - {2, ErrCompacted, 3, 3, 2}, - {3, ErrCompacted, 3, 3, 2}, - {4, nil, 4, 4, 1}, - {5, nil, 5, 5, 0}, + {2, ErrCompacted, 3, 3, 3}, + {3, ErrCompacted, 3, 3, 3}, + {4, nil, 4, 4, 2}, + {5, nil, 5, 5, 1}, } for _, tt := range tests { t.Run("", func(t *testing.T) { - s := &MemoryStorage{ls: ls} + s := &MemoryStorage{ents: ents} require.Equal(t, tt.werr, s.Compact(tt.i)) - require.Equal(t, tt.windex, s.ls.prev.index) - require.Equal(t, tt.wterm, 
s.ls.prev.term) - require.Equal(t, tt.wlen, len(s.ls.entries)) + require.Equal(t, tt.windex, s.ents[0].Index) + require.Equal(t, tt.wterm, s.ents[0].Term) + require.Equal(t, tt.wlen, len(s.ents)) }) } } func TestStorageCreateSnapshot(t *testing.T) { - ls := entryID{index: 3, term: 3}.append(4, 5) + ents := index(3).terms(3, 4, 5) cs := &pb.ConfState{Voters: []pb.PeerID{1, 2, 3}} data := []byte("data") @@ -151,7 +151,7 @@ func TestStorageCreateSnapshot(t *testing.T) { for _, tt := range tests { t.Run("", func(t *testing.T) { - s := &MemoryStorage{ls: ls} + s := &MemoryStorage{ents: ents} snap, err := s.CreateSnapshot(tt.i, cs, data) require.Equal(t, tt.werr, err) require.Equal(t, tt.wsnap, snap) @@ -160,7 +160,7 @@ func TestStorageCreateSnapshot(t *testing.T) { } func TestStorageAppend(t *testing.T) { - ls := entryID{index: 3, term: 3}.append(4, 5) + ents := index(3).terms(3, 4, 5) tests := []struct { entries []pb.Entry @@ -169,43 +169,49 @@ func TestStorageAppend(t *testing.T) { }{ { index(1).terms(1, 2), - ErrCompacted, - index(4).terms(4, 5), + nil, + index(3).terms(3, 4, 5), }, { index(3).terms(3, 4, 5), - ErrCompacted, - index(4).terms(4, 5), + nil, + index(3).terms(3, 4, 5), }, { - index(4).terms(6, 6), + index(3).terms(3, 6, 6), nil, - index(4).terms(6, 6), + index(3).terms(3, 6, 6), }, { - index(4).terms(4, 5, 5), + index(3).terms(3, 4, 5, 5), nil, - index(4).terms(4, 5, 5), + index(3).terms(3, 4, 5, 5), + }, + // Truncate incoming entries, truncate the existing entries and append. + { + index(2).terms(3, 3, 5), + nil, + index(3).terms(3, 5), }, // Truncate the existing entries and append. { index(4).terms(5), nil, - index(4).terms(5), + index(3).terms(3, 5), }, // Direct append. { index(6).terms(5), nil, - index(4).terms(4, 5, 5), + index(3).terms(3, 4, 5, 5), }, } for _, tt := range tests { t.Run("", func(t *testing.T) { - s := &MemoryStorage{ls: ls} + s := &MemoryStorage{ents: ents} require.Equal(t, tt.werr, s.Append(tt.entries)) - require.Equal(t, tt.wentries, s.ls.entries) + require.Equal(t, tt.wentries, s.ents) }) } } @@ -229,24 +235,3 @@ func TestStorageApplySnapshot(t *testing.T) { tt = tests[i] require.Equal(t, ErrSnapOutOfDate, s.ApplySnapshot(tt)) } - -func TestStorageLogSnapshot(t *testing.T) { - s := NewMemoryStorage() - require.NoError(t, s.Append(index(1).terms(1, 2, 3))) - snap := s.LogSnapshot() - // The snapshot must be immutable regardless of mutations on the storage. - check := func() { - require.Equal(t, uint64(1), snap.FirstIndex()) - require.Equal(t, uint64(3), snap.LastIndex()) - entries, err := snap.Entries(snap.FirstIndex(), snap.LastIndex()+1, math.MaxUint64) - require.NoError(t, err) - require.Equal(t, index(1).terms(1, 2, 3), entries) - } - check() - require.NoError(t, s.Append(index(4).terms(4, 5))) // regular append - check() - require.NoError(t, s.Append(index(2).terms(7, 7, 7))) // truncation and append - check() - require.NoError(t, s.Compact(4)) // compaction - check() -} diff --git a/pkg/raft/tracker/fortificationtracker.go b/pkg/raft/tracker/fortificationtracker.go index b8a889f3f697..2061ea1a4378 100644 --- a/pkg/raft/tracker/fortificationtracker.go +++ b/pkg/raft/tracker/fortificationtracker.go @@ -164,26 +164,31 @@ func (ft *FortificationTracker) computeLeadSupportUntil(state pb.StateType) hlc. 
if state != pb.StateLeader { panic("computeLeadSupportUntil should only be called by the leader") } + if len(ft.fortification) == 0 { + return hlc.Timestamp{} // fast-path for no fortification + } // TODO(arul): avoid this map allocation as we're calling LeadSupportUntil // from hot paths. supportExpMap := make(map[pb.PeerID]hlc.Timestamp) - for id, supportEpoch := range ft.fortification { - curEpoch, curExp := ft.storeLiveness.SupportFrom(id) - // NB: We can't assert that supportEpoch <= curEpoch because there may be a - // race between a successful MsgFortifyLeaderResp and the store liveness - // heartbeat response that lets the leader know the follower's store is - // supporting the leader's store at the epoch in the MsgFortifyLeaderResp - // message. - if curEpoch == supportEpoch { - supportExpMap[id] = curExp + ft.config.Voters.Visit(func(id pb.PeerID) { + if supportEpoch, ok := ft.fortification[id]; ok { + curEpoch, curExp := ft.storeLiveness.SupportFrom(id) + // NB: We can't assert that supportEpoch <= curEpoch because there may be + // a race between a successful MsgFortifyLeaderResp and the store liveness + // heartbeat response that lets the leader know the follower's store is + // supporting the leader's store at the epoch in the MsgFortifyLeaderResp + // message. + if curEpoch == supportEpoch { + supportExpMap[id] = curExp + } } - } + }) return ft.config.Voters.LeadSupportExpiration(supportExpMap) } // CanDefortify returns whether the caller can safely[1] de-fortify the term -// based on the sate tracked by the FortificationTracker. +// based on the state tracked by the FortificationTracker. // // [1] Without risking regressions in the maximum that's ever been indicated to // the layers above. Or, more simply, without risking regression of leader diff --git a/pkg/raft/types.go b/pkg/raft/types.go index 30cd3ab0f645..20e734563e8b 100644 --- a/pkg/raft/types.go +++ b/pkg/raft/types.go @@ -90,10 +90,6 @@ func (l LogMark) After(other LogMark) bool { // is sourced from a message that was received via transport, or from Storage, // or in a test code that manually hard-codes this struct. In these cases, the // invariants should be validated using the valid() method. -// -// The LogSlice is immutable. The entries slice must not be mutated, but it can -// be appended to in some cases, when the callee protects its underlying slice -// by capping the returned entries slice with a full slice expression. type LogSlice struct { // term is the leader term containing the given entries in its log. term uint64 @@ -103,6 +99,15 @@ type LogSlice struct { entries []pb.Entry } +// MakeLogSlice creates a fake log slice containing the supplied entries. Only +// for testing. +// +// TODO(pav-kv): this is not a correct LogSlice. Remove this function, and help +// construct a correct one. +func MakeLogSlice(entries []pb.Entry) LogSlice { + return LogSlice{entries: entries} +} + // Entries returns the log entries covered by this slice. The returned slice // must not be mutated. 
func (s LogSlice) Entries() []pb.Entry { diff --git a/pkg/raft/util.go b/pkg/raft/util.go index 2f86a7d651ff..dfad989b062b 100644 --- a/pkg/raft/util.go +++ b/pkg/raft/util.go @@ -200,10 +200,6 @@ func describeMessageWithIndent(indent string, m pb.Message, f EntryFormatter) st return buf.String() } -func DescribeTarget(id pb.PeerID) string { - return describeTarget(id) -} - func describeTarget(id pb.PeerID) string { switch id { case None: diff --git a/pkg/roachprod/install/files/cockroachdb-logging.yaml b/pkg/roachprod/install/files/cockroachdb-logging.yaml index 7fdf4e6889bc..ba57e161e919 100644 --- a/pkg/roachprod/install/files/cockroachdb-logging.yaml +++ b/pkg/roachprod/install/files/cockroachdb-logging.yaml @@ -35,13 +35,13 @@ sinks: channels: [STORAGE] security: channels: [PRIVILEGES, USER_ADMIN] - auditable: true + auditable: false sql-audit: channels: [SENSITIVE_ACCESS] - auditable: true + auditable: false sql-auth: channels: [SESSIONS] - auditable: true + auditable: false sql-exec: channels: [SQL_EXEC] sql-slow: diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index d98c78705fb7..6533b62ba3ff 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -29,6 +29,7 @@ go_library( "grpc_gateway.go", "grpc_server.go", "hot_ranges.go", + "http_metrics.go", "import_ts.go", "index_usage_stats.go", "init.go", @@ -362,6 +363,7 @@ go_library( "@com_github_nightlyone_lockfile//:lockfile", "@com_github_nytimes_gziphandler//:gziphandler", "@com_github_pires_go_proxyproto//:go-proxyproto", + "@com_github_prometheus_client_model//go", "@com_github_prometheus_common//expfmt", "@in_gopkg_yaml_v2//:yaml_v2", "@org_golang_google_grpc//:go_default_library", @@ -433,6 +435,7 @@ go_test( "graphite_test.go", "grpc_gateway_test.go", "helpers_test.go", + "http_metrics_test.go", "index_usage_stats_test.go", "job_profiler_test.go", "listen_and_update_addrs_test.go", @@ -578,6 +581,7 @@ go_test( "@com_github_dustin_go_humanize//:go-humanize", "@com_github_gogo_protobuf//jsonpb", "@com_github_gogo_protobuf//proto", + "@com_github_gorilla_mux//:mux", "@com_github_grpc_ecosystem_grpc_gateway//runtime:go_default_library", "@com_github_jackc_pgx_v4//:pgx", "@com_github_kr_pretty//:pretty", diff --git a/pkg/server/api_v2.go b/pkg/server/api_v2.go index ebec7c91a345..1a611cb2713e 100644 --- a/pkg/server/api_v2.go +++ b/pkg/server/api_v2.go @@ -103,6 +103,8 @@ func newAPIV2Server(ctx context.Context, opts *apiV2ServerOpts) http.Handler { allowAnonymous := opts.sqlServer.cfg.Insecure authMux := authserver.NewV2Mux(authServer, innerMux, allowAnonymous) outerMux := mux.NewRouter() + serverMetrics := NewServerHttpMetrics(opts.sqlServer.MetricsRegistry(), opts.sqlServer.execCfg.Settings) + serverMetrics.registerMetricsMiddleware(outerMux) systemAdmin, saOk := opts.admin.(*systemAdminServer) systemStatus, ssOk := opts.status.(*systemStatusServer) diff --git a/pkg/server/http_metrics.go b/pkg/server/http_metrics.go new file mode 100644 index 000000000000..8c332f80a3a2 --- /dev/null +++ b/pkg/server/http_metrics.go @@ -0,0 +1,114 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. 
+ +package server + +import ( + "net/http" + "regexp" + "strconv" + + "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/gorilla/mux" + prometheusgo "github.com/prometheus/client_model/go" +) + +const ( + MethodLabel = "method" + PathLabel = "path" + StatusCodeLabel = "statusCode" +) + +var pathVarsRegex = regexp.MustCompile("{([A-z]+)(:[^}]*)?}") + +var serverHTTPMetricsEnabled = settings.RegisterBoolSetting( + settings.ApplicationLevel, + "server.http.metrics.enabled", + "enables to collection of http metrics", + false, +) + +// responseWriter wraps http.ResponseWriter with a statusCode field to provide +// access to the status code in metric reporting. +type responseWriter struct { + http.ResponseWriter + statusCode int +} + +func newResponseWriter(w http.ResponseWriter) *responseWriter { + return &responseWriter{w, http.StatusOK} +} + +// WriteHeader implements http.ResponseWriter +func (rw *responseWriter) WriteHeader(code int) { + rw.statusCode = code + rw.ResponseWriter.WriteHeader(code) +} + +type HttpServerMetrics struct { + RequestMetrics *metric.HistogramVec + registry *metric.Registry + settings *cluster.Settings +} + +func NewServerHttpMetrics(reg *metric.Registry, settings *cluster.Settings) *HttpServerMetrics { + metadata := metric.Metadata{ + Name: "server.http.request.duration.nanos", + Help: "Duration of an HTTP request in nanoseconds.", + Measurement: "Duration", + Unit: metric.Unit_NANOSECONDS, + MetricType: prometheusgo.MetricType_HISTOGRAM, + } + + histogramVec := metric.NewExportedHistogramVec( + metadata, + metric.ResponseTime30sBuckets, + []string{MethodLabel, PathLabel, StatusCodeLabel}) + reg.AddMetric(histogramVec) + return &HttpServerMetrics{ + RequestMetrics: histogramVec, + registry: reg, + settings: settings, + } +} + +// registerMetricsMiddleware registers a middleware function on to the provided mux.Router to +// capture metrics on http requests. The underlying metric uses a metric.HistogramVec, which +// isn't recorded in tsdb. +func (m *HttpServerMetrics) registerMetricsMiddleware(router *mux.Router) { + metricsMiddleWare := func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !serverHTTPMetricsEnabled.Get(&m.settings.SV) { + next.ServeHTTP(w, r) + } else { + route := mux.CurrentRoute(r) + path, _ := route.GetPathTemplate() + rw := newResponseWriter(w) + sw := timeutil.NewStopWatch() + sw.Start() + next.ServeHTTP(rw, r) + sw.Stop() + m.RequestMetrics.Observe(map[string]string{ + "path": formatPathVars(path), + "method": r.Method, + "statusCode": strconv.Itoa(rw.statusCode), + }, float64(sw.Elapsed().Nanoseconds())) + } + }) + } + router.Use(metricsMiddleWare) +} + +// formatPathVars replaces named path variables with just the +// variable name, wrapped in <>. Any variable regex will be +// removed. For example: +// "/api/v2/database_metadata/{database_id:[0-9]+}" is +// turned into" "/api/v2/database_metadata/" +func formatPathVars(path string) string { + return pathVarsRegex.ReplaceAllString(path, "<$1>") +} diff --git a/pkg/server/http_metrics_test.go b/pkg/server/http_metrics_test.go new file mode 100644 index 000000000000..3a0d94c30c05 --- /dev/null +++ b/pkg/server/http_metrics_test.go @@ -0,0 +1,262 @@ +// Copyright 2024 The Cockroach Authors. 
+// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. + +package server + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "strconv" + "testing" + "time" + + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/gorilla/mux" + prometheusgo "github.com/prometheus/client_model/go" + "github.com/stretchr/testify/require" +) + +func TestRegisterMetricsMiddleware(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + t.Run("cluster settings", func(t *testing.T) { + clusterSettings := cluster.MakeTestingClusterSettings() + serverMetrics := NewServerHttpMetrics(metric.NewRegistry(), clusterSettings) + router := mux.NewRouter() + serverMetrics.registerMetricsMiddleware(router) + router.Handle("/", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(time.Millisecond) + w.WriteHeader(http.StatusOK) + })) + server := httptest.NewServer(router) + defer server.Close() + rr := httptest.NewRecorder() + req, err := http.NewRequest("GET", server.URL+"/", nil) + require.NoError(t, err) + + router.ServeHTTP(rr, req) + metrics := serverMetrics.RequestMetrics.ToPrometheusMetrics() + require.Len(t, metrics, 0) + + serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, true) + router.ServeHTTP(rr, req) + metrics = serverMetrics.RequestMetrics.ToPrometheusMetrics() + require.Len(t, metrics, 1) + assertPrometheusMetrics(t, metrics, map[string]uint64{ + fmt.Sprintf("%s GET %s", strconv.Itoa(http.StatusOK), "/"): uint64(1), + }) + + serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, false) + router.ServeHTTP(rr, req) + metrics = serverMetrics.RequestMetrics.ToPrometheusMetrics() + require.Len(t, metrics, 1) + assertPrometheusMetrics(t, metrics, map[string]uint64{ + fmt.Sprintf("%s GET %s", strconv.Itoa(http.StatusOK), "/"): uint64(1), + }) + + }) + t.Run("metrics", func(t *testing.T) { + clusterSettings := cluster.MakeTestingClusterSettings() + serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, true) + serverMetrics := NewServerHttpMetrics(metric.NewRegistry(), clusterSettings) + pathUri := "/mypath/{path_var:[0-9]+}/" + router := mux.NewRouter() + serverMetrics.registerMetricsMiddleware(router) + shouldFail := false + handlerFunc := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(time.Millisecond) + if shouldFail { + w.WriteHeader(http.StatusInternalServerError) + } else { + w.WriteHeader(http.StatusOK) + } + }) + router.Handle(pathUri, handlerFunc).Methods(http.MethodGet, http.MethodPost) + server := httptest.NewServer(router) + defer server.Close() + getReq1, err := http.NewRequest("GET", server.URL+"/mypath/1/", nil) + require.NoError(t, err) + getReq2, err := http.NewRequest("GET", server.URL+"/mypath/2/", nil) + require.NoError(t, err) + postReq2, err := http.NewRequest("POST", server.URL+"/mypath/2/", nil) + require.NoError(t, err) + putReq3, err := http.NewRequest("PUT", server.URL+"/mypath/1/", nil) + require.NoError(t, err) + rr := httptest.NewRecorder() + router.ServeHTTP(rr, getReq1) + router.ServeHTTP(rr, getReq1) + router.ServeHTTP(rr, getReq2) + router.ServeHTTP(rr, postReq2) + router.ServeHTTP(rr, putReq3) + + shouldFail = true + router.ServeHTTP(rr, postReq2) + + metrics := 
serverMetrics.RequestMetrics.ToPrometheusMetrics() + // putReq3 won't be recorded because `PUT /mypath/1/` isn't a valid route + require.Len(t, metrics, 3) + assertPrometheusMetrics(t, metrics, map[string]uint64{ + fmt.Sprintf("%s GET %s", strconv.Itoa(http.StatusOK), formatPathVars(pathUri)): uint64(3), + fmt.Sprintf("%s POST %s", strconv.Itoa(http.StatusOK), formatPathVars(pathUri)): uint64(1), + fmt.Sprintf("%s POST %s", strconv.Itoa(http.StatusInternalServerError), formatPathVars(pathUri)): uint64(1), + }) + }) +} + +func TestFormatPathVars(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + type testcase struct { + name string + path string + expectedPath string + } + testcases := []testcase{ + {name: "no variable", path: "/testpath/", expectedPath: "/testpath/"}, + {name: "variable with regex", path: "/testpath/{param:[0-9]+}/", expectedPath: "/testpath//"}, + {name: "multiple variables with regex", path: "/testpath/{param:[0-9]+}/{other_param:[\\w]}", expectedPath: "/testpath//"}, + {name: "variable without regex", path: "/testpath/{param}/", expectedPath: "/testpath//"}, + {name: "multiple variable without regex", path: "/testpath/{param}/{other_Param}/", expectedPath: "/testpath///"}, + {name: "mixed variables", path: "/testpath/{param:[\\w]}/{otherParam}", expectedPath: "/testpath//"}, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.expectedPath, formatPathVars(tc.path)) + }) + } +} + +func BenchmarkHTTPMetrics(b *testing.B) { + defer leaktest.AfterTest(b)() + defer log.Scope(b).Close(b) + + b.Run("Metrics enabled", func(b *testing.B) { + b.StopTimer() + b.ResetTimer() + clusterSettings := cluster.MakeTestingClusterSettings() + serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, true) + serverMetrics := NewServerHttpMetrics(metric.NewRegistry(), clusterSettings) + server, router := newBenchmarkServer("/{param}/", serverMetrics) + defer server.Close() + r1, err := http.NewRequest("GET", server.URL+"/1/", nil) + require.NoError(b, err) + r2, err := http.NewRequest("GET", server.URL+"/2/", nil) + require.NoError(b, err) + r3, err := http.NewRequest("POST", server.URL+"/2/", nil) + require.NoError(b, err) + r4, err := http.NewRequest("PUT", server.URL+"/1/", nil) + require.NoError(b, err) + rr := httptest.NewRecorder() + b.StartTimer() + for i := 0; i < b.N; i++ { + router.ServeHTTP(rr, r1) + router.ServeHTTP(rr, r2) + router.ServeHTTP(rr, r3) + router.ServeHTTP(rr, r4) + } + require.Len(b, serverMetrics.RequestMetrics.ToPrometheusMetrics(), 2) + }) + + b.Run("Metrics disabled", func(b *testing.B) { + b.StopTimer() + b.ResetTimer() + clusterSettings := cluster.MakeTestingClusterSettings() + serverHTTPMetricsEnabled.Override(context.Background(), &clusterSettings.SV, false) + serverMetrics := NewServerHttpMetrics(metric.NewRegistry(), clusterSettings) + server, router := newBenchmarkServer("/{param}/", serverMetrics) + defer server.Close() + r1, err := http.NewRequest("GET", server.URL+"/1/", nil) + require.NoError(b, err) + r2, err := http.NewRequest("GET", server.URL+"/2/", nil) + require.NoError(b, err) + r3, err := http.NewRequest("POST", server.URL+"/2/", nil) + require.NoError(b, err) + r4, err := http.NewRequest("PUT", server.URL+"/1/", nil) + require.NoError(b, err) + rr := httptest.NewRecorder() + b.StartTimer() + for i := 0; i < b.N; i++ { + router.ServeHTTP(rr, r1) + router.ServeHTTP(rr, r2) + router.ServeHTTP(rr, r3) + router.ServeHTTP(rr, r4) + } + require.Len(b, 
serverMetrics.RequestMetrics.ToPrometheusMetrics(), 0) + }) + + b.Run("No Middleware", func(b *testing.B) { + b.StopTimer() + b.ResetTimer() + server, router := newBenchmarkServer("/{param}/", nil) + defer server.Close() + r1, err := http.NewRequest("GET", server.URL+"/1/", nil) + require.NoError(b, err) + r2, err := http.NewRequest("GET", server.URL+"/2/", nil) + require.NoError(b, err) + r3, err := http.NewRequest("POST", server.URL+"/2/", nil) + require.NoError(b, err) + r4, err := http.NewRequest("PUT", server.URL+"/1/", nil) + require.NoError(b, err) + rr := httptest.NewRecorder() + b.StartTimer() + for i := 0; i < b.N; i++ { + router.ServeHTTP(rr, r1) + router.ServeHTTP(rr, r2) + router.ServeHTTP(rr, r3) + router.ServeHTTP(rr, r4) + } + }) +} + +func newBenchmarkServer( + route string, serverMetrics *HttpServerMetrics, +) (*httptest.Server, *mux.Router) { + router := mux.NewRouter() + router.Handle(route, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(time.Millisecond) + w.WriteHeader(http.StatusOK) + })).Methods(http.MethodGet, http.MethodPost) + if serverMetrics != nil { + serverMetrics.registerMetricsMiddleware(router) + } + return httptest.NewServer(router), router +} +func assertPrometheusMetrics( + t *testing.T, metrics []*prometheusgo.Metric, expected map[string]uint64, +) { + t.Helper() + actual := map[string]*prometheusgo.Histogram{} + for _, m := range metrics { + var method, path, statusCode string + for _, l := range m.Label { + switch *l.Name { + case MethodLabel: + method = *l.Value + case PathLabel: + path = *l.Value + case StatusCodeLabel: + statusCode = *l.Value + } + } + histogram := m.Histogram + require.NotNil(t, histogram, "expected histogram") + key := fmt.Sprintf("%s %s %s", statusCode, method, path) + actual[key] = histogram + } + + for key, val := range expected { + histogram, ok := actual[key] + require.True(t, ok) + require.Greater(t, *histogram.SampleSum, float64(0), "expected `%s` to have a SampleSum > 0", key) + require.Equal(t, val, *histogram.SampleCount, "expected `%s` to have SampleCount of %d", key, val) + } +} diff --git a/pkg/sql/alter_default_privileges.go b/pkg/sql/alter_default_privileges.go index 64683b9c36ae..d47c020fc020 100644 --- a/pkg/sql/alter_default_privileges.go +++ b/pkg/sql/alter_default_privileges.go @@ -134,10 +134,12 @@ func (n *alterDefaultPrivilegesNode) startExec(params runParams) error { return err } + var hasAdmin bool + if hasAdmin, err = params.p.HasAdminRole(params.ctx); err != nil { + return err + } if n.n.ForAllRoles { - if hasAdmin, err := params.p.HasAdminRole(params.ctx); err != nil { - return err - } else if !hasAdmin { + if !hasAdmin { return pgerror.Newf(pgcode.InsufficientPrivilege, "only users with the admin role are allowed to ALTER DEFAULT PRIVILEGES FOR ALL ROLES") } @@ -145,7 +147,7 @@ func (n *alterDefaultPrivilegesNode) startExec(params runParams) error { // You can change default privileges only for objects that will be created // by yourself or by roles that you are a member of. 
for _, targetRole := range targetRoles { - if targetRole != params.p.User() { + if targetRole != params.p.User() && !hasAdmin { memberOf, err := params.p.MemberOfWithAdminOption(params.ctx, params.p.User()) if err != nil { return err @@ -153,7 +155,7 @@ func (n *alterDefaultPrivilegesNode) startExec(params runParams) error { if _, found := memberOf[targetRole]; !found { return pgerror.Newf(pgcode.InsufficientPrivilege, - "must be a member of %s", targetRole.Normalized()) + "must be an admin or member of %s", targetRole.Normalized()) } } } diff --git a/pkg/sql/alter_table.go b/pkg/sql/alter_table.go index 924d6b5316ab..861d979b035c 100644 --- a/pkg/sql/alter_table.go +++ b/pkg/sql/alter_table.go @@ -2330,8 +2330,17 @@ func checkSchemaChangeIsAllowed(desc catalog.TableDescriptor, n tree.Statement) if desc.IsSchemaLocked() && !tree.IsSetOrResetSchemaLocked(n) { return sqlerrors.NewSchemaChangeOnLockedTableErr(desc.GetName()) } - if len(desc.TableDesc().LDRJobIDs) > 0 && !tree.IsAllowedLDRSchemaChange(n) { - return sqlerrors.NewDisallowedSchemaChangeOnLDRTableErr(desc.GetName(), desc.TableDesc().LDRJobIDs) + if len(desc.TableDesc().LDRJobIDs) > 0 { + var virtualColNames []string + for _, col := range desc.NonDropColumns() { + if col.IsVirtual() { + virtualColNames = append(virtualColNames, col.GetName()) + } + } + if !tree.IsAllowedLDRSchemaChange(n, virtualColNames) { + return sqlerrors.NewDisallowedSchemaChangeOnLDRTableErr(desc.GetName(), desc.TableDesc().LDRJobIDs) + + } } return nil } diff --git a/pkg/sql/catalog/bootstrap/testdata/testdata b/pkg/sql/catalog/bootstrap/testdata/testdata index 5cffbf9621e9..c720aa8c9f7e 100644 --- a/pkg/sql/catalog/bootstrap/testdata/testdata +++ b/pkg/sql/catalog/bootstrap/testdata/testdata @@ -1,7 +1,7 @@ -system hash=f93eb889512719710d1c75bff8a77ce6ad6c4e837319053f6f149ca13749d710 +system hash=f02637ca2ab3fa50efc1a4884f2406b8a0aad72a3f6249c5461e6e922a2e2491 ---- [{"key":"8b"} -,{"key":"8b89898a89","value":"0312470a0673797374656d10011a250a0d0a0561646d696e1080101880100a0c0a04726f6f7410801018801012046e6f646518032200280140004a006a0a08d8843d1002180020167000"} +,{"key":"8b89898a89","value":"0312450a0673797374656d10011a250a0d0a0561646d696e1080101880100a0c0a04726f6f7410801018801012046e6f646518032200280140004a006a0808181002180020167000"} ,{"key":"8b898b8a89","value":"030a94030a0a64657363726970746f721803200128013a0042270a02696410011a0c08011040180030005014600020003000680070007800800100880100980100422f0a0a64657363726970746f7210021a0c08081000180030005011600020013000680070007800800100880100980100480352710a077072696d61727910011801220269642a0a64657363726970746f72300140004a10080010001a00200028003000380040005a0070027a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060026a210a0b0a0561646d696e102018200a0a0a04726f6f741020182012046e6f64651803800101880103980100b201130a077072696d61727910001a02696420012800b201240a1066616d5f325f64657363726970746f7210021a0a64657363726970746f7220022802b80103c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880302a80300b00300d00300d80300e00300f80300880400"} 
,{"key":"8b898c8a89","value":"030acd050a0575736572731804200128013a00422d0a08757365726e616d6510011a0c0807100018003000501960002000300068007000780080010088010098010042330a0e68617368656450617373776f726410021a0c0808100018003000501160002001300068007000780080010088010098010042320a066973526f6c6510031a0c08001000180030005010600020002a0566616c73653000680070007800800100880100980100422c0a07757365725f696410041a0c080c100018003000501a60002000300068007000780080010088010098010048055290010a077072696d617279100118012208757365726e616d652a0e68617368656450617373776f72642a066973526f6c652a07757365725f6964300140004a10080010001a00200028003000380040005a007002700370047a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00102e00100e90100000000000000005a740a1175736572735f757365725f69645f696478100218012207757365725f69643004380140004a10080010001a00200028003000380040005a007a0408002000800100880100900103980100a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060036a250a0d0a0561646d696e10e00318e0030a0c0a04726f6f7410e00318e00312046e6f64651803800101880103980100b201240a077072696d61727910001a08757365726e616d651a07757365725f6964200120042804b2012c0a1466616d5f325f68617368656450617373776f726410021a0e68617368656450617373776f726420022802b2011c0a0c66616d5f335f6973526f6c6510031a066973526f6c6520032803b80104c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880303a80300b00300d00300d80300e00300f80300880400"} ,{"key":"8b898d8a89","value":"030a83030a057a6f6e65731805200128013a0042270a02696410011a0c08011040180030005014600020003000680070007800800100880100980100422b0a06636f6e66696710021a0c080810001800300050116000200130006800700078008001008801009801004803526d0a077072696d61727910011801220269642a06636f6e666967300140004a10080010001a00200028003000380040005a0070027a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060026a250a0d0a0561646d696e10e00318e0030a0c0a04726f6f7410e00318e00312046e6f64651803800101880103980100b201130a077072696d61727910001a02696420012800b2011c0a0c66616d5f325f636f6e66696710021a06636f6e66696720022802b80103c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880302a80300b00300d00300d80300e00300f80300880400"} @@ -198,10 +198,10 @@ system hash=f93eb889512719710d1c75bff8a77ce6ad6c4e837319053f6f149ca13749d710 ,{"key":"cb"} ] -tenant hash=ec31fb2e5b85fbb8da0beded6f174ff0a8196088aebe8bf5cdeacb07689b6d6a +tenant hash=e025f38b283dfb401584c95355095420047d10496ec2e9bf009b4a7d8fd09b5c ---- [{"key":""} -,{"key":"8b89898a89","value":"0312470a0673797374656d10011a250a0d0a0561646d696e1080101880100a0c0a04726f6f7410801018801012046e6f646518032200280140004a006a0a08d8843d1002180020167000"} +,{"key":"8b89898a89","value":"0312450a0673797374656d10011a250a0d0a0561646d696e1080101880100a0c0a04726f6f7410801018801012046e6f646518032200280140004a006a0808181002180020167000"} 
,{"key":"8b898b8a89","value":"030a94030a0a64657363726970746f721803200128013a0042270a02696410011a0c08011040180030005014600020003000680070007800800100880100980100422f0a0a64657363726970746f7210021a0c08081000180030005011600020013000680070007800800100880100980100480352710a077072696d61727910011801220269642a0a64657363726970746f72300140004a10080010001a00200028003000380040005a0070027a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060026a210a0b0a0561646d696e102018200a0a0a04726f6f741020182012046e6f64651803800101880103980100b201130a077072696d61727910001a02696420012800b201240a1066616d5f325f64657363726970746f7210021a0a64657363726970746f7220022802b80103c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880302a80300b00300d00300d80300e00300f80300880400"} ,{"key":"8b898c8a89","value":"030acd050a0575736572731804200128013a00422d0a08757365726e616d6510011a0c0807100018003000501960002000300068007000780080010088010098010042330a0e68617368656450617373776f726410021a0c0808100018003000501160002001300068007000780080010088010098010042320a066973526f6c6510031a0c08001000180030005010600020002a0566616c73653000680070007800800100880100980100422c0a07757365725f696410041a0c080c100018003000501a60002000300068007000780080010088010098010048055290010a077072696d617279100118012208757365726e616d652a0e68617368656450617373776f72642a066973526f6c652a07757365725f6964300140004a10080010001a00200028003000380040005a007002700370047a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00102e00100e90100000000000000005a740a1175736572735f757365725f69645f696478100218012207757365725f69643004380140004a10080010001a00200028003000380040005a007a0408002000800100880100900103980100a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060036a250a0d0a0561646d696e10e00318e0030a0c0a04726f6f7410e00318e00312046e6f64651803800101880103980100b201240a077072696d61727910001a08757365726e616d651a07757365725f6964200120042804b2012c0a1466616d5f325f68617368656450617373776f726410021a0e68617368656450617373776f726420022802b2011c0a0c66616d5f335f6973526f6c6510031a066973526f6c6520032803b80104c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880303a80300b00300d00300d80300e00300f80300880400"} ,{"key":"8b898d8a89","value":"030a83030a057a6f6e65731805200128013a0042270a02696410011a0c08011040180030005014600020003000680070007800800100880100980100422b0a06636f6e66696710021a0c080810001800300050116000200130006800700078008001008801009801004803526d0a077072696d61727910011801220269642a06636f6e666967300140004a10080010001a00200028003000380040005a0070027a0408002000800100880100900104980101a20106080012001800a80100b20100ba0100c00100c80100d00101e00100e901000000000000000060026a250a0d0a0561646d696e10e00318e0030a0c0a04726f6f7410e00318e00312046e6f64651803800101880103980100b201130a077072696d61727910001a02696420012800b2011c0a0c66616d5f325f636f6e66696710021a06636f6e66696720022802b80103c20100e80100f2010408001200f801008002009202009a0200b20200b80200c0021dc80200e00200800300880302a80300b00300d00300d80300e00300f80300880400"} diff --git a/pkg/sql/catalog/systemschema_test/testdata/bootstrap_system b/pkg/sql/catalog/systemschema_test/testdata/bootstrap_system index c4269bef4d09..5f2f1074e4c2 100644 --- a/pkg/sql/catalog/systemschema_test/testdata/bootstrap_system +++ b/pkg/sql/catalog/systemschema_test/testdata/bootstrap_system @@ -674,7 +674,7 @@ schema_telemetry ---- 
{"database":{"name":"defaultdb","id":100,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2","withGrantOption":"2"},{"userProto":"public","privileges":"2048"},{"userProto":"root","privileges":"2","withGrantOption":"2"}],"ownerProto":"root","version":3},"schemas":{"public":{"id":101}},"defaultPrivileges":{}}} {"database":{"name":"postgres","id":102,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2","withGrantOption":"2"},{"userProto":"public","privileges":"2048"},{"userProto":"root","privileges":"2","withGrantOption":"2"}],"ownerProto":"root","version":3},"schemas":{"public":{"id":103}},"defaultPrivileges":{}}} -{"database":{"name":"system","id":1,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2048","withGrantOption":"2048"},{"userProto":"root","privileges":"2048","withGrantOption":"2048"}],"ownerProto":"node","version":3},"systemDatabaseSchemaVersion":{"majorVal":1000024,"minorVal":2,"internal":22}}} +{"database":{"name":"system","id":1,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2048","withGrantOption":"2048"},{"userProto":"root","privileges":"2048","withGrantOption":"2048"}],"ownerProto":"node","version":3},"systemDatabaseSchemaVersion":{"majorVal":24,"minorVal":2,"internal":22}}} {"table":{"name":"comments","id":24,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"type","id":1,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"object_id","id":2,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"sub_id","id":3,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"comment","id":4,"type":{"family":"StringFamily","oid":25}}],"nextColumnId":5,"families":[{"name":"primary","columnNames":["type","object_id","sub_id"],"columnIds":[1,2,3]},{"name":"fam_4_comment","id":4,"columnNames":["comment"],"columnIds":[4],"defaultColumnId":4}],"nextFamilyId":5,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["type","object_id","sub_id"],"keyColumnDirections":["ASC","ASC","ASC"],"storeColumnNames":["comment"],"keyColumnIds":[1,2,3],"storeColumnIds":[4],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":1},"nextIndexId":2,"privileges":{"users":[{"userProto":"admin","privileges":"480","withGrantOption":"480"},{"userProto":"public","privileges":"32"},{"userProto":"root","privileges":"480","withGrantOption":"480"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":2}} 
{"table":{"name":"database_role_settings","id":44,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"database_id","id":1,"type":{"family":"OidFamily","oid":26}},{"name":"role_name","id":2,"type":{"family":"StringFamily","oid":25}},{"name":"settings","id":3,"type":{"family":"ArrayFamily","arrayElemType":"StringFamily","oid":1009,"arrayContents":{"family":"StringFamily","oid":25}}},{"name":"role_id","id":4,"type":{"family":"OidFamily","oid":26}}],"nextColumnId":5,"families":[{"name":"primary","columnNames":["database_id","role_name","settings","role_id"],"columnIds":[1,2,3,4]}],"nextFamilyId":1,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["database_id","role_name"],"keyColumnDirections":["ASC","ASC"],"storeColumnNames":["settings","role_id"],"keyColumnIds":[1,2],"storeColumnIds":[3,4],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":2},"indexes":[{"name":"database_role_settings_database_id_role_id_key","id":2,"unique":true,"version":3,"keyColumnNames":["database_id","role_id"],"keyColumnDirections":["ASC","ASC"],"storeColumnNames":["settings"],"keyColumnIds":[1,4],"keySuffixColumnIds":[2],"storeColumnIds":[3],"foreignKey":{},"interleave":{},"partitioning":{},"sharded":{},"geoConfig":{},"constraintId":1}],"nextIndexId":3,"privileges":{"users":[{"userProto":"admin","privileges":"480","withGrantOption":"480"},{"userProto":"root","privileges":"480","withGrantOption":"480"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":3}} {"table":{"name":"descriptor","id":3,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"id","id":1,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"descriptor","id":2,"type":{"family":"BytesFamily","oid":17},"nullable":true}],"nextColumnId":3,"families":[{"name":"primary","columnNames":["id"],"columnIds":[1]},{"name":"fam_2_descriptor","id":2,"columnNames":["descriptor"],"columnIds":[2],"defaultColumnId":2}],"nextFamilyId":3,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["id"],"keyColumnDirections":["ASC"],"storeColumnNames":["descriptor"],"keyColumnIds":[1],"storeColumnIds":[2],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":1},"nextIndexId":2,"privileges":{"users":[{"userProto":"admin","privileges":"32","withGrantOption":"32"},{"userProto":"root","privileges":"32","withGrantOption":"32"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":2}} diff --git a/pkg/sql/catalog/systemschema_test/testdata/bootstrap_tenant b/pkg/sql/catalog/systemschema_test/testdata/bootstrap_tenant index c4269bef4d09..5f2f1074e4c2 100644 --- a/pkg/sql/catalog/systemschema_test/testdata/bootstrap_tenant +++ b/pkg/sql/catalog/systemschema_test/testdata/bootstrap_tenant @@ -674,7 +674,7 @@ schema_telemetry ---- {"database":{"name":"defaultdb","id":100,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2","withGrantOption":"2"},{"userProto":"public","privileges":"2048"},{"userProto":"root","privileges":"2","withGrantOption":"2"}],"ownerProto":"root","version":3},"schemas":{"public":{"id":101}},"defaultPrivileges":{}}} 
{"database":{"name":"postgres","id":102,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2","withGrantOption":"2"},{"userProto":"public","privileges":"2048"},{"userProto":"root","privileges":"2","withGrantOption":"2"}],"ownerProto":"root","version":3},"schemas":{"public":{"id":103}},"defaultPrivileges":{}}} -{"database":{"name":"system","id":1,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2048","withGrantOption":"2048"},{"userProto":"root","privileges":"2048","withGrantOption":"2048"}],"ownerProto":"node","version":3},"systemDatabaseSchemaVersion":{"majorVal":1000024,"minorVal":2,"internal":22}}} +{"database":{"name":"system","id":1,"modificationTime":{"wallTime":"0"},"version":"1","privileges":{"users":[{"userProto":"admin","privileges":"2048","withGrantOption":"2048"},{"userProto":"root","privileges":"2048","withGrantOption":"2048"}],"ownerProto":"node","version":3},"systemDatabaseSchemaVersion":{"majorVal":24,"minorVal":2,"internal":22}}} {"table":{"name":"comments","id":24,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"type","id":1,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"object_id","id":2,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"sub_id","id":3,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"comment","id":4,"type":{"family":"StringFamily","oid":25}}],"nextColumnId":5,"families":[{"name":"primary","columnNames":["type","object_id","sub_id"],"columnIds":[1,2,3]},{"name":"fam_4_comment","id":4,"columnNames":["comment"],"columnIds":[4],"defaultColumnId":4}],"nextFamilyId":5,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["type","object_id","sub_id"],"keyColumnDirections":["ASC","ASC","ASC"],"storeColumnNames":["comment"],"keyColumnIds":[1,2,3],"storeColumnIds":[4],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":1},"nextIndexId":2,"privileges":{"users":[{"userProto":"admin","privileges":"480","withGrantOption":"480"},{"userProto":"public","privileges":"32"},{"userProto":"root","privileges":"480","withGrantOption":"480"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":2}} 
{"table":{"name":"database_role_settings","id":44,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"database_id","id":1,"type":{"family":"OidFamily","oid":26}},{"name":"role_name","id":2,"type":{"family":"StringFamily","oid":25}},{"name":"settings","id":3,"type":{"family":"ArrayFamily","arrayElemType":"StringFamily","oid":1009,"arrayContents":{"family":"StringFamily","oid":25}}},{"name":"role_id","id":4,"type":{"family":"OidFamily","oid":26}}],"nextColumnId":5,"families":[{"name":"primary","columnNames":["database_id","role_name","settings","role_id"],"columnIds":[1,2,3,4]}],"nextFamilyId":1,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["database_id","role_name"],"keyColumnDirections":["ASC","ASC"],"storeColumnNames":["settings","role_id"],"keyColumnIds":[1,2],"storeColumnIds":[3,4],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":2},"indexes":[{"name":"database_role_settings_database_id_role_id_key","id":2,"unique":true,"version":3,"keyColumnNames":["database_id","role_id"],"keyColumnDirections":["ASC","ASC"],"storeColumnNames":["settings"],"keyColumnIds":[1,4],"keySuffixColumnIds":[2],"storeColumnIds":[3],"foreignKey":{},"interleave":{},"partitioning":{},"sharded":{},"geoConfig":{},"constraintId":1}],"nextIndexId":3,"privileges":{"users":[{"userProto":"admin","privileges":"480","withGrantOption":"480"},{"userProto":"root","privileges":"480","withGrantOption":"480"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":3}} {"table":{"name":"descriptor","id":3,"version":"1","modificationTime":{},"parentId":1,"unexposedParentSchemaId":29,"columns":[{"name":"id","id":1,"type":{"family":"IntFamily","width":64,"oid":20}},{"name":"descriptor","id":2,"type":{"family":"BytesFamily","oid":17},"nullable":true}],"nextColumnId":3,"families":[{"name":"primary","columnNames":["id"],"columnIds":[1]},{"name":"fam_2_descriptor","id":2,"columnNames":["descriptor"],"columnIds":[2],"defaultColumnId":2}],"nextFamilyId":3,"primaryIndex":{"name":"primary","id":1,"unique":true,"version":4,"keyColumnNames":["id"],"keyColumnDirections":["ASC"],"storeColumnNames":["descriptor"],"keyColumnIds":[1],"storeColumnIds":[2],"foreignKey":{},"interleave":{},"partitioning":{},"encodingType":1,"sharded":{},"geoConfig":{},"constraintId":1},"nextIndexId":2,"privileges":{"users":[{"userProto":"admin","privileges":"32","withGrantOption":"32"},{"userProto":"root","privileges":"32","withGrantOption":"32"}],"ownerProto":"node","version":3},"nextMutationId":1,"formatVersion":3,"replacementOf":{"time":{}},"createAsOfTime":{},"nextConstraintId":2}} diff --git a/pkg/sql/catalog/tabledesc/logical_replication_helpers.go b/pkg/sql/catalog/tabledesc/logical_replication_helpers.go index d9f36606d9c7..75a48d0627b8 100644 --- a/pkg/sql/catalog/tabledesc/logical_replication_helpers.go +++ b/pkg/sql/catalog/tabledesc/logical_replication_helpers.go @@ -6,6 +6,7 @@ package tabledesc import ( + "bytes" "cmp" "slices" "strings" @@ -14,6 +15,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" + "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/errors" ) @@ -52,7 +54,31 @@ func CheckLogicalReplicationCompatibility( return pgerror.Wrapf(err, 
pgcode.InvalidTableDefinition, cannotLDRMsg) } } + if err := checkOutboundReferences(dst); err != nil { + return pgerror.Wrapf(err, pgcode.InvalidTableDefinition, cannotLDRMsg) + } + + return nil +} +// checkOutboundReferences verifies that the table descriptor does not +// reference any user-defined functions, sequences, or triggers. +func checkOutboundReferences(dst *descpb.TableDescriptor) error { + for _, col := range dst.Columns { + if len(col.UsesSequenceIds) > 0 { + return errors.Newf("table %s references sequences with IDs %v", dst.Name, col.UsesSequenceIds) + } + if len(col.UsesFunctionIds) > 0 { + return errors.Newf("table %s references functions with IDs %v", dst.Name, col.UsesFunctionIds) + } + } + if len(dst.Triggers) > 0 { + triggerNames := make([]string, len(dst.Triggers)) + for i, trigger := range dst.Triggers { + triggerNames[i] = trigger.Name + } + return errors.Newf("table %s references triggers [%s]", dst.Name, strings.Join(triggerNames, ", ")) + } return nil } @@ -179,20 +205,76 @@ func checkSrcDstColsMatch(src *descpb.TableDescriptor, dst *descpb.TableDescript ) } - if dstCol.Type.UserDefined() { + if err := checkTypesMatch(srcCol.Type, dstCol.Type); err != nil { + return errors.Wrapf(err, + "destination table %s column %s has type %s, but the source table %s has type %s", + dst.Name, dstCol.Name, dstCol.Type.SQLStringForError(), src.Name, srcCol.Type.SQLStringForError(), + ) + } + } + return nil +} + +// checkTypesMatch checks that the source and destination types match. Enums +// need to be equal in both physical and logical representations. +func checkTypesMatch(srcTyp *types.T, dstTyp *types.T) error { + switch { + case dstTyp.TypeMeta.EnumData != nil: + if srcTyp.TypeMeta.EnumData == nil { return errors.Newf( - "destination table %s column %s has user-defined type %s", - dst.Name, dstCol.Name, dstCol.Type.SQLStringForError(), + "destination type %s is an ENUM, but the source type %s is not", + dstTyp.SQLStringForError(), srcTyp.SQLStringForError(), + ) + } + if !slices.Equal(srcTyp.TypeMeta.EnumData.LogicalRepresentations, dstTyp.TypeMeta.EnumData.LogicalRepresentations) { + return errors.Newf( + "destination type %s has logical representations %v, but the source type %s has %v", + dstTyp.SQLStringForError(), dstTyp.TypeMeta.EnumData.LogicalRepresentations, + srcTyp.SQLStringForError(), srcTyp.TypeMeta.EnumData.LogicalRepresentations, + ) + } + if !slices.EqualFunc( + srcTyp.TypeMeta.EnumData.PhysicalRepresentations, dstTyp.TypeMeta.EnumData.PhysicalRepresentations, + func(x, y []byte) bool { return bytes.Equal(x, y) }, + ) { + return errors.Newf( + "destination type %s and source type %s have mismatched physical representations", + dstTyp.SQLStringForError(), srcTyp.SQLStringForError(), ) } - if !srcCol.Type.Identical(dstCol.Type) { + case len(dstTyp.TupleContents()) > 0: + if len(srcTyp.TupleContents()) == 0 { return errors.Newf( - "destination table %s column %s has type %s, but the source table %s has type %s", - dst.Name, dstCol.Name, dstCol.Type.SQLStringForError(), src.Name, srcCol.Type.SQLStringForError(), + "destination type %s is a tuple, but the source type %s is not", + dstTyp.SQLStringForError(), srcTyp.SQLStringForError(), + ) + } + if len(dstTyp.TupleContents()) != len(srcTyp.TupleContents()) { + return errors.Newf( + "destination type %s has %d tuple elements, but the source type %s has %d tuple elements", + dstTyp.SQLStringForError(), len(dstTyp.TupleContents()), + srcTyp.SQLStringForError(), len(srcTyp.TupleContents()), + ) + } + for i := range 
dstTyp.TupleContents() { + if err := checkTypesMatch(srcTyp.TupleContents()[i], dstTyp.TupleContents()[i]); err != nil { + return errors.Wrapf(err, + "destination type %s tuple element %d does not match source type %s tuple element %d", + dstTyp.SQLStringForError(), i, srcTyp.SQLStringForError(), i, + ) + } + } + + default: + if !srcTyp.Identical(dstTyp) { + return errors.Newf( + "destination type %s does not match source type %s", + dstTyp.SQLStringForError(), srcTyp.SQLStringForError(), ) } } + return nil } diff --git a/pkg/sql/conn_executor.go b/pkg/sql/conn_executor.go index ead2b0e60bc7..f9996e9ce514 100644 --- a/pkg/sql/conn_executor.go +++ b/pkg/sql/conn_executor.go @@ -4435,6 +4435,8 @@ type StatementCounters struct { UpdateCount telemetry.CounterWithMetric InsertCount telemetry.CounterWithMetric DeleteCount telemetry.CounterWithMetric + // CRUDQueryCount includes all 4 CRUD statements above. + CRUDQueryCount telemetry.CounterWithMetric // Transaction operations. TxnBeginCount telemetry.CounterWithMetric @@ -4499,6 +4501,8 @@ func makeStartedStatementCounters(internal bool) StatementCounters { getMetricMeta(MetaInsertStarted, internal)), DeleteCount: telemetry.NewCounterWithMetric( getMetricMeta(MetaDeleteStarted, internal)), + CRUDQueryCount: telemetry.NewCounterWithMetric( + getMetricMeta(MetaCRUDStarted, internal)), DdlCount: telemetry.NewCounterWithMetric( getMetricMeta(MetaDdlStarted, internal)), CopyCount: telemetry.NewCounterWithMetric( @@ -4542,6 +4546,8 @@ func makeExecutedStatementCounters(internal bool) StatementCounters { getMetricMeta(MetaInsertExecuted, internal)), DeleteCount: telemetry.NewCounterWithMetric( getMetricMeta(MetaDeleteExecuted, internal)), + CRUDQueryCount: telemetry.NewCounterWithMetric( + getMetricMeta(MetaCRUDExecuted, internal)), DdlCount: telemetry.NewCounterWithMetric( getMetricMeta(MetaDdlExecuted, internal)), CopyCount: telemetry.NewCounterWithMetric( @@ -4562,12 +4568,16 @@ func (sc *StatementCounters) incrementCount(ex *connExecutor, stmt tree.Statemen sc.TxnBeginCount.Inc() case *tree.Select: sc.SelectCount.Inc() + sc.CRUDQueryCount.Inc() case *tree.Update: sc.UpdateCount.Inc() + sc.CRUDQueryCount.Inc() case *tree.Insert: sc.InsertCount.Inc() + sc.CRUDQueryCount.Inc() case *tree.Delete: sc.DeleteCount.Inc() + sc.CRUDQueryCount.Inc() case *tree.CommitTransaction: sc.TxnCommitCount.Inc() case *tree.RollbackTransaction: diff --git a/pkg/sql/conn_executor_exec.go b/pkg/sql/conn_executor_exec.go index e4420bbd8e52..fb604beedc22 100644 --- a/pkg/sql/conn_executor_exec.go +++ b/pkg/sql/conn_executor_exec.go @@ -505,7 +505,7 @@ func (ex *connExecutor) execStmtInOpenState( if notice, err := ex.server.cfg.LicenseEnforcer.MaybeFailIfThrottled(ctx, curOpen); err != nil { return makeErrEvent(err) } else if notice != nil { - res.BufferNotice(notice) + p.BufferClientNotice(ctx, notice) } } } diff --git a/pkg/sql/exec_log.go b/pkg/sql/exec_log.go index 86f167465613..1ecb31dd94ab 100644 --- a/pkg/sql/exec_log.go +++ b/pkg/sql/exec_log.go @@ -359,58 +359,56 @@ func (p *planner) maybeLogStatementInternal( defer releaseSampledQuery(sampledQuery) *sampledQuery = eventpb.SampledQuery{ - CommonSQLExecDetails: execDetails, - SkippedQueries: skippedQueries, - CostEstimate: p.curPlan.instrumentation.costEstimate, - Distribution: p.curPlan.instrumentation.distribution.String(), - PlanGist: p.curPlan.instrumentation.planGist.String(), - SessionID: p.extendedEvalCtx.SessionID.String(), - Database: p.CurrentDatabase(), - StatementID: p.stmt.QueryID.String(), - 
TransactionID: txnID, - StatementFingerprintID: stmtFingerprintID.String(), - MaxFullScanRowsEstimate: p.curPlan.instrumentation.maxFullScanRows, - TotalScanRowsEstimate: p.curPlan.instrumentation.totalScanRows, - OutputRowsEstimate: p.curPlan.instrumentation.outputRows, - StatsAvailable: p.curPlan.instrumentation.statsAvailable, - NanosSinceStatsCollected: int64(p.curPlan.instrumentation.nanosSinceStatsCollected), - BytesRead: p.curPlan.instrumentation.topLevelStats.bytesRead, - RowsRead: p.curPlan.instrumentation.topLevelStats.rowsRead, - RowsWritten: p.curPlan.instrumentation.topLevelStats.rowsWritten, - InnerJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.InnerJoin]), - LeftOuterJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftOuterJoin]), - FullOuterJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.FullOuterJoin]), - SemiJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftSemiJoin]), - AntiJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftAntiJoin]), - IntersectAllJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.IntersectAllJoin]), - ExceptAllJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.ExceptAllJoin]), - HashJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.HashJoin]), - CrossJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.CrossJoin]), - IndexJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.IndexJoin]), - LookupJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.LookupJoin]), - MergeJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.MergeJoin]), - InvertedJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.InvertedJoin]), - ApplyJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.ApplyJoin]), - ZigZagJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.ZigZagJoin]), - ContentionNanos: queryLevelStats.ContentionTime.Nanoseconds(), - Regions: queryLevelStats.Regions, - SQLInstanceIDs: queryLevelStats.SQLInstanceIDs, - KVNodeIDs: queryLevelStats.KVNodeIDs, - UsedFollowerRead: queryLevelStats.UsedFollowerRead, - NetworkBytesSent: queryLevelStats.NetworkBytesSent, - MaxMemUsage: queryLevelStats.MaxMemUsage, - MaxDiskUsage: queryLevelStats.MaxDiskUsage, - KVBytesRead: queryLevelStats.KVBytesRead, - KVPairsRead: queryLevelStats.KVPairsRead, - KVRowsRead: queryLevelStats.KVRowsRead, - KvTimeNanos: queryLevelStats.KVTime.Nanoseconds(), - KvGrpcCalls: queryLevelStats.KVBatchRequestsIssued, - NetworkMessages: queryLevelStats.NetworkMessages, - CpuTimeNanos: queryLevelStats.CPUTime.Nanoseconds(), - IndexRecommendations: indexRecs, - // TODO(mgartner): Use a slice of struct{uint64, uint64} instead of - // converting to strings. 
- Indexes: p.curPlan.instrumentation.indexesUsed.Strings(), + CommonSQLExecDetails: execDetails, + SkippedQueries: skippedQueries, + CostEstimate: p.curPlan.instrumentation.costEstimate, + Distribution: p.curPlan.instrumentation.distribution.String(), + PlanGist: p.curPlan.instrumentation.planGist.String(), + SessionID: p.extendedEvalCtx.SessionID.String(), + Database: p.CurrentDatabase(), + StatementID: p.stmt.QueryID.String(), + TransactionID: txnID, + StatementFingerprintID: stmtFingerprintID.String(), + MaxFullScanRowsEstimate: p.curPlan.instrumentation.maxFullScanRows, + TotalScanRowsEstimate: p.curPlan.instrumentation.totalScanRows, + OutputRowsEstimate: p.curPlan.instrumentation.outputRows, + StatsAvailable: p.curPlan.instrumentation.statsAvailable, + NanosSinceStatsCollected: int64(p.curPlan.instrumentation.nanosSinceStatsCollected), + BytesRead: p.curPlan.instrumentation.topLevelStats.bytesRead, + RowsRead: p.curPlan.instrumentation.topLevelStats.rowsRead, + RowsWritten: p.curPlan.instrumentation.topLevelStats.rowsWritten, + InnerJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.InnerJoin]), + LeftOuterJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftOuterJoin]), + FullOuterJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.FullOuterJoin]), + SemiJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftSemiJoin]), + AntiJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.LeftAntiJoin]), + IntersectAllJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.IntersectAllJoin]), + ExceptAllJoinCount: int64(p.curPlan.instrumentation.joinTypeCounts[descpb.ExceptAllJoin]), + HashJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.HashJoin]), + CrossJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.CrossJoin]), + IndexJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.IndexJoin]), + LookupJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.LookupJoin]), + MergeJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.MergeJoin]), + InvertedJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.InvertedJoin]), + ApplyJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.ApplyJoin]), + ZigZagJoinCount: int64(p.curPlan.instrumentation.joinAlgorithmCounts[exec.ZigZagJoin]), + ContentionNanos: queryLevelStats.ContentionTime.Nanoseconds(), + Regions: queryLevelStats.Regions, + SQLInstanceIDs: queryLevelStats.SQLInstanceIDs, + KVNodeIDs: queryLevelStats.KVNodeIDs, + UsedFollowerRead: queryLevelStats.UsedFollowerRead, + NetworkBytesSent: queryLevelStats.NetworkBytesSent, + MaxMemUsage: queryLevelStats.MaxMemUsage, + MaxDiskUsage: queryLevelStats.MaxDiskUsage, + KVBytesRead: queryLevelStats.KVBytesRead, + KVPairsRead: queryLevelStats.KVPairsRead, + KVRowsRead: queryLevelStats.KVRowsRead, + KvTimeNanos: queryLevelStats.KVTime.Nanoseconds(), + KvGrpcCalls: queryLevelStats.KVBatchRequestsIssued, + NetworkMessages: queryLevelStats.NetworkMessages, + CpuTimeNanos: queryLevelStats.CPUTime.Nanoseconds(), + IndexRecommendations: indexRecs, + Indexes: p.curPlan.instrumentation.indexesUsed, ScanCount: int64(p.curPlan.instrumentation.scanCounts[exec.ScanCount]), ScanWithStatsCount: int64(p.curPlan.instrumentation.scanCounts[exec.ScanWithStatsCount]), ScanWithStatsForecastCount: int64(p.curPlan.instrumentation.scanCounts[exec.ScanWithStatsForecastCount]), diff --git a/pkg/sql/exec_util.go b/pkg/sql/exec_util.go index 
55ce0f7652f3..057cf70cd005 100644 --- a/pkg/sql/exec_util.go +++ b/pkg/sql/exec_util.go @@ -833,7 +833,7 @@ var ( // Below are the metadata for the statement started counters. MetaQueryStarted = metric.Metadata{ Name: "sql.query.started.count", - Help: "Number of SQL queries started", + Help: "Number of SQL operations started including queries, and transaction control statements", Measurement: "SQL Statements", Unit: metric.Unit_COUNT, } @@ -885,6 +885,12 @@ var ( Measurement: "SQL Statements", Unit: metric.Unit_COUNT, } + MetaCRUDStarted = metric.Metadata{ + Name: "sql.crud_query.started.count", + Help: "Number of SQL SELECT, INSERT, UPDATE, DELETE statements started", + Measurement: "SQL Statements", + Unit: metric.Unit_COUNT, + } MetaSavepointStarted = metric.Metadata{ Name: "sql.savepoint.started.count", Help: "Number of SQL SAVEPOINT statements started", @@ -949,7 +955,7 @@ var ( // Below are the metadata for the statement executed counters. MetaQueryExecuted = metric.Metadata{ Name: "sql.query.count", - Help: "Number of SQL queries executed", + Help: "Number of SQL operations started including queries, and transaction control statements", Measurement: "SQL Statements", Unit: metric.Unit_COUNT, } @@ -1001,6 +1007,12 @@ var ( Measurement: "SQL Statements", Unit: metric.Unit_COUNT, } + MetaCRUDExecuted = metric.Metadata{ + Name: "sql.crud_query.count", + Help: "Number of SQL SELECT, INSERT, UPDATE, DELETE statements successfully executed", + Measurement: "SQL Statements", + Unit: metric.Unit_COUNT, + } MetaSavepointExecuted = metric.Metadata{ Name: "sql.savepoint.count", Help: "Number of SQL SAVEPOINT statements successfully executed", diff --git a/pkg/sql/executor_statement_metrics.go b/pkg/sql/executor_statement_metrics.go index d2d7d302ea81..5056ec6559be 100644 --- a/pkg/sql/executor_statement_metrics.go +++ b/pkg/sql/executor_statement_metrics.go @@ -208,10 +208,8 @@ func (ex *connExecutor) recordStatementSummary( EndTime: phaseTimes.GetSessionPhaseTime(sessionphase.PlannerStartExecStmt).Add(svcLatRaw), FullScan: fullScan, ExecStats: queryLevelStats, - // TODO(mgartner): Use a slice of struct{uint64, uint64} instead of - // converting to strings. 
- Indexes: planner.instrumentation.indexesUsed.Strings(), - Database: planner.SessionData().Database, + Indexes: planner.instrumentation.indexesUsed, + Database: planner.SessionData().Database, } stmtFingerprintID, err := diff --git a/pkg/sql/importer/BUILD.bazel b/pkg/sql/importer/BUILD.bazel index 0653253b0609..f3c798eaeb59 100644 --- a/pkg/sql/importer/BUILD.bazel +++ b/pkg/sql/importer/BUILD.bazel @@ -17,7 +17,6 @@ go_library( "import_processor.go", "import_processor_planning.go", "import_table_creation.go", - "import_type_resolver.go", "read_import_avro.go", "read_import_base.go", "read_import_csv.go", @@ -32,6 +31,7 @@ go_library( visibility = ["//visibility:public"], deps = [ "//pkg/base", + "//pkg/ccl/crosscluster", "//pkg/cloud", "//pkg/cloud/cloudprivilege", "//pkg/clusterversion", @@ -93,7 +93,6 @@ go_library( "//pkg/sql/sem/tree", "//pkg/sql/sessiondata", "//pkg/sql/sqlclustersettings", - "//pkg/sql/sqlerrors", "//pkg/sql/sqltelemetry", "//pkg/sql/stats", "//pkg/sql/types", diff --git a/pkg/sql/importer/read_import_base.go b/pkg/sql/importer/read_import_base.go index 2065079cb29f..963323a86d91 100644 --- a/pkg/sql/importer/read_import_base.go +++ b/pkg/sql/importer/read_import_base.go @@ -18,6 +18,7 @@ import ( "sync/atomic" "time" + "github.com/cockroachdb/cockroach/pkg/ccl/crosscluster" "github.com/cockroachdb/cockroach/pkg/cloud" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" @@ -55,7 +56,7 @@ func runImport( // Install type metadata in all of the import tables. spec = protoutil.Clone(spec).(*execinfrapb.ReadImportDataSpec) - importResolver := MakeImportTypeResolver(spec.Types) + importResolver := crosscluster.MakeCrossClusterTypeResolver(spec.Types) for _, table := range spec.Tables { cpy := tabledesc.NewBuilder(table.Desc).BuildCreatedMutableTable() if err := typedesc.HydrateTypesInDescriptor(ctx, cpy, importResolver); err != nil { diff --git a/pkg/sql/instrumentation.go b/pkg/sql/instrumentation.go index 4f8eaca6af36..e6c76d858f1a 100644 --- a/pkg/sql/instrumentation.go +++ b/pkg/sql/instrumentation.go @@ -26,7 +26,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/idxrecommendations" "github.com/cockroachdb/cockroach/pkg/sql/isql" "github.com/cockroachdb/cockroach/pkg/sql/opt/exec" - "github.com/cockroachdb/cockroach/pkg/sql/opt/exec/execbuilder" "github.com/cockroachdb/cockroach/pkg/sql/opt/exec/explain" "github.com/cockroachdb/cockroach/pkg/sql/opt/indexrec" "github.com/cockroachdb/cockroach/pkg/sql/opt/optbuilder" @@ -226,7 +225,7 @@ type instrumentationHelper struct { scanCounts [exec.NumScanCountTypes]int // indexesUsed list the indexes used in the query with format tableID@indexID. - indexesUsed execbuilder.IndexesUsed + indexesUsed []string // schemachangerMode indicates which schema changer mode was used to execute // the query. 
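Note on the indexesUsed change above: the instrumentation helper now carries a plain []string in the tableID@indexID format instead of the execbuilder-specific IndexesUsed struct, so callers deduplicate entries as they append. A minimal, self-contained sketch of that deduplicating append (the helper name addIndexUsed is illustrative, not part of the patch):

package main

import "fmt"

// addIndexUsed appends "tableID@indexID" to used only if the entry is not
// already present, so a plan that touches the same index several times still
// records it once.
func addIndexUsed(used []string, tableID, indexID uint64) []string {
	entry := fmt.Sprintf("%d@%d", tableID, indexID)
	for _, s := range used {
		if s == entry {
			return used
		}
	}
	return append(used, entry)
}

func main() {
	var used []string
	used = addIndexUsed(used, 104, 1)
	used = addIndexUsed(used, 104, 1) // duplicate, dropped
	used = addIndexUsed(used, 104, 2)
	fmt.Println(used) // [104@1 104@2]
}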
diff --git a/pkg/sql/logictest/REPOSITORIES.bzl b/pkg/sql/logictest/REPOSITORIES.bzl index 723c82b35307..df19ff800d02 100644 --- a/pkg/sql/logictest/REPOSITORIES.bzl +++ b/pkg/sql/logictest/REPOSITORIES.bzl @@ -7,17 +7,17 @@ CONFIG_DARWIN_AMD64 = "darwin-10.9-amd64" CONFIG_DARWIN_ARM64 = "darwin-11.0-arm64" _CONFIGS = [ - ("24.1.5", [ - (CONFIG_DARWIN_AMD64, "b6aba8395510ac2506c6cb82e2661d6d3476ff7c132016fdc823b165cbea3549"), - (CONFIG_DARWIN_ARM64, "7b2cc8e3a53945d97bc5afd4b7457ff4962633bae9b71945ffd6e2659fa2bf5a"), - (CONFIG_LINUX_AMD64, "731f9ade47b19119136049816edd12167423cb993ee19349fa6ce51157b9fbfc"), - (CONFIG_LINUX_ARM64, "7ed4d67c60f1b54ed522fbdecfb4907904be6e043df6e6596bfb2894e7d82f87"), + ("24.1.6", [ + (CONFIG_DARWIN_AMD64, "0d900af86357f5883ce10935bae7ea00e16ffc2d7875e56491e6d731f4565d9d"), + (CONFIG_DARWIN_ARM64, "985e67e66bc29955f1547f7cc0748db5532ab0c57628bdf1ce3df3c9c1fc072a"), + (CONFIG_LINUX_AMD64, "1120fae532f5e31411d8df06c9dac337b8116f1b167988ec2da675770c65a329"), + (CONFIG_LINUX_ARM64, "9d913a9080bc777645aa8a6c009f717f500856f8b3b740d5bd9e8918ddd0d88a"), ]), - ("24.2.3", [ - (CONFIG_DARWIN_AMD64, "f3d59ed7367c8b4d8420bd1cae9f50a58114d18945ef984805403d44943447d0"), - (CONFIG_DARWIN_ARM64, "5e70e89ef21217a80a532499f5b07618269f1ad1399732d4a55c09a71554f048"), - (CONFIG_LINUX_AMD64, "637d0ada1db52e57f5cbbe19a7defcff0d538d43b771ae8da7ceba326686d64c"), - (CONFIG_LINUX_ARM64, "2892c8d34e89909b871baf9c1b147c827f3b3b78285602aac33789f79fdfa210"), + ("24.2.4", [ + (CONFIG_DARWIN_AMD64, "84f7dc8d5b38acb2bcf61005e1eef658a640ff4da107ef4ea9bf8feda36b3bb3"), + (CONFIG_DARWIN_ARM64, "c65aa4cefe1006cec67305350d69cf5c536a13e0aec06e2b508cae0578bca421"), + (CONFIG_LINUX_AMD64, "fa4a5696f0abd766993d5ded9b6c2cc899701be56afbf6d737baf4841c9e7bc1"), + (CONFIG_LINUX_ARM64, "66291ab21b9e94edf1d2c594ddbdf1ceea1c3270525982cff9be5ac1544c0281"), ]), ] diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_schema b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_schema index f4f6ed67e2af..5df8fb612911 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_schema +++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_schema @@ -125,10 +125,6 @@ user root statement ok USE d -# root must be a member of testuser to ALTER DEFAULT PRIVILEGES FOR ROLE testuser. 
-statement ok -GRANT testuser TO root - statement ok ALTER DEFAULT PRIVILEGES FOR ROLE testuser REVOKE ALL ON SCHEMAS FROM testuser, testuser2 diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_sequence b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_sequence index ec5bfb44bfca..78e876b93d37 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_sequence +++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_sequence @@ -140,9 +140,6 @@ user root statement ok USE d -statement ok -GRANT testuser TO root - statement ok ALTER DEFAULT PRIVILEGES FOR ROLE testuser REVOKE ALL ON SEQUENCES FROM testuser, testuser2 diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_table b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_table index 891c60655c40..1a82f70e6c4a 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_table @@ -185,9 +185,6 @@ use d statement ok GRANT CREATE ON DATABASE d TO testuser -statement ok -GRANT testuser TO root - statement ok ALTER DEFAULT PRIVILEGES FOR ROLE testuser GRANT SELECT ON TABLES to testuser, testuser2 @@ -288,7 +285,7 @@ user testuser2 statement ok USE d -statement error pq: must be a member of root +statement error pq: must be an admin or member of root ALTER DEFAULT PRIVILEGES FOR ROLE root GRANT SELECT ON TABLES TO testuser # Ensure you can ALTER DEFAULT PRIVILEGES for multiple roles. @@ -365,3 +362,22 @@ ALTER DEFAULT PRIVILEGES FOR ROLE public REVOKE SELECT ON TABLES FROM testuser2, # Can specify PUBLIC as a grantee. statement ok ALTER DEFAULT PRIVILEGES REVOKE SELECT ON TABLES FROM public + +# Admins can ALTER DEFAULT PRIVILEGES for any role. +user root + +# Confirm that root is not a member of testuser. We avoid using pg_has_role +# to check, since that has a special case for all admin users. +query TTB +SELECT role, inheriting_member, member_is_explicit +FROM crdb_internal.kv_inherited_role_members +WHERE inheriting_member = 'root' +ORDER BY role +---- +admin root true + +statement ok +ALTER DEFAULT PRIVILEGES FOR ROLE testuser GRANT ALL ON TABLES TO testuser2 + +statement ok +ALTER DEFAULT PRIVILEGES FOR ROLE testuser REVOKE ALL ON TABLES FROM testuser2 diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_type b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_type index 3f7dc6982703..dd3062f0ee9f 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_type +++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_for_type @@ -115,9 +115,6 @@ user root statement ok USE d -statement ok -GRANT testuser TO root - statement ok ALTER DEFAULT PRIVILEGES FOR ROLE testuser REVOKE ALL ON TYPES FROM testuser, testuser2 diff --git a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_in_schema b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_in_schema index 4bfbdd1038b9..22d74fd6a92a 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_in_schema +++ b/pkg/sql/logictest/testdata/logic_test/alter_default_privileges_in_schema @@ -12,9 +12,6 @@ CREATE USER testuser2 statement ok GRANT CREATE ON DATABASE test TO testuser -statement ok -GRANT testuser TO root - user testuser # Test on public schema. 
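The logic-test edits above encode the relaxed permission rule for ALTER DEFAULT PRIVILEGES: an admin no longer needs to be granted membership in the target role (hence the dropped GRANT testuser TO root statements), and non-members now see the error "must be an admin or member of <role>". A minimal sketch of that rule under assumed names (canAlterDefaultPrivilegesFor is illustrative, not the planner's actual API):

package main

import "fmt"

// canAlterDefaultPrivilegesFor allows ALTER DEFAULT PRIVILEGES FOR ROLE target
// when the current user is an admin or a member of target, matching the error
// text exercised by the tests above.
func canAlterDefaultPrivilegesFor(isAdmin bool, memberOf map[string]bool, target string) error {
	if isAdmin || memberOf[target] {
		return nil
	}
	return fmt.Errorf("must be an admin or member of %s", target)
}

func main() {
	// root is an admin, so no explicit GRANT testuser TO root is needed.
	fmt.Println(canAlterDefaultPrivilegesFor(true, nil, "testuser"))
	// testuser2 is neither an admin nor a member of root.
	fmt.Println(canAlterDefaultPrivilegesFor(false, map[string]bool{}, "root"))
}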
diff --git a/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog b/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog index ab1c65f9b63e..5cdf428a8692 100644 --- a/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog +++ b/pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog @@ -105,7 +105,7 @@ skipif config local-mixed-24.2 query IT SELECT id, strip_volatile(descriptor) FROM crdb_internal.kv_catalog_descriptor ORDER BY id ---- -1 {"database": {"id": 1, "name": "system", "privileges": {"ownerProto": "node", "users": [{"privileges": "2048", "userProto": "admin", "withGrantOption": "2048"}, {"privileges": "2048", "userProto": "root", "withGrantOption": "2048"}], "version": 3}, "systemDatabaseSchemaVersion": {"internal": 22, "majorVal": 1000024, "minorVal": 2}, "version": "1"}} +1 {"database": {"id": 1, "name": "system", "privileges": {"ownerProto": "node", "users": [{"privileges": "2048", "userProto": "admin", "withGrantOption": "2048"}, {"privileges": "2048", "userProto": "root", "withGrantOption": "2048"}], "version": 3}, "systemDatabaseSchemaVersion": {"internal": 22, "majorVal": 24, "minorVal": 2}, "version": "1"}} 3 {"table": {"columns": [{"id": 1, "name": "id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "descriptor", "nullable": true, "type": {"family": "BytesFamily", "oid": 17}}], "formatVersion": 3, "id": 3, "name": "descriptor", "nextColumnId": 3, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "parentId": 1, "primaryIndex": {"constraintId": 1, "encodingType": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [1], "keyColumnNames": ["id"], "name": "primary", "partitioning": {}, "sharded": {}, "storeColumnIds": [2], "storeColumnNames": ["descriptor"], "unique": true, "version": 4}, "privileges": {"ownerProto": "node", "users": [{"privileges": "32", "userProto": "admin", "withGrantOption": "32"}, {"privileges": "32", "userProto": "root", "withGrantOption": "32"}], "version": 3}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 29, "version": "1"}} 4 {"table": {"columns": [{"id": 1, "name": "username", "type": {"family": "StringFamily", "oid": 25}}, {"id": 2, "name": "hashedPassword", "nullable": true, "type": {"family": "BytesFamily", "oid": 17}}, {"defaultExpr": "false", "id": 3, "name": "isRole", "type": {"oid": 16}}, {"id": 4, "name": "user_id", "type": {"family": "OidFamily", "oid": 26}}], "formatVersion": 3, "id": 4, "indexes": [{"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 2, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [4], "keyColumnNames": ["user_id"], "keySuffixColumnIds": [1], "name": "users_user_id_idx", "partitioning": {}, "sharded": {}, "unique": true, "version": 3}], "name": "users", "nextColumnId": 5, "nextConstraintId": 3, "nextIndexId": 3, "nextMutationId": 1, "parentId": 1, "primaryIndex": {"constraintId": 2, "encodingType": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [1], "keyColumnNames": ["username"], "name": "primary", "partitioning": {}, "sharded": {}, "storeColumnIds": [2, 3, 4], "storeColumnNames": ["hashedPassword", "isRole", "user_id"], "unique": true, "version": 4}, "privileges": {"ownerProto": "node", "users": [{"privileges": "480", "userProto": "admin", "withGrantOption": "480"}, {"privileges": "480", "userProto": "root", "withGrantOption": "480"}], "version": 3}, "replacementOf": {"time": {}}, 
"unexposedParentSchemaId": 29, "version": "2"}} 5 {"table": {"columns": [{"id": 1, "name": "id", "type": {"family": "IntFamily", "oid": 20, "width": 64}}, {"id": 2, "name": "config", "nullable": true, "type": {"family": "BytesFamily", "oid": 17}}], "formatVersion": 3, "id": 5, "name": "zones", "nextColumnId": 3, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "parentId": 1, "primaryIndex": {"constraintId": 1, "encodingType": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [1], "keyColumnNames": ["id"], "name": "primary", "partitioning": {}, "sharded": {}, "storeColumnIds": [2], "storeColumnNames": ["config"], "unique": true, "version": 4}, "privileges": {"ownerProto": "node", "users": [{"privileges": "480", "userProto": "admin", "withGrantOption": "480"}, {"privileges": "480", "userProto": "root", "withGrantOption": "480"}], "version": 3}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 29, "version": "1"}} diff --git a/pkg/sql/logictest/testdata/logic_test/reassign_owned_by b/pkg/sql/logictest/testdata/logic_test/reassign_owned_by index 0646f2101caf..4e8756a81530 100644 --- a/pkg/sql/logictest/testdata/logic_test/reassign_owned_by +++ b/pkg/sql/logictest/testdata/logic_test/reassign_owned_by @@ -243,8 +243,19 @@ user root statement ok REVOKE CREATE ON DATABASE test FROM testuser, testuser2; DROP ROLE testuser; + +# Ownership of the public schema was transferred to testuser2. + +statement error role testuser2 cannot be dropped because some objects depend on it\nowner of schema test.public +DROP ROLE testuser2 + +statement ok +REASSIGN OWNED BY testuser2 TO root + +statement ok DROP ROLE testuser2 + # ------------------------------------------------------------------------------ # Make sure only objects in the current database are reassigned diff --git a/pkg/sql/logictest/testdata/logic_test/show_default_privileges b/pkg/sql/logictest/testdata/logic_test/show_default_privileges index 2b51a2d9072e..d65c2e22001b 100644 --- a/pkg/sql/logictest/testdata/logic_test/show_default_privileges +++ b/pkg/sql/logictest/testdata/logic_test/show_default_privileges @@ -226,7 +226,6 @@ use test2; CREATE USER testuser2; statement ok -GRANT testuser TO root; ALTER DEFAULT PRIVILEGES FOR ROLE testuser GRANT DROP, ZONECONFIG ON TABLES TO foo WITH GRANT OPTION; query TBTTTB colnames,rowsort diff --git a/pkg/sql/opt/exec/execbuilder/BUILD.bazel b/pkg/sql/opt/exec/execbuilder/BUILD.bazel index b06cb525c606..ad8b1921d6ed 100644 --- a/pkg/sql/opt/exec/execbuilder/BUILD.bazel +++ b/pkg/sql/opt/exec/execbuilder/BUILD.bazel @@ -47,6 +47,7 @@ go_library( "//pkg/sql/sqlerrors", "//pkg/sql/sqltelemetry", "//pkg/sql/types", + "//pkg/util", "//pkg/util/buildutil", "//pkg/util/encoding", "//pkg/util/errorutil", diff --git a/pkg/sql/opt/exec/execbuilder/builder.go b/pkg/sql/opt/exec/execbuilder/builder.go index c529e9ea3f81..34830973e177 100644 --- a/pkg/sql/opt/exec/execbuilder/builder.go +++ b/pkg/sql/opt/exec/execbuilder/builder.go @@ -7,8 +7,6 @@ package execbuilder import ( "context" - "slices" - "strconv" "time" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" @@ -170,41 +168,7 @@ type Builder struct { IsANSIDML bool // IndexesUsed list the indexes used in query with the format tableID@indexID. - IndexesUsed -} - -// IndexesUsed is a list of indexes used in a query. 
-type IndexesUsed struct { - indexes []struct { - tableID cat.StableID - indexID cat.StableID - } -} - -// add adds the given index to the list, if it is not already present. -func (iu *IndexesUsed) add(tableID, indexID cat.StableID) { - s := struct { - tableID cat.StableID - indexID cat.StableID - }{tableID, indexID} - if !slices.Contains(iu.indexes, s) { - iu.indexes = append(iu.indexes, s) - } -} - -// Strings returns a slice of strings with the format tableID@indexID for each -// index in the list. -// -// TODO(mgartner): Use a slice of struct{uint64, uint64} instead of converting -// to strings. -func (iu *IndexesUsed) Strings() []string { - res := make([]string, len(iu.indexes)) - const base = 10 - for i, u := range iu.indexes { - res[i] = strconv.FormatUint(uint64(u.tableID), base) + "@" + - strconv.FormatUint(uint64(u.indexID), base) - } - return res + IndexesUsed []string } // New constructs an instance of the execution node builder using the diff --git a/pkg/sql/opt/exec/execbuilder/relational.go b/pkg/sql/opt/exec/execbuilder/relational.go index 8a70b344376e..ecdc5b305c1a 100644 --- a/pkg/sql/opt/exec/execbuilder/relational.go +++ b/pkg/sql/opt/exec/execbuilder/relational.go @@ -38,6 +38,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sqlerrors" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/buildutil" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/errorutil" @@ -755,7 +756,7 @@ func (b *Builder) buildScan(scan *memo.ScanExpr) (_ execPlan, outputCols colOrdM return execPlan{}, colOrdMap{}, errors.AssertionFailedf("expected inverted index scan to have a constraint") } - b.IndexesUsed.add(tab.ID(), idx.ID()) + b.IndexesUsed = util.CombineUnique(b.IndexesUsed, []string{fmt.Sprintf("%d@%d", tab.ID(), idx.ID())}) // Save if we planned a full (large) table/index scan on the builder so that // the planner can be made aware later. We only do this for non-virtual @@ -2296,7 +2297,7 @@ func (b *Builder) buildIndexJoin( // TODO(radu): the distsql implementation of index join assumes that the input // starts with the PK columns in order (#40749). 
pri := tab.Index(cat.PrimaryIndex) - b.IndexesUsed.add(tab.ID(), pri.ID()) + b.IndexesUsed = util.CombineUnique(b.IndexesUsed, []string{fmt.Sprintf("%d@%d", tab.ID(), pri.ID())}) keyCols := make([]exec.NodeColumnOrdinal, pri.KeyColumnCount()) for i := range keyCols { keyCols[i], err = getNodeColumnOrdinal(inputCols, join.Table.ColumnID(pri.Column(i).Ordinal())) @@ -2674,7 +2675,7 @@ func (b *Builder) buildLookupJoin( tab := md.Table(join.Table) idx := tab.Index(join.Index) - b.IndexesUsed.add(tab.ID(), idx.ID()) + b.IndexesUsed = util.CombineUnique(b.IndexesUsed, []string{fmt.Sprintf("%d@%d", tab.ID(), idx.ID())}) locking, err := b.buildLocking(join.Table, join.Locking) if err != nil { @@ -2854,7 +2855,7 @@ func (b *Builder) buildInvertedJoin( md := b.mem.Metadata() tab := md.Table(join.Table) idx := tab.Index(join.Index) - b.IndexesUsed.add(tab.ID(), idx.ID()) + b.IndexesUsed = util.CombineUnique(b.IndexesUsed, []string{fmt.Sprintf("%d@%d", tab.ID(), idx.ID())}) prefixEqCols := make([]exec.NodeColumnOrdinal, len(join.PrefixKeyCols)) for i, c := range join.PrefixKeyCols { @@ -2996,8 +2997,10 @@ func (b *Builder) buildZigzagJoin( rightTable := md.Table(join.RightTable) leftIndex := leftTable.Index(join.LeftIndex) rightIndex := rightTable.Index(join.RightIndex) - b.IndexesUsed.add(leftTable.ID(), leftIndex.ID()) - b.IndexesUsed.add(rightTable.ID(), rightIndex.ID()) + b.IndexesUsed = util.CombineUnique(b.IndexesUsed, + []string{fmt.Sprintf("%d@%d", leftTable.ID(), leftIndex.ID())}) + b.IndexesUsed = util.CombineUnique(b.IndexesUsed, + []string{fmt.Sprintf("%d@%d", rightTable.ID(), rightIndex.ID())}) leftEqCols := make([]exec.TableColumnOrdinal, len(join.LeftEqCols)) rightEqCols := make([]exec.TableColumnOrdinal, len(join.RightEqCols)) diff --git a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index index 755a2b10e0bd..88e158ccdbfb 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/inverted_index +++ b/pkg/sql/opt/exec/execbuilder/testdata/inverted_index @@ -2681,55 +2681,58 @@ ALTER TABLE d INJECT STATISTICS '[ } ]'; -# Filter with a fully-specified array. This should use a minimal inverted index -# scan. +# Filter with a fully-specified array. This should use a zigzag join. query T EXPLAIN SELECT a FROM d WHERE b @> '[1, 2]' ORDER BY a ---- distribution: local vectorized: true · -• filter +• sort │ estimated row count: 1,247 -│ filter: b @> '[1, 2]' +│ order: +a │ -└── • index join - │ estimated row count: 1,020 +└── • lookup join + │ estimated row count: 1,247 │ table: d@d_pkey + │ equality: (a) = (a) + │ equality cols are key │ - └── • sort - │ estimated row count: 1,020 - │ order: +a - │ - └── • scan - estimated row count: 1,020 (1.0% of the table; stats collected ago) - table: d@foo_inv - spans: 1 span + └── • zigzag join + estimated row count: 1,247 + left table: d@foo_inv + left columns: (a, b_inverted_key) + left fixed values: 1 column + right table: d@foo_inv + right columns: (a, b_inverted_key) + right fixed values: 1 column # Combine predicates with AND. Should have the same output as b @> '[1, 2]'. -# This should use a minimal inverted index scan. +# This should use a zigzag join. 
query T EXPLAIN SELECT a FROM d WHERE b @> '[1]' AND b @> '[2]' ORDER BY a ---- distribution: local vectorized: true · -• filter +• sort │ estimated row count: 1,247 -│ filter: (b @> '[1]') AND (b @> '[2]') +│ order: +a │ -└── • index join - │ estimated row count: 1,020 +└── • lookup join + │ estimated row count: 1,247 │ table: d@d_pkey + │ equality: (a) = (a) + │ equality cols are key │ - └── • sort - │ estimated row count: 1,020 - │ order: +a - │ - └── • scan - estimated row count: 1,020 (1.0% of the table; stats collected ago) - table: d@foo_inv - spans: 1 span + └── • zigzag join + estimated row count: 1,247 + left table: d@foo_inv + left columns: (a, b_inverted_key) + left fixed values: 1 column + right table: d@foo_inv + right columns: (a, b_inverted_key) + right fixed values: 1 column # Filter with a nested array. This index expression is not tight. # This should use a zigzag join. diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-array b/pkg/sql/opt/memo/testdata/stats/inverted-array index 86d81db43fb9..680a6dcbec4f 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-array +++ b/pkg/sql/opt/memo/testdata/stats/inverted-array @@ -6,10 +6,10 @@ CREATE TABLE t ( ) ---- -# Histogram boundaries are for arrays with values 1, 2, and 3, including some -# empty arrays. The row_count is lower than the sum of the histogram buckets -# num_eq's because some rows can have multiple inverted index entries, for -# example `{1, 2}`. There are: +# Histogram boundaries are for JSON values `{}`, `{1}`, `{2}`, `{3}`. The +# row_count is lower than the sum of the histogram buckets num_eq's because some +# rows can have multiple inverted index entries, for example `{1, 2}`. There +# are: # # - 1000 rows total # - 10 empty arrays diff --git a/pkg/sql/opt/memo/testdata/stats/inverted-json b/pkg/sql/opt/memo/testdata/stats/inverted-json index 257ea6dda3f0..758827c11ad5 100644 --- a/pkg/sql/opt/memo/testdata/stats/inverted-json +++ b/pkg/sql/opt/memo/testdata/stats/inverted-json @@ -747,13 +747,28 @@ select │ ├── stats: [rows=4e-07] │ ├── key: (1) │ ├── fd: (1)-->(2) - │ └── scan t@j_idx,inverted + │ └── inverted-filter │ ├── columns: k:1(int!null) - │ ├── inverted constraint: /5/1 - │ │ └── spans: ["a"/"b"/"c", "a"/"b"/"c"] - │ ├── stats: [rows=4e-07, distinct(5)=4e-07, null(5)=0] - │ │ histogram(5)= - │ └── key: (1) + │ ├── inverted expression: /5 + │ │ ├── tight: false, unique: true + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["a"/"b"/"c", "a"/"b"/"c"] + │ │ └── span expression + │ │ ├── tight: true, unique: true + │ │ └── union spans: ["a"/"d"/"e", "a"/"d"/"e"] + │ ├── stats: [rows=4e-07] + │ ├── key: (1) + │ └── scan t@j_idx,inverted + │ ├── columns: k:1(int!null) j_inverted_key:5(encodedkey!null) + │ ├── inverted constraint: /5/1 + │ │ └── spans + │ │ ├── ["a"/"b"/"c", "a"/"b"/"c"] + │ │ └── ["a"/"d"/"e", "a"/"d"/"e"] + │ └── stats: [rows=4e-07, distinct(1)=4e-07, null(1)=0, distinct(5)=4e-07, null(5)=0] + │ histogram(5)= └── filters └── (j:2->'a') = '{"b": "c", "d": "e"}' [type=bool, outer=(2), immutable] @@ -773,13 +788,44 @@ select │ ├── stats: [rows=4e-07] │ ├── key: (1) │ ├── fd: (1)-->(2) - │ └── scan t@j_idx,inverted + │ └── inverted-filter │ ├── columns: k:1(int!null) - │ ├── inverted constraint: /5/1 - │ │ └── spans: ["a"/Arr/"b", "a"/Arr/"b"] - │ ├── stats: [rows=4e-07, distinct(5)=4e-07, null(5)=0] - │ │ histogram(5)= - │ └── key: (1) + │ ├── inverted expression: /5 + │ │ ├── tight: false, 
unique: true + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ ├── union spans: empty + │ │ │ └── INTERSECTION + │ │ │ ├── span expression + │ │ │ │ ├── tight: true, unique: true + │ │ │ │ ├── union spans: empty + │ │ │ │ └── INTERSECTION + │ │ │ │ ├── span expression + │ │ │ │ │ ├── tight: true, unique: true + │ │ │ │ │ └── union spans: ["a"/Arr/"b", "a"/Arr/"b"] + │ │ │ │ └── span expression + │ │ │ │ ├── tight: true, unique: true + │ │ │ │ └── union spans: ["a"/Arr/"c", "a"/Arr/"c"] + │ │ │ └── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["a"/Arr/"d", "a"/Arr/"d"] + │ │ └── span expression + │ │ ├── tight: true, unique: true + │ │ └── union spans: ["a"/Arr/"e", "a"/Arr/"e"] + │ ├── stats: [rows=4e-07] + │ ├── key: (1) + │ └── scan t@j_idx,inverted + │ ├── columns: k:1(int!null) j_inverted_key:5(encodedkey!null) + │ ├── inverted constraint: /5/1 + │ │ └── spans + │ │ ├── ["a"/Arr/"b", "a"/Arr/"b"] + │ │ ├── ["a"/Arr/"c", "a"/Arr/"c"] + │ │ ├── ["a"/Arr/"d", "a"/Arr/"d"] + │ │ └── ["a"/Arr/"e", "a"/Arr/"e"] + │ └── stats: [rows=4e-07, distinct(1)=4e-07, null(1)=0, distinct(5)=4e-07, null(5)=0] + │ histogram(5)= └── filters └── (j:2->'a') = '["b", "c", "d", "e"]' [type=bool, outer=(2), immutable] @@ -800,13 +846,36 @@ select │ ├── stats: [rows=4e-07] │ ├── key: (1) │ ├── fd: (1)-->(2) - │ └── scan t@j_idx,inverted + │ └── inverted-filter │ ├── columns: k:1(int!null) - │ ├── inverted constraint: /5/1 - │ │ └── spans: ["a"/"b"/Arr/"c", "a"/"b"/Arr/"c"] - │ ├── stats: [rows=4e-07, distinct(5)=4e-07, null(5)=0] - │ │ histogram(5)= - │ └── key: (1) + │ ├── inverted expression: /5 + │ │ ├── tight: false, unique: true + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ ├── union spans: empty + │ │ │ └── INTERSECTION + │ │ │ ├── span expression + │ │ │ │ ├── tight: true, unique: true + │ │ │ │ └── union spans: ["a"/"b"/Arr/"c", "a"/"b"/Arr/"c"] + │ │ │ └── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["a"/"b"/Arr/"d", "a"/"b"/Arr/"d"] + │ │ └── span expression + │ │ ├── tight: true, unique: true + │ │ └── union spans: ["a"/"b"/Arr/"e", "a"/"b"/Arr/"e"] + │ ├── stats: [rows=4e-07] + │ ├── key: (1) + │ └── scan t@j_idx,inverted + │ ├── columns: k:1(int!null) j_inverted_key:5(encodedkey!null) + │ ├── inverted constraint: /5/1 + │ │ └── spans + │ │ ├── ["a"/"b"/Arr/"c", "a"/"b"/Arr/"c"] + │ │ ├── ["a"/"b"/Arr/"d", "a"/"b"/Arr/"d"] + │ │ └── ["a"/"b"/Arr/"e", "a"/"b"/Arr/"e"] + │ └── stats: [rows=4e-07, distinct(1)=4e-07, null(1)=0, distinct(5)=4e-07, null(5)=0] + │ histogram(5)= └── filters └── (j:2->'a') = '{"b": ["c", "d", "e"]}' [type=bool, outer=(2), immutable] @@ -1088,13 +1157,28 @@ select │ ├── stats: [rows=4e-07] │ ├── key: (1) │ ├── fd: (1)-->(2) - │ └── scan t@j_idx,inverted + │ └── inverted-filter │ ├── columns: k:1(int!null) - │ ├── inverted constraint: /5/1 - │ │ └── spans: ["a"/Arr/1, "a"/Arr/1] - │ ├── stats: [rows=4e-07, distinct(5)=4e-07, null(5)=0] - │ │ histogram(5)= - │ └── key: (1) + │ ├── inverted expression: /5 + │ │ ├── tight: false, unique: true + │ │ ├── union spans: empty + │ │ └── INTERSECTION + │ │ ├── span expression + │ │ │ ├── tight: true, unique: true + │ │ │ └── union spans: ["a"/Arr/1, "a"/Arr/1] + │ │ └── span expression + │ │ ├── tight: true, unique: true + │ │ └── union spans: ["a"/Arr/2, "a"/Arr/2] + │ ├── stats: [rows=4e-07] + │ ├── key: (1) + │ └── 
scan t@j_idx,inverted + │ ├── columns: k:1(int!null) j_inverted_key:5(encodedkey!null) + │ ├── inverted constraint: /5/1 + │ │ └── spans + │ │ ├── ["a"/Arr/1, "a"/Arr/1] + │ │ └── ["a"/Arr/2, "a"/Arr/2] + │ └── stats: [rows=4e-07, distinct(1)=4e-07, null(1)=0, distinct(5)=4e-07, null(5)=0] + │ histogram(5)= └── filters └── (j:2->'a') @> '[1, 2]' [type=bool, outer=(2), immutable] diff --git a/pkg/sql/opt/props/histogram.go b/pkg/sql/opt/props/histogram.go index 2f015223ad69..0af4302b0557 100644 --- a/pkg/sql/opt/props/histogram.go +++ b/pkg/sql/opt/props/histogram.go @@ -121,66 +121,6 @@ func (h *Histogram) ValuesCount() float64 { return count } -// EqEstimate returns the estimated number of rows that equal the given -// datum. If the datum is equal to a bucket's upperbound, it returns the -// bucket's NumEq. If the datum falls in the range of a bucket's upper and lower -// bounds, it returns the bucket's NumRange divided by the bucket's -// DistinctRange. Otherwise, if the datum does not fall into any bucket in the -// histogram or any comparison between the datum and a bucket's upperbound -// results in an error, then it returns the total number of values in the -// histogram divided by the total number of distinct values. -func (h *Histogram) EqEstimate(ctx context.Context, d tree.Datum) float64 { - // Find the bucket belonging to the datum. It is the first bucket where the - // datum is less than or equal to the upperbound. - bucketIdx := binarySearch(len(h.buckets), func(i int) (bool, error) { - cmp, err := d.Compare(ctx, h.evalCtx, h.upperBound(i)) - return cmp <= 0, err - }) - if bucketIdx < len(h.buckets) { - if cmp, err := d.Compare(ctx, h.evalCtx, h.upperBound(bucketIdx)); err == nil { - if cmp == 0 { - return h.numEq(bucketIdx) - } - if bucketIdx != 0 { - if h.distinctRange(bucketIdx) == 0 { - // Avoid dividing by zero. - return 0 - } - return h.numRange(bucketIdx) / h.distinctRange(bucketIdx) - } - // The value d is less than the upper bound of the first bucket, so - // it is outside the bounds of the histogram. Fallback to the total - // number of values divided by the total number of distinct values. - } - } - totalDistinct := h.DistinctValuesCount() - if totalDistinct == 0 { - // Avoid dividing by zero. - return 0 - } - return h.ValuesCount() / h.DistinctValuesCount() -} - -// binarySearch extends sort.Search to allow the search function to return an -// error. It returns the smallest index i in [0, n) at which f(i) is true, -// assuming that on the range [0, n), f(i) == true implies f(i+1) == true. If -// there is no such index, or if f returns an error for any invocation, it -// returns n. -func binarySearch(n int, f func(int) (bool, error)) (idx int) { - defer func() { - if r := recover(); r != nil { - idx = n - } - }() - return sort.Search(n, func(i int) bool { - res, err := f(i) - if err != nil { - panic(err) - } - return res - }) -} - // DistinctValuesCount returns the estimated number of distinct values in the // histogram. 
func (h *Histogram) DistinctValuesCount() float64 { diff --git a/pkg/sql/opt/props/histogram_test.go b/pkg/sql/opt/props/histogram_test.go index 7b29d7a082ae..f5ec9a58675f 100644 --- a/pkg/sql/opt/props/histogram_test.go +++ b/pkg/sql/opt/props/histogram_test.go @@ -24,55 +24,6 @@ import ( "github.com/cockroachdb/errors" ) -func TestEqEstimate(t *testing.T) { - ctx := context.Background() - evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings()) - - emptyHist := &Histogram{} - emptyHist.Init(&evalCtx, opt.ColumnID(1), []cat.HistogramBucket{}) - - if eq := emptyHist.EqEstimate(ctx, tree.NewDInt(0)); eq != 0 { - t.Errorf("expected %f but found %f", 0.0, eq) - } - - // 0 1 3 3 4 5 0 0 40 35 - // <--- 1 --- 10 --- 25 --- 30 ---- 42 - histData := []cat.HistogramBucket{ - {NumRange: 0, DistinctRange: 0, NumEq: 1, UpperBound: tree.NewDInt(1)}, - {NumRange: 3, DistinctRange: 2, NumEq: 3, UpperBound: tree.NewDInt(10)}, - {NumRange: 4, DistinctRange: 2, NumEq: 5, UpperBound: tree.NewDInt(25)}, - {NumRange: 0, DistinctRange: 0, NumEq: 0, UpperBound: tree.NewDInt(30)}, - {NumRange: 40, DistinctRange: 7, NumEq: 35, UpperBound: tree.NewDInt(42)}, - } - h := &Histogram{} - h.Init(&evalCtx, opt.ColumnID(1), histData) - - testData := []struct { - datum tree.Datum - expected float64 - }{ - {tree.NewDInt(1), 1}, - {tree.NewDInt(9), 3.0 / 2}, - {tree.NewDInt(10), 3}, - {tree.NewDInt(11), 4.0 / 2}, - {tree.NewDInt(25), 5}, - {tree.NewDInt(28), 0}, - {tree.NewDInt(30), 0}, - {tree.NewDInt(35), 40.0 / 7}, - {tree.NewDInt(42), 35}, - // Use an all-bucket average for values outside the bounds of the - // histogram. - {tree.NewDInt(0), h.ValuesCount() / h.DistinctValuesCount()}, - {tree.NewDInt(43), h.ValuesCount() / h.DistinctValuesCount()}, - } - - for i, tc := range testData { - if eq := h.EqEstimate(ctx, tc.datum); eq != tc.expected { - t.Errorf("testcase %d: expected %f but found %f", i, tc.expected, eq) - } - } -} - func TestCanFilter(t *testing.T) { ctx := context.Background() evalCtx := eval.MakeTestingEvalContext(cluster.MakeTestingClusterSettings()) diff --git a/pkg/sql/opt/xform/rules/select.opt b/pkg/sql/opt/xform/rules/select.opt index 87f3e8a2cb53..03e44ded434e 100644 --- a/pkg/sql/opt/xform/rules/select.opt +++ b/pkg/sql/opt/xform/rules/select.opt @@ -53,22 +53,6 @@ => (GenerateInvertedIndexScans $scanPrivate $filters) -# GenerateMinimalInvertedIndexScans is similar to GenerateInvertedIndexScans. It -# differs by trying to generate an inverted index scan that spans the fewest -# index keys, rather than generating scans that span all index keys in the -# expression and performing set operations on them before an index-join. -[GenerateMinimalInvertedIndexScans, Explore] -(Select - $input:(Scan - $scanPrivate:* & - (IsCanonicalScan $scanPrivate) & - (HasInvertedIndexes $scanPrivate) - ) - $filters:* -) -=> -(GenerateMinimalInvertedIndexScans $input $scanPrivate $filters) - # GenerateTrigramSimilarityInvertedIndexScans generates scans on inverted # trigram indexes that are constrained by similarity filters (e.g., # `s & % 'foo'`). 
It is similar conceptually to GenerateInvertedIndexScans, but diff --git a/pkg/sql/opt/xform/select_funcs.go b/pkg/sql/opt/xform/select_funcs.go index 4002df08b56e..0591c45fab88 100644 --- a/pkg/sql/opt/xform/select_funcs.go +++ b/pkg/sql/opt/xform/select_funcs.go @@ -6,7 +6,6 @@ package xform import ( - "context" "sort" "github.com/cockroachdb/cockroach/pkg/sql/inverted" @@ -16,7 +15,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt/invertedidx" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" "github.com/cockroachdb/cockroach/pkg/sql/opt/partition" - "github.com/cockroachdb/cockroach/pkg/sql/opt/props" "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/types" @@ -841,47 +839,6 @@ func (c *CustomFuncs) GenerateInvertedIndexScans( scanPrivate *memo.ScanPrivate, filters memo.FiltersExpr, ) { - c.generateInvertedIndexScansImpl( - grp, - nil, /* input */ - scanPrivate, - filters, - false, /* minimizeSpans */ - ) -} - -// GenerateMinimalInvertedIndexScans is similar to GenerateInvertedIndexScans. -// It differs by trying to generate an inverted index scan that spans the fewest -// index keys, rather than generating scans that span all index keys in the -// expression and performing set operations on them before an index-join. It -// currently only works on JSON and array inverted indexes. -// -// TODO(mgartner): It may be simpler to implement these scans with constraints -// rather than inverted spans. It may also allow more fine-grained control over -// the remaining filters applied after the scan. -func (c *CustomFuncs) GenerateMinimalInvertedIndexScans( - grp memo.RelExpr, - required *physical.Required, - input memo.RelExpr, - scanPrivate *memo.ScanPrivate, - filters memo.FiltersExpr, -) { - c.generateInvertedIndexScansImpl(grp, input, scanPrivate, filters, true /* minimizeSpans */) -} - -// generateInvertedIndexScansImpl is the implementation of -// GenerateInvertedIndexScans and GenerateMinimalInvertedIndexScans. -func (c *CustomFuncs) generateInvertedIndexScansImpl( - grp memo.RelExpr, - input memo.RelExpr, - scanPrivate *memo.ScanPrivate, - filters memo.FiltersExpr, - minimizeSpans bool, -) { - if input == nil && minimizeSpans { - panic(errors.AssertionFailedf("expected non-nil input required to reduce spans")) - } - var pkCols opt.ColSet var sb indexScanBuilder sb.Init(c, scanPrivate.Table) @@ -897,17 +854,8 @@ func (c *CustomFuncs) generateInvertedIndexScansImpl( var iter scanIndexIter iter.Init(c.e.evalCtx, c.e, c.e.mem, &c.im, scanPrivate, filters, rejectNonInvertedIndexes) iter.ForEach(func(index cat.Index, filters memo.FiltersExpr, indexCols opt.ColSet, _ bool, _ memo.ProjectionsExpr) { - invColID := scanPrivate.Table.ColumnID(index.InvertedColumn().InvertedSourceColumnOrdinal()) - invColTypeFamily := c.e.f.Metadata().ColumnMeta(invColID).Type.Family() - jsonOrArray := invColTypeFamily == types.JsonFamily || invColTypeFamily == types.ArrayFamily - - // Only attempt to reduce spans for JSON and array inverted indexes. - if minimizeSpans && !jsonOrArray { - return - } - // Check whether the filter can constrain the index. 
- spanExpr, con, remainingFilters, pfState, ok := invertedidx.TryFilterInvertedIndex( + spanExpr, constraint, remainingFilters, pfState, ok := invertedidx.TryFilterInvertedIndex( c.e.ctx, c.e.evalCtx, c.e.f, filters, optionalFilters, scanPrivate.Table, index, tabMeta.ComputedCols, c.checkCancellation, ) @@ -916,18 +864,6 @@ func (c *CustomFuncs) generateInvertedIndexScansImpl( // generated. return } - if minimizeSpans { - newSpanExpr, ok := reduceInvertedSpans(c.e.ctx, input, scanPrivate.Table, index, spanExpr) - if !ok { - // The span expression could not be reduced, so skip this index. - // An inverted index scan may still be generated for it when - // minimizeSpans=false. - return - } - spanExpr = newSpanExpr - // If the span was reduced, the original filters must be applied. - remainingFilters = filters - } spansToRead := spanExpr.SpansToRead // Override the filters with remainingFilters. If the index is a // multi-column inverted index, the non-inverted prefix columns are @@ -953,7 +889,7 @@ func (c *CustomFuncs) generateInvertedIndexScansImpl( newScanPrivate := *scanPrivate newScanPrivate.Distribution.Regions = nil newScanPrivate.Index = index.Ordinal() - newScanPrivate.SetConstraint(c.e.ctx, c.e.evalCtx, con) + newScanPrivate.SetConstraint(c.e.ctx, c.e.evalCtx, constraint) newScanPrivate.InvertedConstraint = spansToRead if scanPrivate.Flags.NoIndexJoin { @@ -999,86 +935,6 @@ func (c *CustomFuncs) generateInvertedIndexScansImpl( }) } -// reduceInvertedSpans attempts to reduce the spans-to-scan in the given span -// expression by finding the lowest cardinality, conjunctive span. If the given -// span expression cannot be reduced, ok=false is returned. -func reduceInvertedSpans( - ctx context.Context, - grp memo.RelExpr, - tabID opt.TableID, - index cat.Index, - spanExpr *inverted.SpanExpression, -) (newSpan *inverted.SpanExpression, ok bool) { - // Span expressions that are not unions or intersections cannot be reduced. - if spanExpr.Operator == inverted.None { - return nil, false - } - - colID := tabID.ColumnID(index.InvertedColumn().Ordinal()) - colStat, ok := grp.Memo().RequestColStat(grp, opt.MakeColSet(colID)) - if !ok || colStat.Histogram == nil { - // Only attempt to reduce spans if we have histogram statistics. - // TODO(mgartner): We could blindly reduce the spans without a - // histogram, which will probably be better than doing nothing. - return nil, false - } - histogram := colStat.Histogram - - var lowestCardinality float64 - var findLowestCardinalitySpan func(span *inverted.SpanExpression) - findLowestCardinalitySpan = func(span *inverted.SpanExpression) { - switch span.Operator { - case inverted.SetIntersection: - // Recurse into each side looking for the lowest cardinality span. - if len(span.FactoredUnionSpans) > 0 { - // Check that FactoredUnionSpans is empty. A span expression - // with non-empty FactoredUnionSpans is equivalent to a UNION - // between the FactoredUnionSpans and the intersected children, - // so we can't reduce the span. - return - } - l, ok := span.Left.(*inverted.SpanExpression) - if !ok { - return - } - r, ok := span.Right.(*inverted.SpanExpression) - if !ok { - return - } - findLowestCardinalitySpan(l) - findLowestCardinalitySpan(r) - case inverted.SetUnion, inverted.None: - // We cannot recurse into unions because both sides must be scanned. - // So we consider a union a "leaf". 
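The removed reduceInvertedSpans walks an intersection tree and keeps only the conjunct estimated, via the column's histogram, to touch the fewest index entries; unions act as leaves because both branches of a union must be scanned, and an intersection with non-empty FactoredUnionSpans is really a union and cannot be reduced. A rough, self-contained sketch of that traversal over toy types (not the real inverted.SpanExpression API):

package invertedreduce

// Toy stand-ins for inverted.SpanExpression and the histogram lookup; the real
// code walks FactoredUnionSpans, Left, and Right on *inverted.SpanExpression.
type op int

const (
	leaf op = iota // a plain set of spans, or a union (both sides must be scanned)
	intersection
)

type spanExpr struct {
	op            op
	factoredUnion bool      // non-empty FactoredUnionSpans in the real code
	left, right   *spanExpr // set only for intersections
	keys          []string  // single-value inverted keys this subtree scans
}

// estimate sums per-key row counts, like the removed cardinalityEstimate did
// with histogram.EqEstimate; it gives up unless every key has an estimate.
func estimate(rowsPerKey map[string]float64, e *spanExpr) (float64, bool) {
	var total float64
	for _, k := range e.keys {
		n, ok := rowsPerKey[k]
		if !ok {
			return 0, false
		}
		total += n
	}
	return total, true
}

// reduce returns the conjunct with the lowest estimated cardinality, or nil if
// the expression cannot be reduced (mirroring findLowestCardinalitySpan).
func reduce(rowsPerKey map[string]float64, e *spanExpr) *spanExpr {
	var best *spanExpr
	var bestCard float64
	var walk func(e *spanExpr)
	walk = func(e *spanExpr) {
		switch e.op {
		case intersection:
			if e.factoredUnion {
				// Equivalent to a union with the children; not reducible.
				return
			}
			if e.left != nil {
				walk(e.left)
			}
			if e.right != nil {
				walk(e.right)
			}
		case leaf:
			// Unions are leaves: both sides must be scanned together.
			if card, ok := estimate(rowsPerKey, e); ok && (best == nil || card < bestCard) {
				best, bestCard = e, card
			}
		}
	}
	walk(e)
	return best
}

With the statistics injected in the testdata further down, `a @> '{1}' AND a @> '{3}'` becomes an intersection of two single-key leaves estimated at roughly 1990 and 10 rows, so the reduction keeps the [3, 3] leaf and reapplies the original filters on top of the index join.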
- cardinality, ok := cardinalityEstimate(ctx, histogram, span) - if ok && (newSpan == nil || cardinality < lowestCardinality) { - newSpan = span - lowestCardinality = cardinality - } - } - } - findLowestCardinalitySpan(spanExpr) - - return newSpan, newSpan != nil -} - -// cardinalityEstimate returns an estimated number of rows that will be scanned -// with spanExpr based on the given histogram. -func cardinalityEstimate( - ctx context.Context, histogram *props.Histogram, spanExpr *inverted.SpanExpression, -) (cardinality float64, ok bool) { - for i := range spanExpr.SpansToRead { - span := spanExpr.SpansToRead[i] - if !span.IsSingleVal() { - // We can currently only estimate the cardinality of single-valued - // spans. - return 0, false - } - cardinality += histogram.EqEstimate(ctx, tree.NewDEncodedKey(tree.DEncodedKey(span.Start))) - } - return cardinality, true -} - // GenerateTrigramSimilarityInvertedIndexScans generates scans on inverted // trigram indexes that are constrained by similarity filters (e.g., // `s % 'foo'`). It is similar conceptually to GenerateInvertedIndexScans, but diff --git a/pkg/sql/opt/xform/testdata/rules/select b/pkg/sql/opt/xform/testdata/rules/select index 968106f49268..e53bb7b15508 100644 --- a/pkg/sql/opt/xform/testdata/rules/select +++ b/pkg/sql/opt/xform/testdata/rules/select @@ -8517,426 +8517,6 @@ project └── projections └── 1 [as="?column?":15] - -# -------------------------------------------------- -# GenerateMinimalInvertedIndexScans -# -------------------------------------------------- - -exec-ddl -CREATE TABLE min ( - k INT PRIMARY KEY, - j JSON, - a INT[], - INVERTED INDEX j_idx (j), - INVERTED INDEX a_idx (a) -) ----- - -# Histogram boundaries are for arrays with values 1, 2, 3, and 4, including some -# empty arrays. The row_count is lower than the sum of the histogram buckets -# num_eq's because some rows can have multiple inverted index entries, for -# example `{1, 2}`. There are: -# -# - 2000 rows total -# - 10 empty arrays -# - 1990 arrays encoded into 2020 index entries -# -# Histogram boundaries are for JSON values `[]`, `{}`, `[1]`, `[2]`, `[3]`, -# `{"a": "b"}`, `{"c": "d"}`, and `{"e": "f"}`. The row_count is lower than the -# sum of the histogram buckets num_eq's because some rows can have multiple -# inverted index entries, for example `{"a": "b", "c": "d"}`. 
There are: -# -# - 2000 rows total -# - 10 empty arrays -# - 990 arrays encoded into 1110 index entries -# - 10 empty objects -# - 990 objects encoded into 1110 index entries -# -exec-ddl -ALTER TABLE min INJECT STATISTICS '[ - { - "columns": ["a"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 2000, - "distinct_count": 3, - "null_count": 0, - "histo_col_type": "BYTES", - "histo_buckets": [ - { - "distinct_range": 0, - "num_eq": 10, - "num_range": 0, - "upper_bound": "\\x43" - }, - { - "distinct_range": 0, - "num_eq": 1990, - "num_range": 0, - "upper_bound": "\\x89" - }, - { - "distinct_range": 0, - "num_eq": 10, - "num_range": 0, - "upper_bound": "\\x8a" - }, - { - "distinct_range": 0, - "num_eq": 10, - "num_range": 0, - "upper_bound": "\\x8b" - }, - { - "distinct_range": 0, - "num_eq": 10, - "num_range": 0, - "upper_bound": "\\x8c" - } - ] - }, - { - "columns": ["j"], - "created_at": "2018-01-01 1:00:00.00000+00:00", - "row_count": 2000, - "distinct_count": 10, - "null_count": 0, - "histo_col_type": "BYTES", - "histo_buckets": [ - { - "distinct_range": 0, - "num_eq": 10, - "num_range": 0, - "upper_bound": "\\x37000138" - }, - { - "distinct_range": 0, - "num_eq": 10, - "num_range": 0, - "upper_bound": "\\x37000139" - }, - { - "distinct_range": 0, - "num_eq": 990, - "num_range": 0, - "upper_bound": "\\x37000300012a0200" - }, - { - "distinct_range": 0, - "num_eq": 100, - "num_range": 0, - "upper_bound": "\\x37000300012a0400" - }, - { - "distinct_range": 0, - "num_eq": 10, - "num_range": 0, - "upper_bound": "\\x37000300012a0600" - }, - { - "distinct_range": 0, - "num_eq": 990, - "num_range": 0, - "upper_bound": "\\x3761000112620001" - }, - { - "distinct_range": 0, - "num_eq": 100, - "num_range": 0, - "upper_bound": "\\x3763000112640001" - }, - { - "distinct_range": 0, - "num_eq": 10, - "num_range": 0, - "upper_bound": "\\x3765000112660001" - } - ] - } -]' ----- - -# Scan over 3 since there are fewer rows containing 3 than 1. -# TODO(mgartner): The remaining filters could be reduced. -opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min WHERE a @> '{1}' AND a @> '{3}' ----- -select - ├── columns: k:1!null j:2 a:3!null - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── scan min@a_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /7/1 - │ │ └── spans: [3, 3] - │ └── key: (1) - └── filters - ├── a:3 @> ARRAY[1] [outer=(3), immutable, constraints=(/3: (/NULL - ])] - └── a:3 @> ARRAY[3] [outer=(3), immutable, constraints=(/3: (/NULL - ])] - -# Scan over 2 since there are fewer rows containing 2 than 1. 
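Concretely, with the injected statistics above the inverted entry for array value 1 accounts for roughly 1990 rows while the entries for 2, 3, and 4 account for about 10 each, so for the `a @> '{1, 2}'` case that follows, the reduced scan over [2, 2] is expected to touch on the order of 10 index entries rather than the roughly 1990 a scan over [1, 1] would touch.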
-opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min WHERE a @> '{1, 2}' ----- -select - ├── columns: k:1!null j:2 a:3!null - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── scan min@a_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /7/1 - │ │ └── spans: [2, 2] - │ └── key: (1) - └── filters - └── a:3 @> ARRAY[1,2] [outer=(3), immutable, constraints=(/3: (/NULL - ])] - -opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min@a_idx WHERE a @> '{2}' AND (a @> '{1}' OR a @> '{3}') ----- -select - ├── columns: k:1!null j:2 a:3!null - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── scan min@a_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /7/1 - │ │ └── spans: [2, 2] - │ ├── flags: force-index=a_idx - │ └── key: (1) - └── filters - ├── a:3 @> ARRAY[2] [outer=(3), immutable, constraints=(/3: (/NULL - ])] - └── (a:3 @> ARRAY[1]) OR (a:3 @> ARRAY[3]) [outer=(3), immutable, constraints=(/3: (/NULL - ])] - -opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min@a_idx WHERE (a @> '{2}' OR a @> '{4}') AND a @> '{1}' ----- -select - ├── columns: k:1!null j:2 a:3!null - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── inverted-filter - │ ├── columns: k:1!null - │ ├── inverted expression: /7 - │ │ ├── tight: true, unique: false - │ │ └── union spans - │ │ ├── [2, 2] - │ │ └── [4, 4] - │ ├── key: (1) - │ └── scan min@a_idx,inverted - │ ├── columns: k:1!null a_inverted_key:7!null - │ ├── inverted constraint: /7/1 - │ │ └── spans - │ │ ├── [2, 2] - │ │ └── [4, 4] - │ └── flags: force-index=a_idx - └── filters - ├── (a:3 @> ARRAY[2]) OR (a:3 @> ARRAY[4]) [outer=(3), immutable, constraints=(/3: (/NULL - ])] - └── a:3 @> ARRAY[1] [outer=(3), immutable, constraints=(/3: (/NULL - ])] - -# TODO(mgartner): Scanning [2 - 3] would be better, but the current -# implementation can only estimate the row count for single-value spans. 
-opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min@a_idx WHERE (a @> '{2}' OR a @> '{3}') AND a @> '{1}' ----- -index-join min - ├── columns: k:1!null j:2 a:3!null - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - └── inverted-filter - ├── columns: k:1!null - ├── inverted expression: /7 - │ ├── tight: true, unique: false - │ ├── union spans: empty - │ └── INTERSECTION - │ ├── span expression - │ │ ├── tight: true, unique: false - │ │ └── union spans: [2, 4) - │ └── span expression - │ ├── tight: true, unique: true - │ └── union spans: [1, 1] - ├── key: (1) - └── scan min@a_idx,inverted - ├── columns: k:1!null a_inverted_key:7!null - ├── inverted constraint: /7/1 - │ └── spans: [1, 4) - └── flags: force-index=a_idx - -opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min@a_idx WHERE a @> '{2, 3}' AND a @> '{1}' ----- -select - ├── columns: k:1!null j:2 a:3!null - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── scan min@a_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /7/1 - │ │ └── spans: [2, 2] - │ ├── flags: force-index=a_idx - │ └── key: (1) - └── filters - ├── a:3 @> ARRAY[2,3] [outer=(3), immutable, constraints=(/3: (/NULL - ])] - └── a:3 @> ARRAY[1] [outer=(3), immutable, constraints=(/3: (/NULL - ])] - -# The rule only applies when there are multiple spans to reduce. -opt expect-not=GenerateMinimalInvertedIndexScans format=hide-all -SELECT * FROM min WHERE a @> '{1}' ----- -select - ├── scan min - └── filters - └── a @> ARRAY[1] - -opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min@j_idx WHERE j @> '[1]' AND j @> '[3]' ----- -select - ├── columns: k:1!null j:2!null a:3 - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── scan min@j_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /6/1 - │ │ └── spans: [Arr/3, Arr/3] - │ ├── flags: force-index=j_idx - │ └── key: (1) - └── filters - ├── j:2 @> '[1]' [outer=(2), immutable, constraints=(/2: (/NULL - ])] - └── j:2 @> '[3]' [outer=(2), immutable, constraints=(/2: (/NULL - ])] - -opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min@j_idx WHERE j @> '[2]' AND j @> '[3]' ----- -select - ├── columns: k:1!null j:2!null a:3 - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── scan min@j_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /6/1 - │ │ └── spans: [Arr/3, Arr/3] - │ ├── flags: force-index=j_idx - │ └── key: (1) - └── filters - ├── j:2 @> '[2]' [outer=(2), immutable, constraints=(/2: (/NULL - ])] - └── j:2 @> '[3]' [outer=(2), immutable, constraints=(/2: (/NULL - ])] - -# The rule only applies when there are multiple spans to reduce. -opt expect-not=GenerateMinimalInvertedIndexScans format=hide-all -SELECT * FROM min WHERE j @> '[3]' ----- -index-join min - └── scan min@j_idx,inverted - └── inverted constraint: /6/1 - └── spans: [Arr/3, Arr/3] - -# The rule does not apply when for a disjunction of spans. 
-opt expect-not=GenerateMinimalInvertedIndexScans format=hide-all -SELECT * FROM b WHERE j @> '[3]' OR j @> '[[1, 2]]' ----- -select - ├── index-join b - │ └── inverted-filter - │ ├── inverted expression: /9 - │ │ ├── tight: false, unique: true - │ │ ├── union spans: [Arr/3, Arr/3] - │ │ └── INTERSECTION - │ │ ├── span expression - │ │ │ ├── tight: true, unique: true - │ │ │ └── union spans: [Arr/Arr/1, Arr/Arr/1] - │ │ └── span expression - │ │ ├── tight: true, unique: true - │ │ └── union spans: [Arr/Arr/2, Arr/Arr/2] - │ └── scan b@j_inv_idx,inverted - │ └── inverted constraint: /9/1 - │ └── spans - │ ├── [Arr/3, Arr/3] - │ ├── [Arr/Arr/1, Arr/Arr/1] - │ └── [Arr/Arr/2, Arr/Arr/2] - └── filters - └── (j @> '[3]') OR (j @> '[[1, 2]]') - -opt expect=GenerateMinimalInvertedIndexScans disable=GenerateInvertedIndexZigzagJoins -SELECT * FROM min@j_idx WHERE j->'a' = '"b"' AND j->'c' = '"d"' ----- -select - ├── columns: k:1!null j:2 a:3 - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── scan min@j_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /6/1 - │ │ └── spans: ["c"/"d", "c"/"d"] - │ ├── flags: force-index=j_idx - │ └── key: (1) - └── filters - ├── (j:2->'a') = '"b"' [outer=(2), immutable] - └── (j:2->'c') = '"d"' [outer=(2), immutable] - -opt expect=GenerateMinimalInvertedIndexScans -SELECT * FROM min@j_idx WHERE j->'a' = '"b"' AND j->'c' = '"d"' AND j->'e' = '"f"' ----- -select - ├── columns: k:1!null j:2 a:3 - ├── immutable - ├── key: (1) - ├── fd: (1)-->(2,3) - ├── index-join min - │ ├── columns: k:1!null j:2 a:3 - │ ├── key: (1) - │ ├── fd: (1)-->(2,3) - │ └── scan min@j_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /6/1 - │ │ └── spans: ["e"/"f", "e"/"f"] - │ ├── flags: force-index=j_idx - │ └── key: (1) - └── filters - ├── (j:2->'a') = '"b"' [outer=(2), immutable] - ├── (j:2->'c') = '"d"' [outer=(2), immutable] - └── (j:2->'e') = '"f"' [outer=(2), immutable] - - # -------------------------------------------------- # GenerateZigzagJoins # -------------------------------------------------- @@ -10094,7 +9674,7 @@ ALTER TABLE b INJECT STATISTICS '[ # Query only the primary key with a remaining filter. 2+ paths in containment # query should favor zigzag joins. -opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans +opt expect=GenerateInvertedIndexZigzagJoins SELECT k FROM b WHERE j @> '{"a": "b", "c": "d"}' ---- project @@ -10117,7 +9697,7 @@ project └── filters (true) # Query requiring a zigzag join with a remaining filter. -opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans +opt expect=GenerateInvertedIndexZigzagJoins SELECT j, k FROM b WHERE j @> '{"a": "b", "c": "d"}' ---- inner-join (lookup b) @@ -10135,7 +9715,7 @@ inner-join (lookup b) │ └── filters (true) └── filters (true) -opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans +opt expect=GenerateInvertedIndexZigzagJoins SELECT * FROM b WHERE j @> '{"a": {"b": "c", "d": "e"}, "f": "g"}' ---- inner-join (lookup b) @@ -10155,7 +9735,7 @@ inner-join (lookup b) └── j:4 @> '{"a": {"b": "c", "d": "e"}, "f": "g"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] # Three or more paths. Should generate zigzag joins. 
-opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans +opt expect=GenerateInvertedIndexZigzagJoins SELECT * FROM b WHERE j @> '{"a":[{"b":"c", "d":3}, 5]}' ---- inner-join (lookup b) @@ -10232,7 +9812,7 @@ ALTER TABLE c INJECT STATISTICS '[ # We need a remaining filter since only two of the three values # are covered by the zigzag join. -opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans +opt expect=GenerateInvertedIndexZigzagJoins SELECT k FROM c WHERE a @> ARRAY[1,3,1,5] ---- project @@ -10256,7 +9836,7 @@ project └── a:2 @> ARRAY[1,3,1,5] [outer=(2), immutable, constraints=(/2: (/NULL - ])] # Regression test for #95270. We should not need any remaining filter. -opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans +opt expect=GenerateInvertedIndexZigzagJoins SELECT k FROM c WHERE a @> ARRAY[1,2] ---- project @@ -10279,7 +9859,7 @@ project └── filters (true) # The first path can't be used for a zigzag join, but the second two can. -opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans +opt expect=GenerateInvertedIndexZigzagJoins SELECT * FROM b WHERE j @> '{"a":{}, "b":2, "c":3}' ---- inner-join (lookup b) @@ -10335,7 +9915,7 @@ select └── (j:4 @> '[3]') OR (j:4 @> '[[1, 2]]') [outer=(4), immutable, constraints=(/4: (/NULL - ])] # GenerateInvertedIndexZigzagJoins propagates row-level locking information. -opt expect=GenerateInvertedIndexZigzagJoins disable=GenerateMinimalInvertedIndexScans +opt expect=GenerateInvertedIndexZigzagJoins SELECT * FROM b WHERE j @> '{"a":1, "c":2}' FOR UPDATE ---- inner-join (lookup b) @@ -10364,23 +9944,26 @@ project ├── columns: k:1!null ├── immutable ├── key: (1) - └── select - ├── columns: k:1!null j:4!null - ├── immutable + └── inverted-filter + ├── columns: k:1!null + ├── inverted expression: /9 + │ ├── tight: true, unique: true + │ ├── union spans: empty + │ └── INTERSECTION + │ ├── span expression + │ │ ├── tight: true, unique: true + │ │ └── union spans: ["a"/"b", "a"/"b"] + │ └── span expression + │ ├── tight: true, unique: true + │ └── union spans: ["c"/"d", "c"/"d"] ├── key: (1) - ├── fd: (1)-->(4) - ├── index-join b - │ ├── columns: k:1!null j:4 - │ ├── key: (1) - │ ├── fd: (1)-->(4) - │ └── scan b@j_inv_idx,inverted - │ ├── columns: k:1!null - │ ├── inverted constraint: /9/1 - │ │ └── spans: ["c"/"d", "c"/"d"] - │ ├── flags: no-zigzag-join - │ └── key: (1) - └── filters - └── j:4 @> '{"a": "b", "c": "d"}' [outer=(4), immutable, constraints=(/4: (/NULL - ])] + └── scan b@j_inv_idx,inverted + ├── columns: k:1!null j_inverted_key:9!null + ├── inverted constraint: /9/1 + │ └── spans + │ ├── ["a"/"b", "a"/"b"] + │ └── ["c"/"d", "c"/"d"] + └── flags: no-zigzag-join exec-ddl CREATE TABLE inv_zz_partial ( diff --git a/pkg/sql/plan_opt.go b/pkg/sql/plan_opt.go index ebac9192a34b..468786db010a 100644 --- a/pkg/sql/plan_opt.go +++ b/pkg/sql/plan_opt.go @@ -149,14 +149,7 @@ func (p *planner) prepareUsingOptimizer( stmt.Prepared.StatementNoConstants = pm.StatementNoConstants stmt.Prepared.Columns = pm.Columns stmt.Prepared.Types = pm.Types - if cachedData.Memo.IsOptimized() { - // A cache, fully optimized memo is an "ideal generic - // memo". 
- stmt.Prepared.GenericMemo = cachedData.Memo - stmt.Prepared.IdealGenericPlan = true - } else { - stmt.Prepared.BaseMemo = cachedData.Memo - } + stmt.Prepared.BaseMemo = cachedData.Memo return opc.flags, nil } opc.log(ctx, "query cache hit but memo is stale (prepare)") @@ -170,7 +163,7 @@ func (p *planner) prepareUsingOptimizer( } // Build the memo. Do not attempt to build a generic plan at PREPARE-time. - memo, _, err := opc.buildReusableMemo(ctx, false /* allowNonIdealGeneric */) + memo, _, err := opc.buildReusableMemo(ctx, false /* buildGeneric */) if err != nil { return 0, err } @@ -220,14 +213,7 @@ func (p *planner) prepareUsingOptimizer( stmt.Prepared.Columns = resultCols stmt.Prepared.Types = p.semaCtx.Placeholders.Types if opc.allowMemoReuse { - if memo.IsOptimized() { - // A memo fully optimized at prepare time is an "ideal generic - // memo". - stmt.Prepared.GenericMemo = memo - stmt.Prepared.IdealGenericPlan = true - } else { - stmt.Prepared.BaseMemo = memo - } + stmt.Prepared.BaseMemo = memo if opc.useCache { // execPrepare sets the PrepareMetadata.InferredTypes field after this // point. However, once the PrepareMetadata goes into the cache, it @@ -433,13 +419,13 @@ const ( // 1. The statement does not contain placeholders nor fold-able stable // operators. // 2. Or, the placeholder fast path is used. -// 3. Or, allowNonIdealGeneric is true and the plan is fully optimized as best -// as possible in the presence of placeholders. +// 3. Or, buildGeneric is true and the plan is fully optimized as best as +// possible in the presence of placeholders. // // The returned memo is fully detached from the planner and can be used with // reuseMemo independently and concurrently by multiple threads. func (opc *optPlanningCtx) buildReusableMemo( - ctx context.Context, allowNonIdealGeneric bool, + ctx context.Context, buildGeneric bool, ) (*memo.Memo, memoType, error) { p := opc.p @@ -520,7 +506,7 @@ func (opc *optPlanningCtx) buildReusableMemo( opc.log(ctx, "placeholder fast path") opc.flags.Set(planFlagOptimized) return opc.optimizer.DetachMemo(ctx), memoTypeIdealGeneric, nil - } else if allowNonIdealGeneric { + } else if buildGeneric { // Build a generic query plan if the placeholder fast path failed and a // generic plan was requested. opc.log(ctx, "optimizing (generic)") @@ -544,7 +530,9 @@ func (opc *optPlanningCtx) buildReusableMemo( // // The returned memo is only safe to use in one thread, during execution of the // current statement. -func (opc *optPlanningCtx) reuseMemo(cachedMemo *memo.Memo) (*memo.Memo, error) { +func (opc *optPlanningCtx) reuseMemo( + ctx context.Context, cachedMemo *memo.Memo, +) (*memo.Memo, error) { opc.incPlanTypeTelemetry(cachedMemo) if cachedMemo.IsOptimized() { // The query could have been already fully optimized in @@ -593,15 +581,11 @@ func (opc *optPlanningCtx) incPlanTypeTelemetry(cachedMemo *memo.Memo) { // useGenericPlan returns true if a generic query plan should be used instead of // a custom plan. func (opc *optPlanningCtx) useGenericPlan() bool { - prep := opc.p.stmt.Prepared - // Always use an ideal generic plan. - if prep.IdealGenericPlan { - return true - } switch opc.p.SessionData().PlanCacheMode { case sessiondatapb.PlanCacheModeForceGeneric: return true case sessiondatapb.PlanCacheModeAuto: + prep := opc.p.stmt.Prepared // We need to build CustomPlanThreshold custom plans before considering // a generic plan. 
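With the ideal-generic short-circuit removed, whether a generic plan is reused now depends only on plan_cache_mode and the tracked plan costs. A condensed sketch of that decision with toy types (the real planCosts API differs, customPlanThreshold is assumed here to match the "at least 5 custom plans" rule in the chooseValidPreparedMemo comment below, and the auto-mode cost comparison is an assumption since it sits outside this hunk):

package plancache

// Toy stand-ins; the real code uses sessiondatapb.PlanCacheMode and the
// prepared statement's planCosts.
type planCacheMode int

const (
	forceCustomPlan planCacheMode = iota
	forceGenericPlan
	autoMode
)

const customPlanThreshold = 5 // assumed value of CustomPlanThreshold

type planCosts struct {
	numCustom  int
	avgCustom  float64
	generic    float64
	hasGeneric bool
}

// useGenericPlan sketches the decision: force_generic_plan always picks the
// generic memo; auto first builds customPlanThreshold custom plans, then
// prefers the generic plan only while it is estimated to be no more expensive
// than the average custom plan (assumed comparison); force_custom_plan never
// picks it.
func useGenericPlan(mode planCacheMode, c planCosts) bool {
	switch mode {
	case forceGenericPlan:
		return true
	case autoMode:
		if c.numCustom < customPlanThreshold {
			return false // keep building custom plans first
		}
		if !c.hasGeneric {
			return true // build and cost a generic plan once
		}
		return c.generic <= c.avgCustom
	default:
		return false
	}
}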
if prep.Costs.NumCustom() < CustomPlanThreshold { @@ -625,7 +609,7 @@ func (opc *optPlanningCtx) useGenericPlan() bool { // from, baseMemo or genericMemo. It returns nil if both memos are stale. It // selects baseMemo or genericMemo based on the following rules, in order: // -// 1. If the generic memo is ideal, it is returned as-is. +// 1. If baseMemo is fully optimized and not stale, it is returned as-is. // 2. If plan_cache_mode=force_generic_plan is true then genericMemo is // returned as-is if it is not stale. // 3. If plan_cache_mode=auto, there have been at least 5 custom plans @@ -638,37 +622,54 @@ func (opc *optPlanningCtx) useGenericPlan() bool { // stale. // 5. Otherwise, nil is returned and the caller is responsible for building a // new memo. -func (opc *optPlanningCtx) chooseValidPreparedMemo(ctx context.Context) (*memo.Memo, error) { - prep := opc.p.stmt.Prepared - if opc.useGenericPlan() { - if prep.GenericMemo == nil { - // A generic plan does not yet exist. - return nil, nil +// +// The logic is structured to avoid unnecessary (*memo.Memo).IsStale calls, +// since they can be expensive. +func (opc *optPlanningCtx) chooseValidPreparedMemo( + ctx context.Context, baseMemo *memo.Memo, genericMemo *memo.Memo, +) (*memo.Memo, error) { + // First check for a fully optimized, non-stale, base memo. + if baseMemo != nil && baseMemo.IsOptimized() { + isStale, err := baseMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog) + if err != nil { + return nil, err + } else if !isStale { + return baseMemo, nil } - isStale, err := prep.GenericMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog) + } + + prep := opc.p.stmt.Prepared + reuseGeneric := opc.useGenericPlan() + + // Next check for a non-stale, generic memo. + if reuseGeneric && genericMemo != nil { + isStale, err := genericMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog) if err != nil { return nil, err } else if !isStale { - return prep.GenericMemo, nil + return genericMemo, nil + } else { + // Clear the generic cost if the memo is stale. DDL or new stats + // could drastically change the cost of generic and custom plans, so + // we should re-consider which to use. + prep.Costs.ClearGeneric() } - // Clear the generic cost if the memo is stale. DDL or new stats - // could drastically change the cost of generic and custom plans, so - // we should re-consider which to use. - prep.Costs.ClearGeneric() - return nil, nil } - if prep.BaseMemo != nil { - isStale, err := prep.BaseMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog) + // Next, check for a non-stale, normalized memo, if a generic memo should + // not be reused. + if !reuseGeneric && baseMemo != nil && !baseMemo.IsOptimized() { + isStale, err := baseMemo.IsStale(ctx, opc.p.EvalContext(), opc.catalog) if err != nil { return nil, err } else if !isStale { - return prep.BaseMemo, nil + return baseMemo, nil + } else { + // Clear the custom costs if the memo is stale. DDL or new stats + // could drastically change the cost of generic and custom plans, so + // we should re-consider which to use. + prep.Costs.ClearCustom() } - // Clear the custom costs if the memo is stale. DDL or new stats - // could drastically change the cost of generic and custom plans, so - // we should re-consider which to use. - prep.Costs.ClearCustom() } // A valid memo was not found. @@ -706,13 +707,13 @@ func (opc *optPlanningCtx) fetchPreparedMemo(ctx context.Context) (_ *memo.Memo, // If the statement was previously prepared, check for a reusable memo. // First check for a valid (non-stale) memo. 
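Read as a whole, the new selection prefers, in order: a fully optimized (ideal generic) base memo, then the generic memo when useGenericPlan() asks for one, then the normalized base memo, calling the potentially expensive IsStale at most once per candidate and clearing the matching cached costs when a candidate turns out to be stale. A compact restatement with toy types (error handling and the real memo.Memo API elided):

package plancache

// Toy stand-ins for *memo.Memo and the prepared statement's cost bookkeeping.
type cachedMemo struct {
	optimized bool // fully optimized, i.e. reusable as-is
	stale     bool // result of the (expensive) IsStale check
}

type costBook struct{ generic, custom bool }

func (c *costBook) clearGeneric() { c.generic = false }
func (c *costBook) clearCustom()  { c.custom = false }

// choose mirrors chooseValidPreparedMemo's order of preference.
func choose(base, generic *cachedMemo, reuseGeneric bool, costs *costBook) *cachedMemo {
	// 1. An optimized base memo is an "ideal generic plan": reuse it whenever
	//    it is not stale, regardless of plan_cache_mode.
	if base != nil && base.optimized && !base.stale {
		return base
	}
	// 2. Reuse the generic memo if the session policy asks for generic plans.
	if reuseGeneric && generic != nil {
		if !generic.stale {
			return generic
		}
		costs.clearGeneric() // stale: re-evaluate generic vs. custom later
	}
	// 3. Otherwise fall back to the normalized base memo for a custom plan.
	if !reuseGeneric && base != nil && !base.optimized {
		if !base.stale {
			return base
		}
		costs.clearCustom()
	}
	// 4. Nothing valid: the caller rebuilds via buildReusableMemo.
	return nil
}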
- validMemo, err := opc.chooseValidPreparedMemo(ctx) + validMemo, err := opc.chooseValidPreparedMemo(ctx, prep.BaseMemo, prep.GenericMemo) if err != nil { return nil, err } if validMemo != nil { opc.log(ctx, "reusing cached memo") - return opc.reuseMemo(validMemo) + return opc.reuseMemo(ctx, validMemo) } // Otherwise, we need to rebuild the memo. @@ -726,34 +727,60 @@ func (opc *optPlanningCtx) fetchPreparedMemo(ctx context.Context) (_ *memo.Memo, if err != nil { return nil, err } - if opc.allowMemoReuse { - switch typ { - case memoTypeIdealGeneric: - // An "ideal" generic memo will always be used regardless of - // plan_cache_mode, so there is no need to set GenericCost. - prep.GenericMemo = newMemo - prep.IdealGenericPlan = true - case memoTypeGeneric: - prep.GenericMemo = newMemo - prep.Costs.SetGeneric(newMemo.RootExpr().(memo.RelExpr).Cost()) - // Now that the cost of the generic plan is known, we need to - // re-evaluate the decision to use a generic or custom plan. - if !opc.useGenericPlan() { - // The generic plan that we just built is too expensive, so we need - // to build a custom plan. We recursively call fetchPreparedMemo in - // case we have a custom plan that can be reused as a starting point - // for optimization. The function should not recurse more than once. - return opc.fetchPreparedMemo(ctx) - } - case memoTypeCustom: - prep.BaseMemo = newMemo - default: - return nil, errors.AssertionFailedf("unexpected memo type %v", typ) + switch typ { + case memoTypeIdealGeneric: + // If we have an "ideal" generic memo, store it as a base memo. It will + // always be used regardless of plan_cache_mode, so there is no need to + // set GenericCost. + prep.BaseMemo = newMemo + case memoTypeGeneric: + prep.GenericMemo = newMemo + prep.Costs.SetGeneric(newMemo.RootExpr().(memo.RelExpr).Cost()) + // Now that the cost of the generic plan is known, we need to + // re-evaluate the decision to use a generic or custom plan. + if !opc.useGenericPlan() { + // The generic plan that we just built is too expensive, so we need + // to build a custom plan. We recursively call fetchPreparedMemo in + // case we have a custom plan that can be reused as a starting point + // for optimization. The function should not recurse more than once. + return opc.fetchPreparedMemo(ctx) } + case memoTypeCustom: + prep.BaseMemo = newMemo + default: + return nil, errors.AssertionFailedf("unexpected memo type %v", typ) } // Re-optimize the memo, if necessary. - return opc.reuseMemo(newMemo) + return opc.reuseMemo(ctx, newMemo) +} + +// fetchPreparedMemoLegacy attempts to fetch a prepared memo. If a valid (i.e., +// non-stale) memo is found, it is used. Otherwise, a new statement will be +// built. If memo reuse is not allowed, nil is returned. +func (opc *optPlanningCtx) fetchPreparedMemoLegacy(ctx context.Context) (_ *memo.Memo, err error) { + prepared := opc.p.stmt.Prepared + p := opc.p + if opc.allowMemoReuse && prepared != nil && prepared.BaseMemo != nil { + // We are executing a previously prepared statement and a reusable memo is + // available. + + // If the prepared memo has been invalidated by schema or other changes, + // re-prepare it. 
+ if isStale, err := prepared.BaseMemo.IsStale(ctx, p.EvalContext(), opc.catalog); err != nil { + return nil, err + } else if isStale { + opc.log(ctx, "rebuilding cached memo") + prepared.BaseMemo, _, err = opc.buildReusableMemo(ctx, false /* buildGeneric */) + if err != nil { + return nil, err + } + } + opc.log(ctx, "reusing cached memo") + return opc.reuseMemo(ctx, prepared.BaseMemo) + } + + return nil, nil } // buildExecMemo creates a fully optimized memo, possibly reusing a previously @@ -767,19 +794,32 @@ func (opc *optPlanningCtx) buildExecMemo(ctx context.Context) (_ *memo.Memo, _ e // rollback its transaction. Use resumeProc to resume execution in a new // transaction where the control statement left off. opc.log(ctx, "resuming stored procedure execution in a new transaction") - return opc.reuseMemo(resumeProc) + return opc.reuseMemo(ctx, resumeProc) } - // Fetch and reuse a memo if a valid one is available. - m, err := opc.fetchPreparedMemo(ctx) - if err != nil { - return nil, err - } - if m != nil { - return m, nil + p := opc.p + if p.SessionData().PlanCacheMode == sessiondatapb.PlanCacheModeForceCustom { + // Fallback to the legacy logic for reusing memos if plan_cache_mode is + // set to force_custom_plan. + m, err := opc.fetchPreparedMemoLegacy(ctx) + if err != nil { + return nil, err + } + if m != nil { + return m, nil + } + } else { + // Use new logic for reusing memos if plan_cache_mode is set to + // force_generic_plan or auto. + m, err := opc.fetchPreparedMemo(ctx) + if err != nil { + return nil, err + } + if m != nil { + return m, nil + } } - p := opc.p if opc.useCache { // Consult the query cache. cachedData, ok := p.execCfg.QueryCache.Find(&p.queryCacheSession, opc.p.stmt.SQL) @@ -788,7 +828,7 @@ func (opc *optPlanningCtx) buildExecMemo(ctx context.Context) (_ *memo.Memo, _ e return nil, err } else if isStale { opc.log(ctx, "query cache hit but needed update") - cachedData.Memo, _, err = opc.buildReusableMemo(ctx, false /* allowNonIdealGeneric */) + cachedData.Memo, _, err = opc.buildReusableMemo(ctx, false /* buildGeneric */) if err != nil { return nil, err } @@ -801,7 +841,7 @@ func (opc *optPlanningCtx) buildExecMemo(ctx context.Context) (_ *memo.Memo, _ e opc.log(ctx, "query cache hit") opc.flags.Set(planFlagOptCacheHit) } - return opc.reuseMemo(cachedData.Memo) + return opc.reuseMemo(ctx, cachedData.Memo) } opc.flags.Set(planFlagOptCacheMiss) opc.log(ctx, "query cache miss") diff --git a/pkg/sql/prepared_stmt.go b/pkg/sql/prepared_stmt.go index 028a9c707d01..94216d3d7868 100644 --- a/pkg/sql/prepared_stmt.go +++ b/pkg/sql/prepared_stmt.go @@ -52,18 +52,23 @@ type PreparedStatement struct { // BaseMemo is the memoized data structure constructed by the cost-based // optimizer during prepare of a SQL statement. + // + // It may be a fully-optimized memo if it contains an "ideal generic plan" + // that is guaranteed to be optimal across all executions of the prepared + // statement. Ideal generic plans are generated when the statement has no + // placeholders nor fold-able stable expressions, or when the placeholder + // fast-path is utilized. + // + // If it is not an ideal generic plan, it is an unoptimized, normalized + // memo that is used as a starting point for optimization of custom plans. BaseMemo *memo.Memo // GenericMemo, if present, is a fully-optimized memo that can be executed // as-is. + // TODO(mgartner): Put all fully-optimized plans in the GenericMemo field to + // reduce confusion. 
GenericMemo *memo.Memo - // IdealGenericPlan is true if GenericMemo is guaranteed to be optimal - // across all executions of the prepared statement. Ideal generic plans are - // generated when the statement has no placeholders nor fold-able stable - // expressions, or when the placeholder fast-path is utilized. - IdealGenericPlan bool - // Costs tracks the costs of previously optimized custom and generic plans. Costs planCosts diff --git a/pkg/sql/reassign_owned_by.go b/pkg/sql/reassign_owned_by.go index 89e1d0d14cd0..c00c63e47618 100644 --- a/pkg/sql/reassign_owned_by.go +++ b/pkg/sql/reassign_owned_by.go @@ -8,6 +8,7 @@ package sql import ( "context" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/security/username" "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/sql/catalog" @@ -19,7 +20,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/decodeusername" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" - "github.com/cockroachdb/cockroach/pkg/sql/sem/catconstants" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" "github.com/cockroachdb/errors" @@ -120,26 +120,27 @@ func (n *reassignOwnedByNode) startExec(params runParams) error { for _, oldRole := range n.normalizedOldRoles { // There should only be one database (current). for _, dbID := range lCtx.dbIDs { - isOwner, err := isOwner(params.ctx, params.p, lCtx.dbDescs[dbID], oldRole) + dbDesc := lCtx.dbDescs[dbID] + owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, dbDesc) if err != nil { return err } - if isOwner { - if err := n.reassignDatabaseOwner(lCtx.dbDescs[dbID], params); err != nil { + if owner == oldRole { + if err := n.reassignDatabaseOwner(dbDesc, params); err != nil { return err } } } for _, schemaID := range lCtx.schemaIDs { - isOwner, err := isOwner(params.ctx, params.p, lCtx.schemaDescs[schemaID], oldRole) + schemaDesc := lCtx.schemaDescs[schemaID] + owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, schemaDesc) if err != nil { return err } - if isOwner { - // Don't reassign public schema. - // TODO(richardjcai): revisit this in 22.2, in 22.1 we do not allow - // modifying the public schema. - if lCtx.schemaDescs[schemaID].GetName() == catconstants.PublicSchemaName { + if owner == oldRole { + // Don't reassign the descriptorless public schema for the system + // database. 
+ if schemaID == keys.SystemPublicSchemaID { continue } if err := n.reassignSchemaOwner(lCtx.schemaDescs[schemaID], currentDbDesc, params); err != nil { @@ -149,33 +150,36 @@ func (n *reassignOwnedByNode) startExec(params runParams) error { } for _, tbID := range lCtx.tbIDs { - isOwner, err := isOwner(params.ctx, params.p, lCtx.tbDescs[tbID], oldRole) + tbDesc := lCtx.tbDescs[tbID] + owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, tbDesc) if err != nil { return err } - if isOwner { + if owner == oldRole { if err := n.reassignTableOwner(lCtx.tbDescs[tbID], params); err != nil { return err } } } for _, typID := range lCtx.typIDs { - isOwner, err := isOwner(params.ctx, params.p, lCtx.typDescs[typID], oldRole) + typDesc := lCtx.typDescs[typID] + owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, typDesc) if err != nil { return err } - if isOwner && (lCtx.typDescs[typID].AsAliasTypeDescriptor() == nil) { + if owner == oldRole && (lCtx.typDescs[typID].AsAliasTypeDescriptor() == nil) { if err := n.reassignTypeOwner(lCtx.typDescs[typID].(catalog.NonAliasTypeDescriptor), params); err != nil { return err } } } for _, fnID := range lCtx.fnIDs { - isOwner, err := isOwner(params.ctx, params.p, lCtx.fnDescs[fnID], oldRole) + fnDesc := lCtx.fnDescs[fnID] + owner, err := params.p.getOwnerOfPrivilegeObject(params.ctx, fnDesc) if err != nil { return err } - if isOwner { + if owner == oldRole { if err := n.reassignFunctionOwner(lCtx.fnDescs[fnID], params); err != nil { return err } diff --git a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/helpers.go b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/helpers.go index 3117eba70f95..5246a968811e 100644 --- a/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/helpers.go +++ b/pkg/sql/schemachanger/scbuild/internal/scbuildstmt/helpers.go @@ -991,12 +991,24 @@ func panicIfSchemaChangeIsDisallowed(tableElements ElementResultSet, n tree.Stat } _, _, ldrJobIDs := scpb.FindLDRJobIDs(tableElements) - if ldrJobIDs != nil && len(ldrJobIDs.JobIDs) > 0 && !tree.IsAllowedLDRSchemaChange(n) { - _, _, ns := scpb.FindNamespace(tableElements) - if ns == nil { - panic(errors.AssertionFailedf("programming error: Namespace element not found")) + if ldrJobIDs != nil && len(ldrJobIDs.JobIDs) > 0 { + var virtualColNames []string + scpb.ForEachColumnType(tableElements, func(current scpb.Status, target scpb.TargetStatus, colTypeElem *scpb.ColumnType) { + if !colTypeElem.IsVirtual { + return + } + col := tableElements.FilterColumnName().Filter(func(current scpb.Status, target scpb.TargetStatus, colNameElem *scpb.ColumnName) bool { + return colNameElem.ColumnID == colTypeElem.ColumnID && target == scpb.ToPublic + }).MustGetOneElement() + virtualColNames = append(virtualColNames, col.Name) + }) + if !tree.IsAllowedLDRSchemaChange(n, virtualColNames) { + _, _, ns := scpb.FindNamespace(tableElements) + if ns == nil { + panic(errors.AssertionFailedf("programming error: Namespace element not found")) + } + panic(sqlerrors.NewDisallowedSchemaChangeOnLDRTableErr(ns.Name, ldrJobIDs.JobIDs)) } - panic(sqlerrors.NewDisallowedSchemaChangeOnLDRTableErr(ns.Name, ldrJobIDs.JobIDs)) } } diff --git a/pkg/sql/sem/tree/schema_helpers.go b/pkg/sql/sem/tree/schema_helpers.go index a7e4b55f852d..e78a36e376b9 100644 --- a/pkg/sql/sem/tree/schema_helpers.go +++ b/pkg/sql/sem/tree/schema_helpers.go @@ -32,14 +32,51 @@ func IsSetOrResetSchemaLocked(n Statement) bool { // IsAllowedLDRSchemaChange returns true if the schema change statement is // allowed to occur while the table is 
being referenced by a logical data // replication job as a destination table. -func IsAllowedLDRSchemaChange(n Statement) bool { +func IsAllowedLDRSchemaChange(n Statement, virtualColNames []string) bool { switch s := n.(type) { case *CreateIndex: - // Only allow non-unique and non-partial indexes to be created. A unique or - // partial index on a destination table could cause inserts to fail. - return !s.Unique && s.Predicate == nil + // Don't allow creating an index on a virtual column. + for _, col := range s.Columns { + if slices.Contains(virtualColNames, string(col.Column)) { + return false + } + } + // Disallow unique, partial, or hash-sharded indexes. Having these indexes + // on a destination table could cause inserts to fail. + // NB: hash-sharded indexes are disallowed since they create an index on a + // virtual column. Since it also implicitly creates the virtual column + // at the same time, the check above on virtualColNames would not block it. + return !s.Unique && s.Predicate == nil && s.Sharded == nil case *DropIndex: return true + case *SetZoneConfig: + return true + case *AlterTable: + onlySafeStorageParams := true + for _, cmd := range s.Cmds { + switch c := cmd.(type) { + // Allow safe storage parameter changes. + case *AlterTableSetStorageParams: + // ttl_expire_after is not safe since it creates a new column and + // backfills it. + if c.StorageParams.GetVal("ttl_expire_after") != nil { + onlySafeStorageParams = false + } + case *AlterTableResetStorageParams: + if slices.Contains(c.Params, "ttl_expire_after") { + // Resetting `ttl_expire_after` is not safe since it drops a column + // and rebuilds the primary index. + onlySafeStorageParams = false + } else if slices.Contains(c.Params, "ttl") { + // Resetting `ttl` can also result in the expiration column being + // dropped. + onlySafeStorageParams = false + } + default: + onlySafeStorageParams = false + } + } + return onlySafeStorageParams } return false } diff --git a/pkg/sql/sem/tree/schema_helpers_test.go b/pkg/sql/sem/tree/schema_helpers_test.go index c300979266ea..befdd0c65ac9 100644 --- a/pkg/sql/sem/tree/schema_helpers_test.go +++ b/pkg/sql/sem/tree/schema_helpers_test.go @@ -58,13 +58,31 @@ func TestIsAllowedLDRSchemaChange(t *testing.T) { stmt: "ALTER TABLE t ADD COLUMN a INT, DROP COLUMN b", isAllowed: false, }, + { + stmt: "ALTER TABLE t ADD COLUMN a INT, SET (ttl = 'on', ttl_expiration_expression = 'expires_at')", + isAllowed: false, + }, + { + stmt: "ALTER TABLE t SET (ttl = 'on', ttl_expire_after = '5m')", + isAllowed: false, + }, + { + stmt: "ALTER TABLE t SET (ttl = 'on', ttl_expiration_expression = 'expires_at')", + isAllowed: true, + }, + { + stmt: "ALTER TABLE t RESET (ttl, ttl_expiration_expression)", + isAllowed: false, + }, } { t.Run(tc.stmt, func(t *testing.T) { stmt, err := parser.ParseOne(tc.stmt) if err != nil { t.Fatal(err) } - if got := tree.IsAllowedLDRSchemaChange(stmt.AST); got != tc.isAllowed { + // Tests for virtual column checks are in + // TestLogicalReplicationCreationChecks. 
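A small, hypothetical unit-level companion for the new virtualColNames parameter (the real end-to-end coverage lives in TestLogicalReplicationCreationChecks, per the comment above; the index and column names here are illustrative only, and the imports are the same ones this test file already uses):

func TestIsAllowedLDRSchemaChangeVirtualColumn(t *testing.T) {
	stmt, err := parser.ParseOne("CREATE INDEX idx ON t (v)")
	if err != nil {
		t.Fatal(err)
	}
	// A plain, non-unique, non-partial, non-sharded index is allowed when "v"
	// is an ordinary column on the destination table...
	if !tree.IsAllowedLDRSchemaChange(stmt.AST, nil /* virtualColNames */) {
		t.Error("expected plain CREATE INDEX to be allowed")
	}
	// ...but rejected when "v" is one of the table's virtual columns.
	if tree.IsAllowedLDRSchemaChange(stmt.AST, []string{"v"}) {
		t.Error("expected CREATE INDEX on a virtual column to be rejected")
	}
}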
+ if got := tree.IsAllowedLDRSchemaChange(stmt.AST, nil /* virtualColNames */); got != tc.isAllowed { t.Errorf("expected %v, got %v", tc.isAllowed, got) } }) diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index 936c03a0862c..3e670fef751a 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -406,10 +406,10 @@ func ShouldUseEFOS(settings *settings.Values) bool { return UseEFOS.Get(settings) || UseExciseForSnapshots.Get(settings) } -// EngineSuffixCompare implements pebble.Comparer.CompareSuffixes. It compares +// EngineRangeSuffixCompare implements pebble.Comparer.CompareSuffixes. It compares // cockroach suffixes (which are composed of the version and a trailing sentinel // byte); the version can be an MVCC timestamp or a lock key. -func EngineSuffixCompare(a, b []byte) int { +func EngineRangeSuffixCompare(a, b []byte) int { if len(a) == 0 || len(b) == 0 { // Empty suffixes sort before non-empty suffixes. return cmp.Compare(len(a), len(b)) @@ -617,10 +617,12 @@ func normalizeEngineSuffixForCompare(a []byte) []byte { // EngineComparer is a pebble.Comparer object that implements MVCC-specific // comparator settings for use with Pebble. var EngineComparer = &pebble.Comparer{ - Split: EngineKeySplit, - CompareSuffixes: EngineSuffixCompare, - Compare: EngineKeyCompare, - Equal: EngineKeyEqual, + Split: EngineKeySplit, + CompareRangeSuffixes: EngineRangeSuffixCompare, + ComparePointSuffixes: EnginePointSuffixCompare, + + Compare: EngineKeyCompare, + Equal: EngineKeyEqual, AbbreviatedKey: func(k []byte) uint64 { key, ok := GetKeyPartFromEngineKey(k) @@ -840,7 +842,7 @@ func DefaultPebbleOptions() *pebble.Options { Comparer: EngineComparer, FS: vfs.Default, KeySchema: keySchema.Name, - KeySchemas: sstable.MakeKeySchemas(keySchema), + KeySchemas: sstable.MakeKeySchemas(&keySchema), // A value of 2 triggers a compaction when there is 1 sub-level. L0CompactionThreshold: 2, L0StopWritesThreshold: 1000, @@ -1238,6 +1240,10 @@ func newPebble(ctx context.Context, cfg engineConfig) (p *Pebble, err error) { return IngestSplitEnabled.Get(&cfg.settings.SV) } cfg.opts.Experimental.EnableColumnarBlocks = func() bool { + // TODO(radu): disable completely for now since the format is not finalized. + if true { + return false + } return columnarBlocksEnabled.Get(&cfg.settings.SV) } cfg.opts.Experimental.EnableDeleteOnlyCompactionExcises = func() bool { diff --git a/pkg/storage/pebble_key_schema.go b/pkg/storage/pebble_key_schema.go index 8c80008e4139..763a3d9e6bb0 100644 --- a/pkg/storage/pebble_key_schema.go +++ b/pkg/storage/pebble_key_schema.go @@ -11,7 +11,6 @@ import ( "encoding/binary" "fmt" "io" - "sync" "unsafe" "github.com/cockroachdb/cockroach/pkg/util/buildutil" @@ -47,15 +46,14 @@ var keySchema = colblk.KeySchema{ cockroachColUntypedVersion: colblk.DataTypeBytes, }, NewKeyWriter: func() colblk.KeyWriter { - kw := &cockroachKeyWriter{} - kw.roachKeys.Init(16) - kw.wallTimes.Init() - kw.logicalTimes.InitWithDefault() - kw.untypedVersions.Init() - return kw + return makeCockroachKeyWriter() }, - NewKeySeeker: func() colblk.KeySeeker { - return &cockroachKeySeeker{} + InitKeySeekerMetadata: func(meta *colblk.KeySeekerMetadata, d *colblk.DataBlockDecoder) { + ks := (*cockroachKeySeeker)(unsafe.Pointer(meta)) + ks.init(d) + }, + KeySeeker: func(meta *colblk.KeySeekerMetadata) colblk.KeySeeker { + return (*cockroachKeySeeker)(unsafe.Pointer(meta)) }, } @@ -70,6 +68,15 @@ type cockroachKeyWriter struct { // Assert *cockroachKeyWriter implements colblk.KeyWriter. 
var _ colblk.KeyWriter = (*cockroachKeyWriter)(nil) +func makeCockroachKeyWriter() *cockroachKeyWriter { + kw := &cockroachKeyWriter{} + kw.roachKeys.Init(16) + kw.wallTimes.Init() + kw.logicalTimes.InitWithDefault() + kw.untypedVersions.Init() + return kw +} + func (kw *cockroachKeyWriter) ComparePrev(key []byte) colblk.KeyComparison { var cmpv colblk.KeyComparison cmpv.PrefixLen = int32(EngineKeySplit(key)) // TODO(jackson): Inline @@ -223,9 +230,7 @@ func (kw *cockroachKeyWriter) Finish( } } -var cockroachKeySeekerPool = sync.Pool{ - New: func() interface{} { return &cockroachKeySeeker{} }, -} +func (kw *cockroachKeyWriter) FinishHeader(buf []byte) {} type cockroachKeySeeker struct { roachKeys colblk.PrefixBytes @@ -235,17 +240,18 @@ type cockroachKeySeeker struct { untypedVersions colblk.RawBytes } +// Assert that the cockroachKeySeeker fits inside KeySeekerMetadata. +var _ uint = colblk.KeySeekerMetadataSize - uint(unsafe.Sizeof(cockroachKeySeeker{})) + var _ colblk.KeySeeker = (*cockroachKeySeeker)(nil) -// Init is part of the KeySeeker interface. -func (ks *cockroachKeySeeker) Init(d *colblk.DataBlockDecoder) error { +func (ks *cockroachKeySeeker) init(d *colblk.DataBlockDecoder) { bd := d.BlockDecoder() ks.roachKeys = bd.PrefixBytes(cockroachColRoachKey) ks.roachKeyChanged = d.PrefixChanged() ks.mvccWallTimes = bd.Uints(cockroachColMVCCWallTime) ks.mvccLogical = bd.Uints(cockroachColMVCCLogical) ks.untypedVersions = bd.RawBytes(cockroachColUntypedVersion) - return nil } // IsLowerBound compares the provided key to the first user key @@ -460,11 +466,5 @@ func (ks *cockroachKeySeeker) MaterializeUserKeyWithSyntheticSuffix( return res } -// Release is part of the KeySeeker interface. -func (ks *cockroachKeySeeker) Release() { - *ks = cockroachKeySeeker{} - cockroachKeySeekerPool.Put(ks) -} - //go:linkname memmove runtime.memmove func memmove(to, from unsafe.Pointer, n uintptr) diff --git a/pkg/storage/pebble_key_schema_test.go b/pkg/storage/pebble_key_schema_test.go index 57c917d3a801..eebec84965a3 100644 --- a/pkg/storage/pebble_key_schema_test.go +++ b/pkg/storage/pebble_key_schema_test.go @@ -13,6 +13,7 @@ import ( "strconv" "strings" "testing" + "unsafe" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock" "github.com/cockroachdb/cockroach/pkg/testutils/datapathutils" @@ -121,18 +122,12 @@ func TestKeySchema_KeySeeker(t *testing.T) { var dec colblk.DataBlockDecoder var ks colblk.KeySeeker var maxKeyLen int - enc.Init(keySchema) + enc.Init(&keySchema) initKeySeeker := func() { - if ks == nil || rand.Intn(2) == 1 { - if ks != nil { - ks.Release() - } - ks = keySchema.NewKeySeeker() - } - if err := ks.Init(&dec); err != nil { - t.Fatal(err) - } + ksPointer := &cockroachKeySeeker{} + keySchema.InitKeySeekerMetadata((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer)), &dec) + ks = keySchema.KeySeeker((*colblk.KeySeekerMetadata)(unsafe.Pointer(ksPointer))) } datadriven.RunTest(t, datapathutils.TestDataPath(t, "key_schema_key_seeker"), func(t *testing.T, td *datadriven.TestData) string { @@ -158,7 +153,7 @@ func TestKeySchema_KeySeeker(t *testing.T) { rows++ } blk, _ := enc.Finish(rows, enc.Size()) - dec.Init(keySchema, blk) + dec.Init(&keySchema, blk) return buf.String() case "is-lower-bound": initKeySeeker() diff --git a/pkg/storage/pebble_test.go b/pkg/storage/pebble_test.go index 00a3ccf99491..2d50bbbe127b 100644 --- a/pkg/storage/pebble_test.go +++ b/pkg/storage/pebble_test.go @@ -92,10 +92,33 @@ func TestEngineComparer(t *testing.T) { ts3a := 
appendBytesToTimestamp(ts3, zeroLogical[:]) ts3b := appendBytesToTimestamp(ts3, slices.Concat(zeroLogical[:], syntheticBit)) - // We group versions by equality and in the expected ordering. - orderedVersions := []any{ + // We group versions by equality and in the expected point key ordering. + orderedVersions := [][]any{ + {ts1}, // Empty version sorts first. + {ts2a, ts2}, + {ts3b, ts3a, ts3}, + {ts4}, + {ts5}, + } + + // Compare range suffixes. + for i := range orderedVersions { + for j := range orderedVersions { + for _, v1 := range orderedVersions[i] { + for _, v2 := range orderedVersions[j] { + result := EngineComparer.ComparePointSuffixes(encodeVersion(v1), encodeVersion(v2)) + if expected := cmp.Compare(i, j); result != expected { + t.Fatalf("CompareSuffixes(%x, %x) = %d, expected %d", v1, v2, result, expected) + } + } + } + } + } + + // CompareRangeSuffixes has a more strict ordering. + rangeOrderedVersions := []any{ ts1, // Empty version sorts first. - ts2a, // Synthetic bit is not ignored when comparing suffixes. + ts2a, // Synthetic bit is not ignored when comparing range suffixes. ts2, ts3b, // Higher timestamps sort before lower timestamps. ts3a, @@ -104,10 +127,10 @@ func TestEngineComparer(t *testing.T) { ts5, } - // Compare suffixes. - for i, v1 := range orderedVersions { - for j, v2 := range orderedVersions { - result := EngineComparer.CompareSuffixes(encodeVersion(v1), encodeVersion(v2)) + // Compare range suffixes. + for i, v1 := range rangeOrderedVersions { + for j, v2 := range rangeOrderedVersions { + result := EngineComparer.CompareRangeSuffixes(encodeVersion(v1), encodeVersion(v2)) if expected := cmp.Compare(i, j); result != expected { t.Fatalf("CompareSuffixes(%x, %x) = %d, expected %d", v1, v2, result, expected) } @@ -116,10 +139,15 @@ func TestEngineComparer(t *testing.T) { lock1 := bytes.Repeat([]byte{1}, engineKeyVersionLockTableLen) lock2 := bytes.Repeat([]byte{2}, engineKeyVersionLockTableLen) - require.Equal(t, 0, EngineComparer.CompareSuffixes(encodeVersion(lock1), encodeVersion(lock1))) - require.Equal(t, 0, EngineComparer.CompareSuffixes(encodeVersion(lock2), encodeVersion(lock2))) - require.Equal(t, +1, EngineComparer.CompareSuffixes(encodeVersion(lock1), encodeVersion(lock2))) - require.Equal(t, -1, EngineComparer.CompareSuffixes(encodeVersion(lock2), encodeVersion(lock1))) + require.Equal(t, 0, EngineComparer.CompareRangeSuffixes(encodeVersion(lock1), encodeVersion(lock1))) + require.Equal(t, 0, EngineComparer.CompareRangeSuffixes(encodeVersion(lock2), encodeVersion(lock2))) + require.Equal(t, +1, EngineComparer.CompareRangeSuffixes(encodeVersion(lock1), encodeVersion(lock2))) + require.Equal(t, -1, EngineComparer.CompareRangeSuffixes(encodeVersion(lock2), encodeVersion(lock1))) + + require.Equal(t, 0, EngineComparer.ComparePointSuffixes(encodeVersion(lock1), encodeVersion(lock1))) + require.Equal(t, 0, EngineComparer.ComparePointSuffixes(encodeVersion(lock2), encodeVersion(lock2))) + require.Equal(t, +1, EngineComparer.ComparePointSuffixes(encodeVersion(lock1), encodeVersion(lock2))) + require.Equal(t, -1, EngineComparer.ComparePointSuffixes(encodeVersion(lock2), encodeVersion(lock1))) keys := []roachpb.Key{ roachpb.Key(""), @@ -128,7 +156,7 @@ func TestEngineComparer(t *testing.T) { roachpb.Key("fg"), } - // We group keys by equality and in the expected ordering. + // We group keys by equality and the groups are in the expected order. 
var orderedKeys [][][]byte for _, k := range keys { orderedKeys = append(orderedKeys, diff --git a/pkg/testutils/lint/passes/fmtsafe/functions.go b/pkg/testutils/lint/passes/fmtsafe/functions.go index 87c6314f597f..4890f40faecb 100644 --- a/pkg/testutils/lint/passes/fmtsafe/functions.go +++ b/pkg/testutils/lint/passes/fmtsafe/functions.go @@ -120,8 +120,6 @@ var requireConstFmt = map[string]bool{ "(*github.com/cockroachdb/cockroach/pkg/kv/kvserver.raftLogger).Fatalf": true, "(*github.com/cockroachdb/cockroach/pkg/kv/kvserver.raftLogger).Panicf": true, - "(*github.com/cockroachdb/cockroach/pkg/kv/kvserver/rafttrace.traceValue).logf": true, - "(*github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvflowcontrol/rac2.LogTracker).errorf": true, "(github.com/cockroachdb/cockroach/pkg/raft/raftlogger.Logger).Debugf": true, diff --git a/pkg/testutils/lint/passes/redactcheck/redactcheck.go b/pkg/testutils/lint/passes/redactcheck/redactcheck.go index 00e764fdeb6e..025db3047678 100644 --- a/pkg/testutils/lint/passes/redactcheck/redactcheck.go +++ b/pkg/testutils/lint/passes/redactcheck/redactcheck.go @@ -138,12 +138,8 @@ func runAnalyzer(pass *analysis.Pass) (interface{}, error) { "ID": {}, }, "github.com/cockroachdb/cockroach/pkg/raft/raftpb": { - "Epoch": {}, - "PeerID": {}, - "MessageType": {}, - "EntryType": {}, - "ConfChangeType": {}, - "ConfChangeTransition": {}, + "Epoch": {}, + "PeerID": {}, }, "github.com/cockroachdb/cockroach/pkg/repstream/streampb": { "StreamID": {}, @@ -229,10 +225,6 @@ func runAnalyzer(pass *analysis.Pass) (interface{}, error) { "WorkKind": {}, "QueueKind": {}, }, - "github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb": { - "TraceID": {}, - "SpanID": {}, - }, "github.com/cockroachdb/cockroach/pkg/util/hlc": { "ClockTimestamp": {}, "LegacyTimestamp": {}, diff --git a/pkg/testutils/release/cockroach_releases.yaml b/pkg/testutils/release/cockroach_releases.yaml index c6a55d32e95f..0c5378376fb4 100644 --- a/pkg/testutils/release/cockroach_releases.yaml +++ b/pkg/testutils/release/cockroach_releases.yaml @@ -19,13 +19,13 @@ - 23.1.0 predecessor: "22.2" "23.2": - latest: 23.2.12 + latest: 23.2.13 predecessor: "23.1" "24.1": - latest: 24.1.5 + latest: 24.1.6 predecessor: "23.2" "24.2": - latest: 24.2.3 + latest: 24.2.4 withdrawn: - 24.2.1 predecessor: "24.1" diff --git a/pkg/ui/workspaces/cluster-ui/src/api/databaseDetailsApi.ts b/pkg/ui/workspaces/cluster-ui/src/api/databaseDetailsApi.ts index 756ee115ca11..1448bfc514db 100644 --- a/pkg/ui/workspaces/cluster-ui/src/api/databaseDetailsApi.ts +++ b/pkg/ui/workspaces/cluster-ui/src/api/databaseDetailsApi.ts @@ -83,7 +83,6 @@ function newDatabaseDetailsSpanStatsResponse(): DatabaseDetailsSpanStatsResponse approximate_disk_bytes: 0, live_bytes: 0, total_bytes: 0, - range_count: 0, }, error: undefined, }; @@ -332,7 +331,6 @@ export type DatabaseSpanStatsRow = { approximate_disk_bytes: number; live_bytes: number; total_bytes: number; - range_count: number; }; function formatSpanStatsExecutionResult( @@ -357,7 +355,6 @@ function formatSpanStatsExecutionResult( if (txnResult.rows.length === 1) { const row = txnResult.rows[0]; out.spanStats.approximate_disk_bytes = row.approximate_disk_bytes; - out.spanStats.range_count = row.range_count; out.spanStats.live_bytes = row.live_bytes; out.spanStats.total_bytes = row.total_bytes; } else { @@ -511,7 +508,6 @@ export function createDatabaseDetailsSpanStatsReq( ): SqlExecutionRequest { const statement = { sql: `SELECT - sum(range_count) as range_count, sum(approximate_disk_bytes) as 
approximate_disk_bytes, sum(live_bytes) as live_bytes, sum(total_bytes) as total_bytes diff --git a/pkg/ui/workspaces/cluster-ui/src/databasesPage/databasesPage.tsx b/pkg/ui/workspaces/cluster-ui/src/databasesPage/databasesPage.tsx index ed446a1ae0e8..96aae47eaee6 100644 --- a/pkg/ui/workspaces/cluster-ui/src/databasesPage/databasesPage.tsx +++ b/pkg/ui/workspaces/cluster-ui/src/databasesPage/databasesPage.tsx @@ -571,29 +571,6 @@ export class DatabasesPage extends React.Component< className: cx("databases-table__col-table-count"), name: "tableCount", }, - { - title: ( - - Range Count - - ), - cell: database => ( - - {database.spanStats?.range_count} - - ), - sort: database => database.spanStats?.range_count, - className: cx("databases-table__col-range-count"), - name: "rangeCount", - }, { title: ( { spanStats: { approximate_disk_bytes: 100, live_bytes: 200, - range_count: 300, total_bytes: 400, error: undefined, }, diff --git a/pkg/ui/workspaces/db-console/src/util/api.spec.ts b/pkg/ui/workspaces/db-console/src/util/api.spec.ts index 1f9f2928915d..00029c6a3409 100644 --- a/pkg/ui/workspaces/db-console/src/util/api.spec.ts +++ b/pkg/ui/workspaces/db-console/src/util/api.spec.ts @@ -119,7 +119,6 @@ describe("rest api", function () { approximate_disk_bytes: 100, live_bytes: 200, total_bytes: 300, - range_count: 400, }, ], }, @@ -130,7 +129,6 @@ describe("rest api", function () { expect(res.results.spanStats.approximate_disk_bytes).toEqual(100); expect(res.results.spanStats.live_bytes).toEqual(200); expect(res.results.spanStats.total_bytes).toEqual(300); - expect(res.results.spanStats.range_count).toEqual(400); }); }); }); diff --git a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/overview.tsx b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/overview.tsx index 3f5269e36faa..cf1c1795b8e8 100644 --- a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/overview.tsx +++ b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/overview.tsx @@ -30,12 +30,12 @@ export default function (props: GraphDashboardProps) { return [ @@ -60,6 +60,11 @@ export default function (props: GraphDashboardProps) { title="Deletes" nonNegativeRate /> + , diff --git a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/sql.tsx b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/sql.tsx index dcae25ab7407..424d21ba55a4 100644 --- a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/sql.tsx +++ b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/sql.tsx @@ -137,12 +137,12 @@ export default function (props: GraphDashboardProps) { , @@ -166,6 +166,11 @@ export default function (props: GraphDashboardProps) { title="Deletes" nonNegativeRate /> + , diff --git a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/summaryBar.tsx b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/summaryBar.tsx index ae924b08b8f3..c627352b74e4 100644 --- a/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/summaryBar.tsx +++ b/pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/summaryBar.tsx @@ -10,7 +10,7 @@ import { useSelector } from "react-redux"; import { Link } from "react-router-dom"; import { createSelector } from "reselect"; -import { Tooltip, Anchor } from "src/components"; +import { Anchor, Tooltip } from "src/components"; import 
{ nodeStatusesSelector, nodeSumsSelector } from "src/redux/nodes"; import { howAreCapacityMetricsCalculated } from "src/util/docs"; import { EventBox } from "src/views/cluster/containers/events"; @@ -18,11 +18,11 @@ import { Metric } from "src/views/shared/components/metricQuery"; import { SummaryBar, SummaryLabel, + SummaryMetricsAggregator, SummaryMetricStat, SummaryStat, SummaryStatBreakdown, SummaryStatMessage, - SummaryMetricsAggregator, } from "src/views/shared/components/summaryBar"; /** @@ -145,28 +145,7 @@ export default function (props: ClusterSummaryProps) { > - - -
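For reference on the redactcheck hunk earlier in this section: the pass appears to keep an allowlist keyed by package path, whose inner set names the types it treats as redaction-safe, and this patch shrinks the raftpb entry to Epoch and PeerID and drops the tracingpb entry entirely. The stand-in below shows only the shape of that lookup; the variable and helper names are hypothetical and this is not the real analyzer.

```go
// allowlist_sketch.go — hypothetical illustration of the allowlist structure
// edited in pkg/testutils/lint/passes/redactcheck above. Not the real pass.
package main

import "fmt"

// allowedSafeTypes maps a package path to the set of named types the lint
// pass accepts as redaction-safe. Only the entries kept by this patch for
// raftpb are shown here.
var allowedSafeTypes = map[string]map[string]struct{}{
	"github.com/cockroachdb/cockroach/pkg/raft/raftpb": {
		"Epoch":  {},
		"PeerID": {},
	},
}

// isAllowedSafe reports whether typeName in pkgPath is on the allowlist.
func isAllowedSafe(pkgPath, typeName string) bool {
	types, ok := allowedSafeTypes[pkgPath]
	if !ok {
		return false
	}
	_, ok = types[typeName]
	return ok
}

func main() {
	fmt.Println(isAllowedSafe("github.com/cockroachdb/cockroach/pkg/raft/raftpb", "Epoch"))       // true
	fmt.Println(isAllowedSafe("github.com/cockroachdb/cockroach/pkg/raft/raftpb", "MessageType")) // false: removed by this patch
}
```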